# RapidPoseTriangulation/scripts/test_triangulate.py

import copy
import json
import os
import sys
import time
from typing import List

import cv2
import matplotlib
import numpy as np

import utils_2d_pose
from skelda import utils_pose, utils_view

sys.path.append("/RapidPoseTriangulation/swig/")
import rpt

# ==================================================================================================

# Absolute directory of this script (with trailing slash) and the sample data folder next to it
filepath = f"{os.path.dirname(os.path.realpath(__file__))}/"
test_img_dir = f"{filepath}../data/"
# Switches for the optional keypoint groups; with all of them off only the 17 body joints are used
whole_body = dict(foots=False, face=False, hands=False)


def _numbered(stem, count):
    """Return the joint names ``<stem>_1`` ... ``<stem>_<count>``."""
    return [f"{stem}_{idx}" for idx in range(1, count + 1)]


def _hand_joint_names(side):
    """Return the 21 joint names of one hand: the wrist, then four joints per finger."""
    names = [f"hand_wrist_{side}"]
    for finger in ("thumb", "index", "middle", "ring", "pinky"):
        names += _numbered(f"hand_finger_{finger}_{side}", 4)
    return names


# 17 body joints: the nose followed by left/right pairs
joint_names_2d = ["nose"] + [
    f"{part}_{side}"
    for part in ("eye", "ear", "shoulder", "elbow", "wrist", "hip", "knee", "ankle")
    for side in ("left", "right")
]

# 6 foot joints: big toe, small toe and heel, left foot first
if whole_body["foots"]:
    joint_names_2d += [
        f"foot_{part}_{side}"
        for side in ("left", "right")
        for part in ("toe_big", "toe_small", "heel")
    ]

# 68 face joints: jaw line, eyebrows, nose, eyes and mouth
if whole_body["face"]:
    joint_names_2d += (
        _numbered("face_jaw_right", 8)
        + ["face_jaw_middle"]
        + _numbered("face_jaw_left", 8)
        + _numbered("face_eyebrow_right", 5)
        + _numbered("face_eyebrow_left", 5)
        + _numbered("face_nose", 9)
        + _numbered("face_eye_right", 6)
        + _numbered("face_eye_left", 6)
        + _numbered("face_mouth", 20)
    )

# 42 hand joints: the complete left hand followed by the complete right hand
if whole_body["hands"]:
    joint_names_2d += _hand_joint_names("left") + _hand_joint_names("right")

# Derived joints that are always placed at the end of the list
joint_names_2d += ["hip_middle", "shoulder_middle", "head"]

# The 3D poses use the same joints; keep an independent copy of the list
joint_names_3d = joint_names_2d.copy()

# Limb segments as (start joint, end joint) pairs: upper and lower part of both arms, then both legs
main_limbs = [
    (f"{start}_{side}", f"{end}_{side}")
    for chain in (("shoulder", "elbow", "wrist"), ("hip", "knee", "ankle"))
    for side in ("left", "right")
    for start, end in zip(chain, chain[1:])
]

# ==================================================================================================


def update_sample(sample, new_dir=""):
    """Return a copy of ``sample`` with relocated image paths and the color/depth keys filled in.

    The input dictionary is left untouched. Every entry of ``imgpaths`` keeps
    only its file name and is re-rooted below ``new_dir``.
    """
    updated = copy.deepcopy(sample)

    # Point every image to the same file name inside the new directory
    relocated = []
    for old_path in updated["imgpaths"]:
        relocated.append(os.path.join(new_dir, os.path.basename(old_path)))
    updated["imgpaths"] = relocated

    # The color entries alias the generic ones; no depth cameras are provided
    updated["cameras_color"] = updated["cameras"]
    updated["imgpaths_color"] = relocated
    updated["cameras_depth"] = []

    return updated


# ==================================================================================================


def load_image(path: str) -> np.ndarray:
    """Load an image file and return it as an RGB ``uint8`` array.

    Args:
        path: Location of the image file on disk.

    Returns:
        The image with its channels converted from BGR to RGB order.

    Raises:
        OSError: If OpenCV could not read the file (missing, unreadable or
            not a supported image format).
    """
    # Flag 3 combines cv2.IMREAD_COLOR (1) and cv2.IMREAD_ANYDEPTH (2)
    image = cv2.imread(path, 3)

    # cv2.imread reports failures by returning None instead of raising, which
    # would otherwise only surface as a cryptic assertion inside cvtColor
    if image is None:
        raise OSError(f"Could not read image (missing or unreadable): {path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return np.asarray(image, dtype=np.uint8)


# ==================================================================================================


def _middle_joint(body: list, first: int, second: int) -> list:
    """Return the midpoint of two keypoints, scored with the lower of both confidences."""
    joint_a, joint_b = body[first], body[second]
    return [
        float((joint_a[0] + joint_b[0]) / 2.0),
        float((joint_a[1] + joint_b[1]) / 2.0),
        min(joint_a[2], joint_b[2]),
    ]


def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
    """Keep the enabled keypoint groups of every body and append three derived joints.

    The first 17 keypoints of each body are always kept. The foot, face and
    hand groups are added only when the matching ``whole_body`` flag is set.
    Finally ``hip_middle``, ``shoulder_middle`` and ``head`` are appended, each
    being the midpoint of the left/right hips, shoulders and ears with the
    lower of the two scores.

    Args:
        poses_2d: One entry per view, each holding one array per detected body
            (anything offering ``tolist()``) with ``[x, y, score]`` rows.
        joint_names: Joint names of the resulting bodies; used to look up the
            indices of the hip, shoulder and ear joints.

    Returns:
        Nested lists in the same view/body structure as the input.

    Raises:
        ValueError: If ``joint_names`` lacks one of the hip, shoulder or ear
            joints.
    """
    # Group boundaries, assuming the detector emits 17 body, 6 foot, 68 face
    # and 42 hand keypoints in that order (the group sizes of the joint name
    # lists in this file) — TODO confirm against the whole-body model output
    body_end = 17
    foot_end = body_end + 6
    face_end = foot_end + 68

    # The joint indices are the same for every body, so look them up only once
    hip_ids = (joint_names.index("hip_left"), joint_names.index("hip_right"))
    shoulder_ids = (
        joint_names.index("shoulder_left"),
        joint_names.index("shoulder_right"),
    )
    ear_ids = (joint_names.index("ear_left"), joint_names.index("ear_right"))

    new_views = []
    for view in poses_2d:
        new_bodies = []
        for body in view:
            keypoints = body.tolist()

            new_body = keypoints[:body_end]
            if whole_body["foots"]:
                new_body.extend(keypoints[body_end:foot_end])
            if whole_body["face"]:
                new_body.extend(keypoints[foot_end:face_end])
            if whole_body["hands"]:
                new_body.extend(keypoints[face_end:])

            # Compute all three before appending; they only depend on body joints
            derived = [
                _middle_joint(new_body, *hip_ids),
                _middle_joint(new_body, *shoulder_ids),
                _middle_joint(new_body, *ear_ids),
            ]
            new_body.extend(derived)

            new_bodies.append(new_body)
        new_views.append(new_bodies)

    return new_views


# ==================================================================================================


def main():
    """Detect 2D poses, triangulate 3D poses and plot the results for every sample folder.

    Processes the two-character folders below ``test_img_dir`` whose name starts
    with ``p``, ``h``, ``e`` or ``q``. For each one ``sample.json`` is read, the
    2D detections are saved as ``2d-k.png`` and, unless the sample has a single
    image, the 3D poses and their projections are saved as ``3d-p.png`` and
    ``2d-p.png`` before the plots are shown.
    """
    # The whole-body detector is only loaded when an extra keypoint group is enabled
    if any(whole_body.values()):
        kpt_model = utils_2d_pose.load_wb_model()
    else:
        kpt_model = utils_2d_pose.load_model(min_bbox_score=0.3)

    # Manually set matplotlib backend
    matplotlib.use("TkAgg")

    num_joints = len(joint_names_3d)

    for dirname in sorted(os.listdir(test_img_dir)):
        dirpath = os.path.join(test_img_dir, dirname)

        # Skip everything that is not a sample folder
        is_sample_dir = (
            os.path.isdir(dirpath)
            and len(dirname) == 2
            and dirname[0] in ("p", "h", "e", "q")
        )
        if not is_sample_dir:
            continue

        # Load sample infos
        print("\n" + dirpath)
        sample_file = os.path.join(dirpath, "sample.json")
        with open(sample_file, "r", encoding="utf-8") as file:
            raw_sample = json.load(file)
        sample = update_sample(raw_sample, dirpath)

        camparams = sample["cameras_color"]
        imgpaths = sample["imgpaths_color"]
        roomparams = {key: sample[key] for key in ("room_size", "room_center")}

        # Load one color image per camera
        images_2d = [load_image(imgpaths[idx]) for idx in range(len(camparams))]

        # Get 2D poses
        stime = time.time()
        detections_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
        detections_2d = update_keypoints(detections_2d, joint_names_2d)
        print("2D time:", time.time() - stime)

        fig1 = utils_view.draw_many_images(
            imgpaths, [], [], detections_2d, joint_names_2d, "2D detections"
        )
        fig1.savefig(os.path.join(dirpath, "2d-k.png"), dpi=fig1.dpi)

        # A single view cannot be triangulated, so only the detections are shown
        if len(images_2d) == 1:
            utils_view.show_plots()
            continue

        # Get 3D poses
        has_detections = sum(np.sum(view) for view in detections_2d) != 0
        if has_detections:
            cameras = rpt.convert_cameras(camparams)
            roomp = [roomparams["room_size"], roomparams["room_center"]]
            triangulator = rpt.Triangulator(min_match_score=0.94)

            stime = time.time()
            triangulated = triangulator.triangulate_poses(
                detections_2d, cameras, roomp, joint_names_2d
            )
            poses_3d = np.array(triangulated)
            if len(poses_3d) == 0:
                # Fall back to one all-zero pose when nothing was triangulated
                poses_3d = np.zeros([1, num_joints, 4])
            print("3D time:", time.time() - stime)

            # Project the 3D poses back into every camera
            projections = []
            for cam in camparams:
                projected, _ = utils_pose.project_poses(poses_3d, cam)
                projections.append(projected)
        else:
            # All detections sum to zero: use all-zero placeholder poses
            poses_3d = np.zeros([1, num_joints, 4])
            projections = np.zeros([len(images_2d), 1, num_joints, 3])

        print(poses_3d)

        fig2 = utils_view.draw_poses3d(poses_3d, joint_names_3d, roomparams, camparams)
        fig3 = utils_view.draw_many_images(
            imgpaths, [], [], projections, joint_names_3d, "2D projections"
        )
        fig2.savefig(os.path.join(dirpath, "3d-p.png"), dpi=fig2.dpi)
        fig3.savefig(os.path.join(dirpath, "2d-p.png"), dpi=fig3.dpi)
        utils_view.show_plots()


# ==================================================================================================

# Run the test pipeline only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()