Using Bayer encoding for images.

This commit is contained in:
Daniel
2025-01-13 16:50:08 +01:00
parent 53543368c4
commit 2f52521b9a
3 changed files with 2657 additions and 2613 deletions

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,7 @@ datasets = {
"human36m": { "human36m": {
"path": "/datasets/human36m/skelda/pose_test.json", "path": "/datasets/human36m/skelda/pose_test.json",
"take_interval": 5, "take_interval": 5,
"min_match_score": 0.94, "min_match_score": 0.95,
"min_group_size": 1, "min_group_size": 1,
"min_bbox_score": 0.4, "min_bbox_score": 0.4,
"min_bbox_area": 0.1 * 0.1, "min_bbox_area": 0.1 * 0.1,
@ -73,6 +73,7 @@ datasets = {
# "cams": ["00_03", "00_06", "00_12"], # "cams": ["00_03", "00_06", "00_12"],
# "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"], # "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
"take_interval": 3, "take_interval": 3,
"min_match_score": 0.95,
"use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"], "use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
"min_group_size": 1, "min_group_size": 1,
# "min_group_size": 4, # "min_group_size": 4,
@ -88,6 +89,7 @@ datasets = {
"campus": { "campus": {
"path": "/datasets/campus/skelda/test.json", "path": "/datasets/campus/skelda/test.json",
"take_interval": 1, "take_interval": 1,
"min_match_score": 0.90,
"min_bbox_score": 0.5, "min_bbox_score": 0.5,
}, },
"shelf": { "shelf": {
@ -109,6 +111,7 @@ datasets = {
"tsinghua": { "tsinghua": {
"path": "/datasets/tsinghua/skelda/test.json", "path": "/datasets/tsinghua/skelda/test.json",
"take_interval": 3, "take_interval": 3,
"min_match_score": 0.95,
"min_group_size": 2, "min_group_size": 2,
}, },
"human36m_wb": { "human36m_wb": {
@ -122,7 +125,7 @@ datasets = {
"take_interval": 2, "take_interval": 2,
"subset": "tagging", "subset": "tagging",
"min_group_size": 2, "min_group_size": 2,
"min_bbox_score": 0.25, "min_bbox_score": 0.2,
"min_bbox_area": 0.05 * 0.05, "min_bbox_area": 0.05 * 0.05,
}, },
"egohumans_legoassemble": { "egohumans_legoassemble": {
@ -343,19 +346,32 @@ def main():
# Print a dataset sample for debugging # Print a dataset sample for debugging
print(labels[0]) print(labels[0])
print("\nPrefetching images ...")
for label in tqdm.tqdm(labels):
# If the images are stored on a HDD, it sometimes takes a while to load them
# Prefetching them results in more stable timings of the following steps
# To prevent memory overflow, the code only loads the images, but does not store them
try:
for i in range(len(label["imgpaths"])):
imgpath = label["imgpaths"][i]
img = test_triangulate.load_image(imgpath)
except cv2.error:
print("One of the paths not found:", label["imgpaths"])
continue
time.sleep(3)
print("\nCalculating 2D predictions ...") print("\nCalculating 2D predictions ...")
all_poses_2d = [] all_poses_2d = []
times = [] times = []
for label in tqdm.tqdm(labels): for label in tqdm.tqdm(labels):
images_2d = [] images_2d = []
start = time.time()
try: try:
start = time.time()
for i in range(len(label["imgpaths"])): for i in range(len(label["imgpaths"])):
imgpath = label["imgpaths"][i] imgpath = label["imgpaths"][i]
img = test_triangulate.load_image(imgpath) img = test_triangulate.load_image(imgpath)
images_2d.append(img) images_2d.append(img)
time_imgs = time.time() - start
except cv2.error: except cv2.error:
print("One of the paths not found:", label["imgpaths"]) print("One of the paths not found:", label["imgpaths"])
continue continue
@ -373,7 +389,16 @@ def main():
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1]) cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
images_2d[i] = cv2.resize(img, (1000, 1000)) images_2d[i] = cv2.resize(img, (1000, 1000))
# Convert image format to Bayer encoding to simulate real camera input
# This also resulted in notably better MPJPE results in most cases, presumably since the
# demosaicing algorithm from OpenCV is better than the default one from the cameras
for i in range(len(images_2d)):
images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
time_imgs = time.time() - start
start = time.time() start = time.time()
for i in range(len(images_2d)):
images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d) poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d) poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
time_2d = time.time() - start time_2d = time.time() - start

View File

@ -227,6 +227,23 @@ def load_image(path: str):
# ================================================================================================== # ==================================================================================================
def rgb2bayer(img):
    """Mosaic a 3-channel RGB image into a single-channel RGGB Bayer image.

    Each output pixel keeps exactly one colour channel of the input,
    following the RGGB layout: even rows alternate R/G, odd rows
    alternate G/B. Output has the same height/width/dtype as the input.
    """
    height, width = img.shape[0], img.shape[1]
    mosaic = np.empty((height, width), dtype=img.dtype)
    # (row offset, col offset) -> RGB channel index for the RGGB layout
    layout = {(0, 0): 0, (0, 1): 1, (1, 0): 1, (1, 1): 2}
    for (row, col), channel in layout.items():
        mosaic[row::2, col::2] = img[row::2, col::2, channel]
    return mosaic
def bayer2rgb(bayer):
    """Demosaic a single-channel Bayer image back into a 3-channel RGB image.

    NOTE(review): OpenCV's Bayer conversion codes are named after an
    offset 2x2 tile, so COLOR_BayerBG corresponds to an RGGB sensor
    layout — confirm this matches the mosaic produced by rgb2bayer.
    """
    return cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
# ==================================================================================================
def update_keypoints(poses_2d: list, joint_names: List[str]) -> list: def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
new_views = [] new_views = []
for view in poses_2d: for view in poses_2d:
@ -314,6 +331,8 @@ def main():
for i in range(len(sample["cameras_color"])): for i in range(len(sample["cameras_color"])):
imgpath = sample["imgpaths_color"][i] imgpath = sample["imgpaths_color"][i]
img = load_image(imgpath) img = load_image(imgpath)
img = rgb2bayer(img)
img = bayer2rgb(img)
images_2d.append(img) images_2d.append(img)
# Get 2D poses # Get 2D poses