Merge branch 'bayer' into 'master'
Use Bayer Images See merge request Percipiote/RapidPoseTriangulation!4
This commit is contained in:
@ -70,7 +70,8 @@ mv /mmdeploy/work_dir/end2end.onnx /RapidPoseTriangulation/extras/mmdeploy/expor
|
||||
```
|
||||
|
||||
```bash
|
||||
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs.py
|
||||
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_pt.py
|
||||
python3 /RapidPoseTriangulation/extras/mmdeploy/make_extra_graphs_tf.py
|
||||
```
|
||||
|
||||
```bash
|
||||
|
||||
@ -34,5 +34,8 @@ RUN pip3 install --upgrade --no-cache-dir onnxconverter_common
|
||||
# Fix an error when profiling
|
||||
RUN pip3 install --upgrade --no-cache-dir "onnxruntime-gpu<1.17"
|
||||
|
||||
RUN pip3 install --upgrade --no-cache-dir tensorflow
|
||||
RUN pip3 install --upgrade --no-cache-dir tf2onnx
|
||||
|
||||
WORKDIR /mmdeploy/
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
276
extras/mmdeploy/make_extra_graphs_tf.py
Normal file
276
extras/mmdeploy/make_extra_graphs_tf.py
Normal file
@ -0,0 +1,276 @@
|
||||
import cv2
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import tf2onnx
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
# Output directory for exported ONNX graphs and debug images
base_path = "/RapidPoseTriangulation/extras/mmdeploy/exports/"
# (height, width) letterbox target for the detector input graph
det_target_size = (320, 320)
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class BayerToRGB(tf.keras.layers.Layer):
    """Convert Bayer image to RGB
    See: https://stanford.edu/class/ee367/reading/Demosaicing_ICASSP04.pdf
    See: https://github.com/cheind/pytorch-debayer/blob/master/debayer/modules.py#L231
    """

    def __init__(self):
        super().__init__()
        # RGGB is the only layout these kernels and index tables support
        self.layout = "RGGB"
        # Expected input value range; used below to decide whether to round
        self.max_val = 255.0

        # Four 5x5 interpolation kernels (Malvar et al., see paper above).
        # Stored as [kh, kw, in_channels=1, out_channels=4] for tf.nn.conv2d,
        # normalized by the common 1/8 factor.
        self.kernels = tf.constant(
            np.array(
                [
                    # G at R/B locations
                    [
                        [0, 0, -1, 0, 0],
                        [0, 0, 2, 0, 0],
                        [-1, 2, 4, 2, -1],
                        [0, 0, 2, 0, 0],
                        [0, 0, -1, 0, 0],
                    ],
                    # R/B at G in R/B rows and B/R columns
                    [
                        [0, 0, 0.5, 0, 0],
                        [0, -1, 0, -1, 0],
                        [-1, 4, 5, 4, -1],
                        [0, -1, 0, -1, 0],
                        [0, 0, 0.5, 0, 0],
                    ],
                    # R/B at G in B/R rows and R/B columns
                    [
                        [0, 0, 0.5, 0, 0],
                        [0, -1, 4, -1, 0],
                        [-1, 0, 5, 0, -1],
                        [0, -1, 4, -1, 0],
                        [0, 0, 0.5, 0, 0],
                    ],
                    # R/B at B/R in B/R rows and B/R columns
                    [
                        [0, 0, -1.5, 0, 0],
                        [0, 2, 0, 2, 0],
                        [-1.5, 0, 6, 0, -1.5],
                        [0, 2, 0, 2, 0],
                        [0, 0, -1.5, 0, 0],
                    ],
                ],
                dtype=np.float32,
            )
            .reshape(1, 4, 5, 5)
            .transpose(2, 3, 0, 1)
            / 8.0
        )
        # Plane index (0-3: the kernels above, 4: the identity/original pixel)
        # that produces each output channel at each position of the 2x2 tile.
        self.index = tf.constant(
            np.array(
                # Describes the kernel indices that calculate the corresponding RGB values for
                # the 2x2 layout (RGGB) sub-structure
                [
                    # Destination R
                    [
                        [4, 1],  # identity, R at G in R row B column
                        [2, 3],  # R at G in B row R column, R at B in B row R column
                    ],
                    # Destination G
                    [
                        [0, 4],
                        [4, 0],
                    ],
                    # Destination B
                    [
                        [3, 2],
                        [1, 4],
                    ],
                ]
            ).reshape(1, 3, 2, 2)
        )

    def call(self, img):
        """Demosaic a [batch, H, W, 1] RGGB Bayer tensor into [batch, H, W, 3] RGB.

        H and W must be even (the 2x2 index tile is repeated H//2 x W//2 times).
        """
        H, W = tf.shape(img)[1], tf.shape(img)[2]

        # Pad the image by 2 pixels on each side so the 5x5 kernels produce
        # an output of the original size with padding="VALID".
        # NOTE(review): this replicates the first/last two rows and columns
        # in their original order (not reflected) — presumably an acceptable
        # border approximation; confirm against the reference implementation.
        tpad = img[:, 0:2, :, :]
        bpad = img[:, H - 2 : H, :, :]
        ipad = tf.concat([tpad, img, bpad], axis=1)
        lpad = ipad[:, :, 0:2, :]
        rpad = ipad[:, :, W - 2 : W, :]
        ipad = tf.concat([lpad, ipad, rpad], axis=2)

        # Convolve with kernels -> 4 candidate planes per pixel
        planes = tf.nn.conv2d(ipad, self.kernels, strides=[1, 1, 1, 1], padding="VALID")

        # Concatenate identity kernel (the raw pixel value) as plane 4
        planes = tf.concat([planes, img], axis=-1)

        # Gather values: build [batch, H, W, 3, 3] indices of (row, col, plane)
        # and pick, per pixel and output channel, the correct candidate plane.
        index_repeated = tf.tile(self.index, multiples=[1, 1, H // 2, W // 2])
        index_repeated = tf.transpose(index_repeated, perm=[0, 2, 3, 1])
        row_indices, col_indices = tf.meshgrid(tf.range(H), tf.range(W), indexing="ij")
        index_tensor = tf.stack([row_indices, col_indices], axis=-1)
        index_tensor = tf.expand_dims(index_tensor, axis=0)
        index_tensor = tf.expand_dims(index_tensor, axis=-2)
        index_tensor = tf.repeat(index_tensor, repeats=3, axis=-2)
        index_repeated = tf.expand_dims(index_repeated, axis=-1)
        indices = tf.concat([tf.cast(index_tensor, tf.int64), index_repeated], axis=-1)
        rgb = tf.gather_nd(planes, indices, batch_dims=1)

        if self.max_val == 255.0:
            # Make value range valid again (interpolation can yield fractions)
            rgb = tf.round(rgb)

        return rgb
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def bayer_resize(img, size):
    """Resize a Bayer image by splitting color channels.

    Resizing the raw mosaic directly would mix neighboring color sites, so the
    RGGB planes are separated, resized at half resolution, and re-interleaved.
    """

    # Separate the RGGB mosaic into its four color planes
    planes = tf.stack(
        [
            img[:, 0::2, 0::2, 0],  # R
            img[:, 0::2, 1::2, 0],  # G (even rows)
            img[:, 1::2, 0::2, 0],  # G (odd rows)
            img[:, 1::2, 1::2, 0],  # B
        ],
        axis=-1,
    )

    # Resize at half resolution; flooring also keeps the target size even
    half_size = (size[0] // 2, size[1] // 2)
    resized = tf.image.resize(planes, size=half_size, method="bilinear")

    # Interleave the four planes back into a single-channel Bayer mosaic
    return tf.nn.depth_to_space(resized, block_size=2)
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class Letterbox(tf.keras.layers.Layer):
    def __init__(self, target_size, fill_value=128):
        """Resize and pad image while keeping aspect ratio

        target_size: (height, width) of the output image
        fill_value: constant used for the padded border
        """
        super(Letterbox, self).__init__()

        # Demosaicing layer applied after the Bayer-domain resize
        self.b2rgb = BayerToRGB()
        self.target_size = target_size
        self.fill_value = fill_value

    def calc_params(self, ishape):
        """Compute padding, scale factor and new (width, height) for ishape [b, H, W, C]."""
        img_h, img_w = ishape[1], ishape[2]
        target_h, target_w = self.target_size

        # Uniform scale so the image fits inside the target in both dimensions
        scale = tf.minimum(target_w / img_w, target_h / img_h)
        new_w = tf.round(tf.cast(img_w, scale.dtype) * scale)
        new_h = tf.round(tf.cast(img_h, scale.dtype) * scale)
        new_w = tf.cast(new_w, tf.int32)
        new_h = tf.cast(new_h, tf.int32)
        # Force even dimensions; bayer_resize halves and doubles the size,
        # which requires sizes divisible by 2
        new_w = new_w - (new_w % 2)
        new_h = new_h - (new_h % 2)

        # Split the remaining space into (near-)symmetric borders; the extra
        # pixel of an odd padding goes to the right/bottom
        pad_w = target_w - new_w
        pad_h = target_h - new_h
        pad_left = tf.cast(tf.floor(tf.cast(pad_w, tf.float32) / 2.0), tf.int32)
        pad_top = tf.cast(tf.floor(tf.cast(pad_h, tf.float32) / 2.0), tf.int32)
        pad_right = pad_w - pad_left
        pad_bottom = pad_h - pad_top
        paddings = [pad_top, pad_bottom, pad_left, pad_right]

        return paddings, scale, (new_w, new_h)

    def call(self, img):
        """Letterbox a Bayer tensor [b, H, W, 1] to an RGB tensor of target_size."""
        paddings, _, (nw, nh) = self.calc_params(tf.shape(img))

        # Resize the image and convert to RGB
        img = bayer_resize(img, (nh, nw))
        img = self.b2rgb(img)

        # Pad the image with the constant fill value up to the target size
        pad_top, pad_bottom, pad_left, pad_right = paddings
        img = tf.pad(
            img,
            paddings=[[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
            mode="CONSTANT",
            constant_values=self.fill_value,
        )

        return img
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
class DetPreprocess(tf.keras.layers.Layer):
    """Detector preprocessing: cast to float32 and letterbox to the target size."""

    def __init__(self, target_size, fill_value=114):
        super().__init__()
        self.letterbox = Letterbox(target_size, fill_value)

    def call(self, img):
        """img: tf.Tensor of shape [batch, H, W, C], dtype=tf.uint8"""
        # Cast to float32 since TensorRT does not support uint8 layers
        floated = tf.cast(img, tf.float32)
        return self.letterbox(floated)
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def rgb2bayer(img):
    """Collapse an RGB image [H, W, 3] into a single-channel RGGB Bayer mosaic [H, W].

    At each pixel only the channel of the corresponding RGGB tile position
    survives; the other two channels are discarded.
    """
    bayer = np.zeros((img.shape[0], img.shape[1]), dtype=img.dtype)
    # (row offset, col offset) -> source RGB channel for the 2x2 RGGB tile
    tile = (((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 2))
    for (row, col), channel in tile:
        bayer[row::2, col::2] = img[row::2, col::2, channel]
    return bayer
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def main():
    """Build the detector preprocessing graph, sanity-check it on one image,
    and export it to ONNX for TensorRT."""

    # Load a sample image and convert it to a fake Bayer input [H, W, 1] uint8
    img_path = "/RapidPoseTriangulation/scripts/../data/h1/54138969-img_003201.jpg"
    image = cv2.imread(img_path, 3)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = rgb2bayer(image)
    image = np.expand_dims(image, axis=-1)
    image = np.asarray(image, dtype=np.uint8)

    # Initialize the DetPreprocess module
    preprocess_model = tf.keras.Sequential()
    preprocess_model.add(DetPreprocess(target_size=det_target_size))
    det_dummy_input_a0 = tf.convert_to_tensor(
        np.expand_dims(image, axis=0), dtype=tf.uint8
    )
    # Run once to build the graph and check the output shape
    det_dummy_output_a0 = preprocess_model(det_dummy_input_a0)
    print("\n", det_dummy_output_a0.shape, "\n")

    # Save the preprocessed image for visual inspection
    output_a0 = det_dummy_output_a0.numpy()
    output_a0 = np.squeeze(output_a0, axis=0)
    output_a0 = np.asarray(output_a0, dtype=np.uint8)
    output_a0 = cv2.cvtColor(output_a0, cv2.COLOR_RGB2BGR)
    cv2.imwrite(base_path + "det_preprocess.jpg", output_a0)

    # Export to ONNX with a dynamic batch/height/width, single-channel uint8 input
    input_signature = [tf.TensorSpec([None, None, None, 1], tf.uint8, name="x")]
    _, _ = tf2onnx.convert.from_keras(
        preprocess_model,
        input_signature,
        opset=11,
        output_path=base_path + "det_preprocess.onnx",
        target=["tensorrt"],
    )
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
# Script entry point: build the preprocessing graph and export it to ONNX
if __name__ == "__main__":
    main()
|
||||
9242
media/RESULTS.md
9242
media/RESULTS.md
File diff suppressed because it is too large
Load Diff
@ -61,7 +61,7 @@ datasets = {
|
||||
"human36m": {
|
||||
"path": "/datasets/human36m/skelda/pose_test.json",
|
||||
"take_interval": 5,
|
||||
"min_match_score": 0.94,
|
||||
"min_match_score": 0.95,
|
||||
"min_group_size": 1,
|
||||
"min_bbox_score": 0.4,
|
||||
"min_bbox_area": 0.1 * 0.1,
|
||||
@ -73,6 +73,7 @@ datasets = {
|
||||
# "cams": ["00_03", "00_06", "00_12"],
|
||||
# "cams": ["00_03", "00_06", "00_12", "00_13", "00_23", "00_15", "00_10", "00_21", "00_09", "00_01"],
|
||||
"take_interval": 3,
|
||||
"min_match_score": 0.95,
|
||||
"use_scenes": ["160906_pizza1", "160422_haggling1", "160906_ian5"],
|
||||
"min_group_size": 1,
|
||||
# "min_group_size": 4,
|
||||
@ -88,6 +89,7 @@ datasets = {
|
||||
"campus": {
|
||||
"path": "/datasets/campus/skelda/test.json",
|
||||
"take_interval": 1,
|
||||
"min_match_score": 0.90,
|
||||
"min_bbox_score": 0.5,
|
||||
},
|
||||
"shelf": {
|
||||
@ -109,6 +111,7 @@ datasets = {
|
||||
"tsinghua": {
|
||||
"path": "/datasets/tsinghua/skelda/test.json",
|
||||
"take_interval": 3,
|
||||
"min_match_score": 0.95,
|
||||
"min_group_size": 2,
|
||||
},
|
||||
"human36m_wb": {
|
||||
@ -122,7 +125,7 @@ datasets = {
|
||||
"take_interval": 2,
|
||||
"subset": "tagging",
|
||||
"min_group_size": 2,
|
||||
"min_bbox_score": 0.25,
|
||||
"min_bbox_score": 0.2,
|
||||
"min_bbox_area": 0.05 * 0.05,
|
||||
},
|
||||
"egohumans_legoassemble": {
|
||||
@ -343,19 +346,32 @@ def main():
|
||||
# Print a dataset sample for debugging
|
||||
print(labels[0])
|
||||
|
||||
print("\nPrefetching images ...")
|
||||
for label in tqdm.tqdm(labels):
|
||||
# If the images are stored on a HDD, it sometimes takes a while to load them
|
||||
# Prefetching them results in more stable timings of the following steps
|
||||
# To prevent memory overflow, the code only loads the images, but does not store them
|
||||
try:
|
||||
for i in range(len(label["imgpaths"])):
|
||||
imgpath = label["imgpaths"][i]
|
||||
img = test_triangulate.load_image(imgpath)
|
||||
except cv2.error:
|
||||
print("One of the paths not found:", label["imgpaths"])
|
||||
continue
|
||||
time.sleep(3)
|
||||
|
||||
print("\nCalculating 2D predictions ...")
|
||||
all_poses_2d = []
|
||||
times = []
|
||||
for label in tqdm.tqdm(labels):
|
||||
images_2d = []
|
||||
|
||||
try:
|
||||
start = time.time()
|
||||
try:
|
||||
for i in range(len(label["imgpaths"])):
|
||||
imgpath = label["imgpaths"][i]
|
||||
img = test_triangulate.load_image(imgpath)
|
||||
images_2d.append(img)
|
||||
time_imgs = time.time() - start
|
||||
except cv2.error:
|
||||
print("One of the paths not found:", label["imgpaths"])
|
||||
continue
|
||||
@ -373,7 +389,16 @@ def main():
|
||||
cam["K"][0][2] = cam["K"][0][2] * (1000 / ishape[1])
|
||||
images_2d[i] = cv2.resize(img, (1000, 1000))
|
||||
|
||||
# Convert image format to Bayer encoding to simulate real camera input
|
||||
# This also resulted in notably better MPJPE results in most cases, presumably since the
|
||||
# demosaicing algorithm from OpenCV is better than the default one from the cameras
|
||||
for i in range(len(images_2d)):
|
||||
images_2d[i] = test_triangulate.rgb2bayer(images_2d[i])
|
||||
time_imgs = time.time() - start
|
||||
|
||||
start = time.time()
|
||||
for i in range(len(images_2d)):
|
||||
images_2d[i] = test_triangulate.bayer2rgb(images_2d[i])
|
||||
poses_2d = utils_2d_pose.get_2d_pose(kpt_model, images_2d)
|
||||
poses_2d = test_triangulate.update_keypoints(poses_2d, joint_names_2d)
|
||||
time_2d = time.time() - start
|
||||
|
||||
@ -227,6 +227,23 @@ def load_image(path: str):
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def rgb2bayer(img):
    """Simulate an RGGB Bayer sensor readout from an RGB image [H, W, 3].

    Returns a single-channel mosaic [H, W] keeping, per pixel, only the
    channel that matches its position in the 2x2 RGGB tile.
    """
    height, width = img.shape[0], img.shape[1]
    mosaic = np.zeros((height, width), dtype=img.dtype)
    mosaic[0::2, 0::2] = img[0::2, 0::2, 0]  # R sites
    mosaic[0::2, 1::2] = img[0::2, 1::2, 1]  # G sites (even rows)
    mosaic[1::2, 0::2] = img[1::2, 0::2, 1]  # G sites (odd rows)
    mosaic[1::2, 1::2] = img[1::2, 1::2, 2]  # B sites
    return mosaic
|
||||
|
||||
|
||||
def bayer2rgb(bayer):
    """Demosaic a single-channel Bayer image back to RGB using OpenCV.

    NOTE(review): OpenCV's Bayer constant names refer to the 2x2 tile starting
    at the second row/column, so COLOR_BayerBG2RGB presumably corresponds to
    the RGGB layout produced by rgb2bayer — verify against real camera output.
    """
    img = cv2.cvtColor(bayer, cv2.COLOR_BayerBG2RGB)
    return img
|
||||
|
||||
|
||||
# ==================================================================================================
|
||||
|
||||
|
||||
def update_keypoints(poses_2d: list, joint_names: List[str]) -> list:
|
||||
new_views = []
|
||||
for view in poses_2d:
|
||||
@ -314,6 +331,8 @@ def main():
|
||||
for i in range(len(sample["cameras_color"])):
|
||||
imgpath = sample["imgpaths_color"][i]
|
||||
img = load_image(imgpath)
|
||||
img = rgb2bayer(img)
|
||||
img = bayer2rgb(img)
|
||||
images_2d.append(img)
|
||||
|
||||
# Get 2D poses
|
||||
|
||||
Reference in New Issue
Block a user