first commit

IamZLT committed on 2024-08-05 11:19:19 +08:00
commit 8b2e804ccc
39 changed files with 2795 additions and 0 deletions

360  common/Mydataset.py  Normal file

@@ -0,0 +1,360 @@
import torch
import numpy as np
import torch.utils.data as data
from common.cameras import normalize_screen_coordinates
class ChunkedGenerator:
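# Slices each (subject, action) sequence into fixed-length chunks; every chunk's 2D window is
# edge-padded by `pad` frames on both sides, and flipped/reversed copies can be added for augmentation.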
def __init__(self, batch_size, cameras, poses_3d, poses_2d,
chunk_length=1, pad=0, causal_shift=0,
shuffle=False, random_seed=1234,
augment=False, reverse_aug=False, kps_left=None, kps_right=None, joints_left=None, joints_right=None,
endless=False, out_all=False):
assert poses_3d is None or len(poses_3d) == len(poses_2d), (len(poses_3d), len(poses_2d))
assert cameras is None or len(cameras) == len(poses_2d)
pairs = []
self.saved_index = {}
start_index = 0
for key in poses_2d.keys():
assert poses_3d is None or poses_2d[key].shape[0] == poses_3d[key].shape[0]
n_chunks = (poses_2d[key].shape[0] + chunk_length - 1) // chunk_length
offset = (n_chunks * chunk_length - poses_2d[key].shape[0]) // 2
bounds = np.arange(n_chunks + 1) * chunk_length - offset
augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
reverse_augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
keys = np.tile(np.array(key).reshape([1, 2]), (len(bounds) - 1, 1))
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, reverse_augment_vector))
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, ~reverse_augment_vector))
if augment:
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, ~reverse_augment_vector))
else:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, reverse_augment_vector))
end_index = start_index + poses_3d[key].shape[0]
self.saved_index[key] = [start_index, end_index]
start_index = start_index + poses_3d[key].shape[0]
if cameras is not None:
self.batch_cam = np.empty((batch_size, cameras[key].shape[-1]))
if poses_3d is not None:
self.batch_3d = np.empty((batch_size, chunk_length, poses_3d[key].shape[-2], poses_3d[key].shape[-1]))
self.batch_2d = np.empty(
(batch_size, chunk_length + 2 * pad, poses_2d[key].shape[-3], poses_2d[key].shape[-2],
poses_2d[key].shape[-1]))
self.num_batches = (len(pairs) + batch_size - 1) // batch_size
self.batch_size = batch_size
self.random = np.random.RandomState(random_seed)
self.pairs = pairs
self.shuffle = shuffle
self.pad = pad
self.causal_shift = causal_shift
self.endless = endless
self.state = None
self.cameras = cameras
self.poses_3d = poses_3d
self.poses_2d = poses_2d
self.augment = augment
self.kps_left = kps_left
self.kps_right = kps_right
self.joints_left = joints_left
self.joints_right = joints_right
self.out_all = out_all
def num_frames(self):
return self.num_batches * self.batch_size
def random_state(self):
return self.random
def set_random_state(self, random):
self.random = random
def augment_enabled(self):
return self.augment
def next_pairs(self):
if self.state is None:
if self.shuffle:
pairs = self.random.permutation(self.pairs)
else:
pairs = self.pairs
return 0, pairs
else:
return self.state
def get_batch(self, seq_i, start_3d, end_3d, flip, reverse):
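# Assemble one chunk: slice (and edge-pad) the 2D and 3D sequences for the requested range,
# optionally applying horizontal flip and/or temporal reversal.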
subject, action = seq_i
seq_name = (subject, action)
start_2d = start_3d - self.pad - self.causal_shift  # start of the padded 2D window
end_2d = end_3d + self.pad - self.causal_shift
seq_2d = self.poses_2d[seq_name].copy()
low_2d = max(start_2d, 0)
high_2d = min(end_2d, seq_2d.shape[0])
pad_left_2d = low_2d - start_2d
pad_right_2d = end_2d - high_2d
if pad_left_2d != 0 or pad_right_2d != 0:
self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0), (0, 0)),
'edge')
else:
self.batch_2d = seq_2d[low_2d:high_2d]
if flip:
self.batch_2d[:, :, :, 0] *= -1
self.batch_2d[:, :, self.kps_left + self.kps_right] = self.batch_2d[:, :, self.kps_right + self.kps_left]
if reverse:
self.batch_2d = self.batch_2d[::-1].copy()
if self.poses_3d is not None:
seq_3d = self.poses_3d[seq_name].copy()
if self.out_all:
low_3d = low_2d
high_3d = high_2d
pad_left_3d = pad_left_2d
pad_right_3d = pad_right_2d
else:
low_3d = max(start_3d, 0)
high_3d = min(end_3d, seq_3d.shape[0])
pad_left_3d = low_3d - start_3d
pad_right_3d = end_3d - high_3d
if pad_left_3d != 0 or pad_right_3d != 0:
self.batch_3d = np.pad(seq_3d[low_3d:high_3d],
((pad_left_3d, pad_right_3d), (0, 0), (0, 0)), 'edge')
else:
self.batch_3d = seq_3d[low_3d:high_3d]
if flip:
self.batch_3d[:, :, 0] *= -1
self.batch_3d[:, self.joints_left + self.joints_right] = \
self.batch_3d[:, self.joints_right + self.joints_left]
if reverse:
self.batch_3d = self.batch_3d[::-1].copy()
if self.poses_3d is None and self.cameras is None:
return None, None, self.batch_2d.copy(), action, subject
elif self.poses_3d is not None and self.cameras is None:
return np.zeros(9), self.batch_3d.copy(), self.batch_2d.copy(), action, subject, low_2d, high_2d
elif self.poses_3d is None:
return self.batch_cam, None, self.batch_2d.copy(), action, subject
else:
return self.batch_cam, self.batch_3d.copy(), self.batch_2d.copy(), action, subject
class Fusion(data.Dataset):
def __init__(self, opt, dataset, root_path, train=True):
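# hop1..hop4 are binary matrices marking joint pairs exactly k edges apart (k = 1..4) in the
# 17-joint Human3.6M skeleton; they are normalized in hop_normalize() and returned with every sample.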
self.hop1 = torch.tensor([[0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])
self.hop2 = torch.tensor([[0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
self.hop3 = torch.tensor([[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])
self.hop4 = torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0]])
self.data_type = opt.dataset
self.train = train
self.keypoints_name = opt.keypoints
self.root_path = root_path
self.train_list = opt.subjects_train.split(',')
self.test_list = opt.subjects_test.split(',')
self.action_filter = None if opt.actions == '*' else opt.actions.split(',')
self.downsample = opt.downsample
self.subset = opt.subset
self.stride = opt.stride
self.crop_uv = opt.crop_uv
self.test_aug = opt.test_augmentation
self.pad = opt.pad
if self.train:
self.keypoints = self.prepare_data(dataset, self.train_list)
self.cameras_train, self.poses_train, self.poses_train_2d = self.fetch(dataset, self.train_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_train, self.poses_train,
self.poses_train_2d, self.stride, pad=self.pad,
augment=opt.data_augmentation, reverse_aug=opt.reverse_augmentation,
kps_left=self.kps_left, kps_right=self.kps_right,
joints_left=self.joints_left,
joints_right=self.joints_right, out_all=opt.out_all)
print('INFO: Training on {} frames'.format(self.generator.num_frames()))
else:
self.keypoints = self.prepare_data(dataset, self.test_list)
self.cameras_test, self.poses_test, self.poses_test_2d = self.fetch(dataset, self.test_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_test, self.poses_test,
self.poses_test_2d,
pad=self.pad, augment=False, kps_left=self.kps_left,
kps_right=self.kps_right, joints_left=self.joints_left,
joints_right=self.joints_right)
self.key_index = self.generator.saved_index
print('INFO: Testing on {} frames'.format(self.generator.num_frames()))
def prepare_data(self, dataset, folder_list):
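# Make the 3D poses root-relative, load the 2D detections, normalize them to screen coordinates,
# and append a stacked all-camera view (frames x cams x joints x 2) to each action.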
for subject in folder_list:
for action in dataset[subject].keys():
dataset[subject][action]['positions'][:, 1:] -= dataset[subject][action]['positions'][:, :1]
keypoints = np.load(self.root_path + 'data_2d_' + self.data_type + '_' + self.keypoints_name + '.npz',
allow_pickle=True)
keypoints_symmetry = keypoints['metadata'].item()['keypoints_symmetry']
self.kps_left, self.kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
self.joints_left, self.joints_right = list(dataset.skeleton().joints_left()), list(
dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()
for subject in folder_list:
for action in dataset[subject].keys():
mocap_length = dataset[subject][action]['positions'].shape[0]
for cam_idx in range(len(keypoints[subject][action])):
assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length
if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]
for subject in keypoints.keys():
for action in keypoints[subject]:
for cam_idx, kps in enumerate(keypoints[subject][action]):
cam = dataset.cameras()[subject][cam_idx]
if self.crop_uv == 0:
kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
keypoints[subject][action][cam_idx] = kps
for subject in folder_list:
for action in dataset[subject].keys():
positions_2d_pairs = []
for cam_idx in range(len(keypoints[subject][action])):
positions_2d_pairs.append(keypoints[subject][action][cam_idx])
keypoints[subject][action].append(
np.array(positions_2d_pairs).transpose((1, 0, 2,3)))
return keypoints
def fetch(self, dataset, subjects, subset=1, ):
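# Gather, per (subject, action), the stacked multi-camera 2D keypoints (index 4) and the 3D positions;
# camera parameters are left empty here.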
out_poses_3d = {}
out_poses_2d = {}
out_camera_params = {}
for subject in subjects:
for action in self.keypoints[subject].keys():
poses_2d = self.keypoints[subject][action][4]
out_poses_2d[(subject, action)] = poses_2d
poses_3d = dataset[subject][action]['positions']
out_poses_3d[(subject, action)] = poses_3d
if len(out_camera_params) == 0:
out_camera_params = None
downsample = 1
if downsample:
pass
return out_camera_params, out_poses_3d, out_poses_2d
def hop_normalize(self, x1, x2, x3, x4):
# Normalize each hop matrix by its own degree (row-sum) vector before stacking.
x1 = x1 / torch.sum(x1, dim=1)
x2 = x2 / torch.sum(x2, dim=1)
x3 = x3 / torch.sum(x3, dim=1)
x4 = x4 / torch.sum(x4, dim=1)
return torch.cat((x1.unsqueeze(0), x2.unsqueeze(0), x3.unsqueeze(0), x4.unsqueeze(0)), dim=0)
def __len__(self):
return len(self.generator.pairs)
def __getitem__(self, index):
seq_name, start_3d, end_3d, flip, reverse = self.generator.pairs[index]
cam, gt_3D, input_2D, action, subject, low_2d, high_2d = self.generator.get_batch(seq_name, start_3d, end_3d,
False, False)
if not self.train and self.test_aug:
# Test-time augmentation: fetch a horizontally flipped copy of the 2D input as well.
_, _, input_2D_aug, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True,
reverse=False)
input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0)
bb_box = np.array([0, 0, 1, 1])
input_2D_update = input_2D
hops = self.hop_normalize(self.hop1, self.hop2, self.hop3, self.hop4)
scale = np.float64(1.0)
return cam, gt_3D, input_2D_update, action, subject, scale, bb_box, low_2d, high_2d, hops

Binary files not shown (10 files).

258  common/cameras.py  Normal file

@@ -0,0 +1,258 @@
import sys
import numpy as np
import torch
def normalize_screen_coordinates(X, w, h):
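# Map pixel coordinates so that x spans [-1, 1] while y is scaled by the same factor (aspect ratio preserved).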
assert X.shape[-1] == 2
return X / w * 2 - [1, h / w]
def world_to_camera(X, R, t): # https://blog.csdn.net/Hurt_Town/article/details/125071279
Rt = wrap(qinverse, R)
# return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t)
return wrap(qrot, Rt.repeat(*X.shape[:-1], 1), X - t)
def camera_to_world(X, R, t):
return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t
def wrap(func, *args, unsqueeze=False):
args = list(args)
for i, arg in enumerate(args):
if type(arg) == np.ndarray:
args[i] = torch.from_numpy(arg)
if unsqueeze:
args[i] = args[i].unsqueeze(0)
result = func(*args)
if isinstance(result, tuple):
result = list(result)
for i, res in enumerate(result):
if type(res) == torch.Tensor:
if unsqueeze:
res = res.squeeze(0)
result[i] = res.numpy()
return tuple(result)
elif type(result) == torch.Tensor:
if unsqueeze:
result = result.squeeze(0)
# return result.numpy()
return result
else:
return result
def qrot(q, v):
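# Rotate vector(s) v by quaternion(s) q = (w, x, y, z) using v' = v + 2 * (w * (q_vec x v) + q_vec x (q_vec x v)).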
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
qvec = q[..., 1:]
uv = torch.cross(qvec, v, dim=len(q.shape) - 1)
uuv = torch.cross(qvec, uv, dim=len(q.shape) - 1)
return (v + 2 * (q[..., :1] * uv + uuv))
def qinverse(q, inplace=False):
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return torch.cat((w, -xyz), dim=len(q.shape) - 1)
h36m_cameras_intrinsic_params = [
{
'id': '54138969',
'center': [512.54150390625, 515.4514770507812],
'focal_length': [1145.0494384765625, 1143.7811279296875],
'radial_distortion': [-0.20709891617298126, 0.24777518212795258, -0.0030751503072679043],
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
'res_w': 1000,
'res_h': 1002,
'azimuth': 70,
},
{
'id': '55011271',
'center': [508.8486328125, 508.0649108886719],
'focal_length': [1149.6756591796875, 1147.5916748046875],
'radial_distortion': [-0.1942136287689209, 0.2404085397720337, 0.006819975562393665],
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
'res_w': 1000,
'res_h': 1000,
'azimuth': -70,
},
{
'id': '58860488',
'center': [519.8158569335938, 501.40264892578125],
'focal_length': [1149.1407470703125, 1148.7989501953125],
'radial_distortion': [-0.2083381861448288, 0.25548800826072693, -0.0024604974314570427],
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
'res_w': 1000,
'res_h': 1000,
'azimuth': 110,
},
{
'id': '60457274',
'center': [514.9682006835938, 501.88201904296875],
'focal_length': [1145.5113525390625, 1144.77392578125],
'radial_distortion': [-0.198384091258049, 0.21832367777824402, -0.008947807364165783],
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
'res_w': 1000,
'res_h': 1002,
'azimuth': -110,
},
]
h36m_cameras_extrinsic_params = {
'S1': [
{
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
},
{
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
},
{
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
},
{
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
},
],
'S2': [
{},
{},
{},
{},
],
'S3': [
{},
{},
{},
{},
],
'S4': [
{},
{},
{},
{},
],
'S5': [
{
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
},
{
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
},
{
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
},
{
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
},
],
'S6': [
{
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
},
{
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
},
{
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
},
{
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
},
],
'S7': [
{
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
},
{
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
},
{
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
},
{
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
},
],
'S8': [
{
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
},
{
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
},
{
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
},
{
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
},
],
'S9': [
{
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
},
{
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
},
{
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
},
{
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
},
],
'S11': [
{
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
},
{
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
},
{
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
},
{
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
},
],
}

171  common/h36m_dataset.py  Normal file

@@ -0,0 +1,171 @@
import numpy as np
import copy
from common.cameras import h36m_cameras_intrinsic_params, h36m_cameras_extrinsic_params, \
normalize_screen_coordinates
class Skeleton:
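# Kinematic tree defined by a parents array plus left/right joint lists (used for horizontal-flip augmentation).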
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],  # parents array: each entry is the index of that joint's parent
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
class MocapDataset:
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None
self._cameras = None
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
return False
class Human36mDataset(MocapDataset):
def __init__(self, path, opt, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self.train_list = ['S1', 'S5', 'S6', 'S7', 'S8']
self.test_list = ['S9', 'S11']
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
if opt.crop_uv == 0:
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype(
'float32')
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
if 'translation' in cam:
cam['translation'] = cam['translation'] / 1000
cam['intrinsic'] = np.concatenate((cam['focal_length'],
cam['center'],
cam['radial_distortion'],
cam['tangential_distortion']))
data = np.load(path, allow_pickle=True)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
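# 17 joints remain after removal; re-wire the left/right shoulders (indices 11, 14) to the thorax (index 8).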
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True

211  common/utils.py  Normal file

@@ -0,0 +1,211 @@
import torch
import numpy as np
import hashlib
from torch.autograd import Variable
import os
def deterministic_random(min_value, max_value, data):
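# Map a string deterministically to an integer in [min_value, max_value] via SHA-256, so pseudo-random choices are reproducible.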
digest = hashlib.sha256(data.encode()).digest()
raw_value = int.from_bytes(digest[:4], byteorder='little', signed=False)
return int(raw_value / (2 ** 32 - 1) * (max_value - min_value)) + min_value
def mpjpe_cal(predicted, target):
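# MPJPE (protocol #1): mean Euclidean distance between predicted and ground-truth joint positions.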
assert predicted.shape == target.shape
return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))
def test_calculation(predicted, target, action, error_sum, data_type, subject):
error_sum = mpjpe_by_action_p1(predicted, target, action, error_sum)
error_sum = mpjpe_by_action_p2(predicted, target, action, error_sum)
return error_sum
def mpjpe_by_action_p1(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
dist = torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1), dim=len(target.shape) - 2)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p1'].update(torch.mean(dist).item() * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p1'].update(dist[i].item(), 1)
return action_error_sum
def mpjpe_by_action_p2(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
pred = predicted.detach().cpu().numpy().reshape(-1, predicted.shape[-2], predicted.shape[-1])
gt = target.detach().cpu().numpy().reshape(-1, target.shape[-2], target.shape[-1])
dist = p_mpjpe(pred, gt)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p2'].update(np.mean(dist) * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p2'].update(np.mean(dist), 1)
return action_error_sum
def p_mpjpe(predicted, target):
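# P-MPJPE (protocol #2): MPJPE after rigidly aligning the prediction to the ground truth
# with a similarity transform (Procrustes analysis: rotation, translation, uniform scale).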
assert predicted.shape == target.shape
muX = np.mean(target, axis=1, keepdims=True)
muY = np.mean(predicted, axis=1, keepdims=True)
X0 = target - muX
Y0 = predicted - muY
normX = np.sqrt(np.sum(X0 ** 2, axis=(1, 2), keepdims=True))
normY = np.sqrt(np.sum(Y0 ** 2, axis=(1, 2), keepdims=True))
X0 /= normX
Y0 /= normY
H = np.matmul(X0.transpose(0, 2, 1), Y0)
U, s, Vt = np.linalg.svd(H)
V = Vt.transpose(0, 2, 1)
R = np.matmul(V, U.transpose(0, 2, 1))
sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1))
V[:, :, -1] *= sign_detR
s[:, -1] *= sign_detR.flatten()
R = np.matmul(V, U.transpose(0, 2, 1))
tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2)
a = tr * normX / normY
t = muX - a * np.matmul(muY, R)
predicted_aligned = a * np.matmul(predicted, R) + t
return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1), axis=len(target.shape) - 2)
def define_actions(action):
actions = ["Directions", "Discussion", "Eating", "Greeting",
"Phoning", "Photo", "Posing", "Purchases",
"Sitting", "SittingDown", "Smoking", "Waiting",
"WalkDog", "Walking", "WalkTogether"]
if action == "All" or action == "all" or action == '*':
return actions
if action not in actions:
raise ValueError("Unrecognized action: %s" % action)
return [action]
def define_error_list(actions):
error_sum = {}
error_sum.update({actions[i]:
{'p1': AccumLoss(), 'p2': AccumLoss()}
for i in range(len(actions))})
return error_sum
class AccumLoss(object):
def __init__(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val
self.count += n
self.avg = self.sum / self.count
def get_varialbe(split, target):
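# Move each tensor in `target` onto the GPU as a float tensor; gradients are not required for the data.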
num = len(target)
var = []
if split == 'train':
for i in range(num):
temp = Variable(target[i], requires_grad=False).contiguous().type(torch.cuda.FloatTensor)
var.append(temp)
else:
for i in range(num):
temp = Variable(target[i]).contiguous().cuda().type(torch.cuda.FloatTensor)
var.append(temp)
return var
def print_error(data_type, action_error_sum, is_train):
mean_error_p1, mean_error_p2 = print_error_action(action_error_sum, is_train)
return mean_error_p1, mean_error_p2
def print_error_action(action_error_sum, is_train):
mean_error_each = {'p1': 0.0, 'p2': 0.0}
mean_error_all = {'p1': AccumLoss(), 'p2': AccumLoss()}
if is_train == 0:
print("{0:=^12} {1:=^10} {2:=^8}".format("Action", "p#1 mm", "p#2 mm"))
for action, value in action_error_sum.items():
if is_train == 0:
print("{0:<12} ".format(action), end="")
mean_error_each['p1'] = action_error_sum[action]['p1'].avg * 1000.0
mean_error_all['p1'].update(mean_error_each['p1'], 1)
mean_error_each['p2'] = action_error_sum[action]['p2'].avg * 1000.0
mean_error_all['p2'].update(mean_error_each['p2'], 1)
if is_train == 0:
print("{0:>6.2f} {1:>10.2f}".format(mean_error_each['p1'], mean_error_each['p2']))
if is_train == 0:
print("{0:<12} {1:>6.2f} {2:>10.2f}".format("Average", mean_error_all['p1'].avg, mean_error_all['p2'].avg))
return mean_error_all['p1'].avg, mean_error_all['p2'].avg
def save_model(previous_name, save_dir, epoch, data_threshold, model):
if os.path.exists(previous_name):
os.remove(previous_name)
torch.save(model.state_dict(), '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100))
previous_name = '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100)
return previous_name
def save_model_epoch(save_dir, epoch, model):
torch.save(model.state_dict(), '%s/epoch_%d.pth' % (save_dir, epoch))