first commit

IamZLT committed on 2024-08-05 11:19:19 +08:00
commit 8b2e804ccc
39 changed files with 2795 additions and 0 deletions

360  common/Mydataset.py  Normal file

@@ -0,0 +1,360 @@
import torch
import numpy as np
import torch.utils.data as data
from common.cameras import normalize_screen_coordinates
class ChunkedGenerator:
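# Slices each (subject, action) sequence into fixed-length chunks; every chunk's 2D window is
# edge-padded by `pad` frames on both sides, and flipped/reversed copies can be added for augmentation.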
def __init__(self, batch_size, cameras, poses_3d, poses_2d,
chunk_length=1, pad=0, causal_shift=0,
shuffle=False, random_seed=1234,
augment=False, reverse_aug=False, kps_left=None, kps_right=None, joints_left=None, joints_right=None,
endless=False, out_all=False):
assert poses_3d is None or len(poses_3d) == len(poses_2d), (len(poses_3d), len(poses_2d))
assert cameras is None or len(cameras) == len(poses_2d)
pairs = []
self.saved_index = {}
start_index = 0
for key in poses_2d.keys():
assert poses_3d is None or poses_2d[key].shape[0] == poses_3d[key].shape[0]
n_chunks = (poses_2d[key].shape[0] + chunk_length - 1) // chunk_length
offset = (n_chunks * chunk_length - poses_2d[key].shape[0]) // 2
bounds = np.arange(n_chunks + 1) * chunk_length - offset
augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
reverse_augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
keys = np.tile(np.array(key).reshape([1, 2]), (len(bounds) - 1, 1))
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, reverse_augment_vector))
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, ~reverse_augment_vector))
if augment:
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, ~reverse_augment_vector))
else:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, reverse_augment_vector))
end_index = start_index + poses_3d[key].shape[0]
self.saved_index[key] = [start_index, end_index]
start_index = start_index + poses_3d[key].shape[0]
if cameras is not None:
self.batch_cam = np.empty((batch_size, cameras[key].shape[-1]))
if poses_3d is not None:
self.batch_3d = np.empty((batch_size, chunk_length, poses_3d[key].shape[-2], poses_3d[key].shape[-1]))
self.batch_2d = np.empty(
(batch_size, chunk_length + 2 * pad, poses_2d[key].shape[-3], poses_2d[key].shape[-2],
poses_2d[key].shape[-1]))
self.num_batches = (len(pairs) + batch_size - 1) // batch_size
self.batch_size = batch_size
self.random = np.random.RandomState(random_seed)
self.pairs = pairs
self.shuffle = shuffle
self.pad = pad
self.causal_shift = causal_shift
self.endless = endless
self.state = None
self.cameras = cameras
self.poses_3d = poses_3d
self.poses_2d = poses_2d
self.augment = augment
self.kps_left = kps_left
self.kps_right = kps_right
self.joints_left = joints_left
self.joints_right = joints_right
self.out_all = out_all
def num_frames(self):
return self.num_batches * self.batch_size
def random_state(self):
return self.random
def set_random_state(self, random):
self.random = random
def augment_enabled(self):
return self.augment
def next_pairs(self):
if self.state is None:
if self.shuffle:
pairs = self.random.permutation(self.pairs)
else:
pairs = self.pairs
return 0, pairs
else:
return self.state
def get_batch(self, seq_i, start_3d, end_3d, flip, reverse):
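# Assemble one chunk: slice (and edge-pad) the 2D and 3D sequences for the requested range,
# optionally applying horizontal flip and/or temporal reversal.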
subject, action = seq_i
seq_name = (subject, action)
start_2d = start_3d - self.pad - self.causal_shift  # start of the padded 2D window
end_2d = end_3d + self.pad - self.causal_shift
seq_2d = self.poses_2d[seq_name].copy()
low_2d = max(start_2d, 0)
high_2d = min(end_2d, seq_2d.shape[0])
pad_left_2d = low_2d - start_2d
pad_right_2d = end_2d - high_2d
if pad_left_2d != 0 or pad_right_2d != 0:
self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0), (0, 0)),
'edge')
else:
self.batch_2d = seq_2d[low_2d:high_2d]
if flip:
self.batch_2d[:, :, :, 0] *= -1
self.batch_2d[:, :, self.kps_left + self.kps_right] = self.batch_2d[:, :, self.kps_right + self.kps_left]
if reverse:
self.batch_2d = self.batch_2d[::-1].copy()
if self.poses_3d is not None:
seq_3d = self.poses_3d[seq_name].copy()
if self.out_all:
low_3d = low_2d
high_3d = high_2d
pad_left_3d = pad_left_2d
pad_right_3d = pad_right_2d
else:
low_3d = max(start_3d, 0)
high_3d = min(end_3d, seq_3d.shape[0])
pad_left_3d = low_3d - start_3d
pad_right_3d = end_3d - high_3d
if pad_left_3d != 0 or pad_right_3d != 0:
self.batch_3d = np.pad(seq_3d[low_3d:high_3d],
((pad_left_3d, pad_right_3d), (0, 0), (0, 0)), 'edge')
else:
self.batch_3d = seq_3d[low_3d:high_3d]
if flip:
self.batch_3d[:, :, 0] *= -1
self.batch_3d[:, self.joints_left + self.joints_right] = \
self.batch_3d[:, self.joints_right + self.joints_left]
if reverse:
self.batch_3d = self.batch_3d[::-1].copy()
if self.poses_3d is None and self.cameras is None:
return None, None, self.batch_2d.copy(), action, subject
elif self.poses_3d is not None and self.cameras is None:
return np.zeros(9), self.batch_3d.copy(), self.batch_2d.copy(), action, subject, low_2d, high_2d
elif self.poses_3d is None:
return self.batch_cam, None, self.batch_2d.copy(), action, subject
else:
return self.batch_cam, self.batch_3d.copy(), self.batch_2d.copy(), action, subject
class Fusion(data.Dataset):
def __init__(self, opt, dataset, root_path, train=True):
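# hop1..hop4 are binary matrices marking joint pairs exactly k edges apart (k = 1..4) in the
# 17-joint Human3.6M skeleton; they are normalized in hop_normalize() and returned with every sample.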
self.hop1 = torch.tensor([[0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])
self.hop2 = torch.tensor([[0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
self.hop3 = torch.tensor([[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])
self.hop4 = torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0]])
self.data_type = opt.dataset
self.train = train
self.keypoints_name = opt.keypoints
self.root_path = root_path
self.train_list = opt.subjects_train.split(',')
self.test_list = opt.subjects_test.split(',')
self.action_filter = None if opt.actions == '*' else opt.actions.split(',')
self.downsample = opt.downsample
self.subset = opt.subset
self.stride = opt.stride
self.crop_uv = opt.crop_uv
self.test_aug = opt.test_augmentation
self.pad = opt.pad
if self.train:
self.keypoints = self.prepare_data(dataset, self.train_list)
self.cameras_train, self.poses_train, self.poses_train_2d = self.fetch(dataset, self.train_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_train, self.poses_train,
self.poses_train_2d, self.stride, pad=self.pad,
augment=opt.data_augmentation, reverse_aug=opt.reverse_augmentation,
kps_left=self.kps_left, kps_right=self.kps_right,
joints_left=self.joints_left,
joints_right=self.joints_right, out_all=opt.out_all)
print('INFO: Training on {} frames'.format(self.generator.num_frames()))
else:
self.keypoints = self.prepare_data(dataset, self.test_list)
self.cameras_test, self.poses_test, self.poses_test_2d = self.fetch(dataset, self.test_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_test, self.poses_test,
self.poses_test_2d,
pad=self.pad, augment=False, kps_left=self.kps_left,
kps_right=self.kps_right, joints_left=self.joints_left,
joints_right=self.joints_right)
self.key_index = self.generator.saved_index
print('INFO: Testing on {} frames'.format(self.generator.num_frames()))
def prepare_data(self, dataset, folder_list):
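# Make the 3D poses root-relative, load the 2D detections, normalize them to screen coordinates,
# and append a stacked all-camera view (frames x cams x joints x 2) to each action.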
for subject in folder_list:
for action in dataset[subject].keys():
dataset[subject][action]['positions'][:, 1:] -= dataset[subject][action]['positions'][:, :1]
keypoints = np.load(self.root_path + 'data_2d_' + self.data_type + '_' + self.keypoints_name + '.npz',
allow_pickle=True)
keypoints_symmetry = keypoints['metadata'].item()['keypoints_symmetry']
self.kps_left, self.kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
self.joints_left, self.joints_right = list(dataset.skeleton().joints_left()), list(
dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()
for subject in folder_list:
for action in dataset[subject].keys():
mocap_length = dataset[subject][action]['positions'].shape[0]
for cam_idx in range(len(keypoints[subject][action])):
assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length
if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]
for subject in keypoints.keys():
for action in keypoints[subject]:
for cam_idx, kps in enumerate(keypoints[subject][action]):
cam = dataset.cameras()[subject][cam_idx]
if self.crop_uv == 0:
kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
keypoints[subject][action][cam_idx] = kps
for subject in folder_list:
for action in dataset[subject].keys():
positions_2d_pairs = []
for cam_idx in range(len(keypoints[subject][action])):
positions_2d_pairs.append(keypoints[subject][action][cam_idx])
keypoints[subject][action].append(
np.array(positions_2d_pairs).transpose((1, 0, 2,3)))
return keypoints
def fetch(self, dataset, subjects, subset=1, ):
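# Gather, per (subject, action), the stacked multi-camera 2D keypoints (index 4) and the 3D positions;
# camera parameters are left empty here.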
out_poses_3d = {}
out_poses_2d = {}
out_camera_params = {}
for subject in subjects:
for action in self.keypoints[subject].keys():
poses_2d = self.keypoints[subject][action][4]
out_poses_2d[(subject, action)] = poses_2d
poses_3d = dataset[subject][action]['positions']
out_poses_3d[(subject, action)] = poses_3d
if len(out_camera_params) == 0:
out_camera_params = None
downsample = 1
if downsample:
pass
return out_camera_params, out_poses_3d, out_poses_2d
def hop_normalize(self, x1, x2, x3, x4):
# Normalize each hop matrix by its own degree (row-sum) vector before stacking.
x1 = x1 / torch.sum(x1, dim=1)
x2 = x2 / torch.sum(x2, dim=1)
x3 = x3 / torch.sum(x3, dim=1)
x4 = x4 / torch.sum(x4, dim=1)
return torch.cat((x1.unsqueeze(0), x2.unsqueeze(0), x3.unsqueeze(0), x4.unsqueeze(0)), dim=0)
def __len__(self):
return len(self.generator.pairs)
def __getitem__(self, index):
seq_name, start_3d, end_3d, flip, reverse = self.generator.pairs[index]
cam, gt_3D, input_2D, action, subject, low_2d, high_2d = self.generator.get_batch(seq_name, start_3d, end_3d,
False, False)
if not self.train and self.test_aug:
# Test-time augmentation: fetch a horizontally flipped copy of the 2D input as well.
_, _, input_2D_aug, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=True,
reverse=False)
input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0)
bb_box = np.array([0, 0, 1, 1])
input_2D_update = input_2D
hops = self.hop_normalize(self.hop1, self.hop2, self.hop3, self.hop4)
scale = np.float64(1.0)
return cam, gt_3D, input_2D_update, action, subject, scale, bb_box, low_2d, high_2d, hops

Binary files not shown (10 files).

258  common/cameras.py  Normal file

@@ -0,0 +1,258 @@
import sys
import numpy as np
import torch
def normalize_screen_coordinates(X, w, h):
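# Map pixel coordinates so that x spans [-1, 1] while y is scaled by the same factor (aspect ratio preserved).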
assert X.shape[-1] == 2
return X / w * 2 - [1, h / w]
def world_to_camera(X, R, t): # https://blog.csdn.net/Hurt_Town/article/details/125071279
Rt = wrap(qinverse, R)
# return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t)
return wrap(qrot, Rt.repeat(*X.shape[:-1], 1), X - t)
def camera_to_world(X, R, t):
return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t
def wrap(func, *args, unsqueeze=False):
args = list(args)
for i, arg in enumerate(args):
if type(arg) == np.ndarray:
args[i] = torch.from_numpy(arg)
if unsqueeze:
args[i] = args[i].unsqueeze(0)
result = func(*args)
if isinstance(result, tuple):
result = list(result)
for i, res in enumerate(result):
if type(res) == torch.Tensor:
if unsqueeze:
res = res.squeeze(0)
result[i] = res.numpy()
return tuple(result)
elif type(result) == torch.Tensor:
if unsqueeze:
result = result.squeeze(0)
# return result.numpy()
return result
else:
return result
def qrot(q, v):
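# Rotate vector(s) v by quaternion(s) q = (w, x, y, z) using v' = v + 2 * (w * (q_vec x v) + q_vec x (q_vec x v)).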
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
qvec = q[..., 1:]
uv = torch.cross(qvec, v, dim=len(q.shape) - 1)
uuv = torch.cross(qvec, uv, dim=len(q.shape) - 1)
return (v + 2 * (q[..., :1] * uv + uuv))
def qinverse(q, inplace=False):
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return torch.cat((w, -xyz), dim=len(q.shape) - 1)
h36m_cameras_intrinsic_params = [
{
'id': '54138969',
'center': [512.54150390625, 515.4514770507812],
'focal_length': [1145.0494384765625, 1143.7811279296875],
'radial_distortion': [-0.20709891617298126, 0.24777518212795258, -0.0030751503072679043],
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
'res_w': 1000,
'res_h': 1002,
'azimuth': 70,
},
{
'id': '55011271',
'center': [508.8486328125, 508.0649108886719],
'focal_length': [1149.6756591796875, 1147.5916748046875],
'radial_distortion': [-0.1942136287689209, 0.2404085397720337, 0.006819975562393665],
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
'res_w': 1000,
'res_h': 1000,
'azimuth': -70,
},
{
'id': '58860488',
'center': [519.8158569335938, 501.40264892578125],
'focal_length': [1149.1407470703125, 1148.7989501953125],
'radial_distortion': [-0.2083381861448288, 0.25548800826072693, -0.0024604974314570427],
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
'res_w': 1000,
'res_h': 1000,
'azimuth': 110,
},
{
'id': '60457274',
'center': [514.9682006835938, 501.88201904296875],
'focal_length': [1145.5113525390625, 1144.77392578125],
'radial_distortion': [-0.198384091258049, 0.21832367777824402, -0.008947807364165783],
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
'res_w': 1000,
'res_h': 1002,
'azimuth': -110,
},
]
h36m_cameras_extrinsic_params = {
'S1': [
{
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
},
{
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
},
{
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
},
{
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
},
],
'S2': [
{},
{},
{},
{},
],
'S3': [
{},
{},
{},
{},
],
'S4': [
{},
{},
{},
{},
],
'S5': [
{
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
},
{
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
},
{
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
},
{
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
},
],
'S6': [
{
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
},
{
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
},
{
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
},
{
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
},
],
'S7': [
{
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
},
{
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
},
{
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
},
{
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
},
],
'S8': [
{
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
},
{
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
},
{
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
},
{
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
},
],
'S9': [
{
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
},
{
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
},
{
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
},
{
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
},
],
'S11': [
{
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
},
{
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
},
{
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
},
{
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
},
],
}

171  common/h36m_dataset.py  Normal file

@@ -0,0 +1,171 @@
import numpy as np
import copy
from common.cameras import h36m_cameras_intrinsic_params, h36m_cameras_extrinsic_params, \
normalize_screen_coordinates
class Skeleton:
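# Kinematic tree defined by a parents array plus left/right joint lists (used for horizontal-flip augmentation).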
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],  # parents array: each entry is the index of that joint's parent
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
class MocapDataset:
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None
self._cameras = None
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
return False
class Human36mDataset(MocapDataset):
def __init__(self, path, opt, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self.train_list = ['S1', 'S5', 'S6', 'S7', 'S8']
self.test_list = ['S9', 'S11']
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
if opt.crop_uv == 0:
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype(
'float32')
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
if 'translation' in cam:
cam['translation'] = cam['translation'] / 1000
cam['intrinsic'] = np.concatenate((cam['focal_length'],
cam['center'],
cam['radial_distortion'],
cam['tangential_distortion']))
data = np.load(path, allow_pickle=True)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
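# 17 joints remain after removal; re-wire the left/right shoulders (indices 11, 14) to the thorax (index 8).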
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True

211  common/utils.py  Normal file

@@ -0,0 +1,211 @@
import torch
import numpy as np
import hashlib
from torch.autograd import Variable
import os
def deterministic_random(min_value, max_value, data):
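# Map a string deterministically to an integer in [min_value, max_value] via SHA-256, so pseudo-random choices are reproducible.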
digest = hashlib.sha256(data.encode()).digest()
raw_value = int.from_bytes(digest[:4], byteorder='little', signed=False)
return int(raw_value / (2 ** 32 - 1) * (max_value - min_value)) + min_value
def mpjpe_cal(predicted, target):
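# MPJPE (protocol #1): mean Euclidean distance between predicted and ground-truth joint positions.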
assert predicted.shape == target.shape
return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))
def test_calculation(predicted, target, action, error_sum, data_type, subject):
error_sum = mpjpe_by_action_p1(predicted, target, action, error_sum)
error_sum = mpjpe_by_action_p2(predicted, target, action, error_sum)
return error_sum
def mpjpe_by_action_p1(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
dist = torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1), dim=len(target.shape) - 2)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p1'].update(torch.mean(dist).item() * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p1'].update(dist[i].item(), 1)
return action_error_sum
def mpjpe_by_action_p2(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
pred = predicted.detach().cpu().numpy().reshape(-1, predicted.shape[-2], predicted.shape[-1])
gt = target.detach().cpu().numpy().reshape(-1, target.shape[-2], target.shape[-1])
dist = p_mpjpe(pred, gt)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p2'].update(np.mean(dist) * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p2'].update(np.mean(dist), 1)
return action_error_sum
def p_mpjpe(predicted, target):
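# P-MPJPE (protocol #2): MPJPE after rigidly aligning the prediction to the ground truth
# with a similarity transform (Procrustes analysis: rotation, translation, uniform scale).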
assert predicted.shape == target.shape
muX = np.mean(target, axis=1, keepdims=True)
muY = np.mean(predicted, axis=1, keepdims=True)
X0 = target - muX
Y0 = predicted - muY
normX = np.sqrt(np.sum(X0 ** 2, axis=(1, 2), keepdims=True))
normY = np.sqrt(np.sum(Y0 ** 2, axis=(1, 2), keepdims=True))
X0 /= normX
Y0 /= normY
H = np.matmul(X0.transpose(0, 2, 1), Y0)
U, s, Vt = np.linalg.svd(H)
V = Vt.transpose(0, 2, 1)
R = np.matmul(V, U.transpose(0, 2, 1))
sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1))
V[:, :, -1] *= sign_detR
s[:, -1] *= sign_detR.flatten()
R = np.matmul(V, U.transpose(0, 2, 1))
tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2)
a = tr * normX / normY
t = muX - a * np.matmul(muY, R)
predicted_aligned = a * np.matmul(predicted, R) + t
return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1), axis=len(target.shape) - 2)
def define_actions(action):
actions = ["Directions", "Discussion", "Eating", "Greeting",
"Phoning", "Photo", "Posing", "Purchases",
"Sitting", "SittingDown", "Smoking", "Waiting",
"WalkDog", "Walking", "WalkTogether"]
if action == "All" or action == "all" or action == '*':
return actions
if action not in actions:
raise ValueError("Unrecognized action: %s" % action)
return [action]
def define_error_list(actions):
error_sum = {}
error_sum.update({actions[i]:
{'p1': AccumLoss(), 'p2': AccumLoss()}
for i in range(len(actions))})
return error_sum
class AccumLoss(object):
def __init__(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val
self.count += n
self.avg = self.sum / self.count
def get_varialbe(split, target):
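# Move each tensor in `target` onto the GPU as a float tensor; gradients are not required for the data.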
num = len(target)
var = []
if split == 'train':
for i in range(num):
temp = Variable(target[i], requires_grad=False).contiguous().type(torch.cuda.FloatTensor)
var.append(temp)
else:
for i in range(num):
temp = Variable(target[i]).contiguous().cuda().type(torch.cuda.FloatTensor)
var.append(temp)
return var
def print_error(data_type, action_error_sum, is_train):
mean_error_p1, mean_error_p2 = print_error_action(action_error_sum, is_train)
return mean_error_p1, mean_error_p2
def print_error_action(action_error_sum, is_train):
mean_error_each = {'p1': 0.0, 'p2': 0.0}
mean_error_all = {'p1': AccumLoss(), 'p2': AccumLoss()}
if is_train == 0:
print("{0:=^12} {1:=^10} {2:=^8}".format("Action", "p#1 mm", "p#2 mm"))
for action, value in action_error_sum.items():
if is_train == 0:
print("{0:<12} ".format(action), end="")
mean_error_each['p1'] = action_error_sum[action]['p1'].avg * 1000.0
mean_error_all['p1'].update(mean_error_each['p1'], 1)
mean_error_each['p2'] = action_error_sum[action]['p2'].avg * 1000.0
mean_error_all['p2'].update(mean_error_each['p2'], 1)
if is_train == 0:
print("{0:>6.2f} {1:>10.2f}".format(mean_error_each['p1'], mean_error_each['p2']))
if is_train == 0:
print("{0:<12} {1:>6.2f} {2:>10.2f}".format("Average", mean_error_all['p1'].avg, mean_error_all['p2'].avg))
return mean_error_all['p1'].avg, mean_error_all['p2'].avg
def save_model(previous_name, save_dir, epoch, data_threshold, model):
if os.path.exists(previous_name):
os.remove(previous_name)
torch.save(model.state_dict(), '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100))
previous_name = '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100)
return previous_name
def save_model_epoch(save_dir, epoch, model):
torch.save(model.state_dict(), '%s/epoch_%d.pth' % (save_dir, epoch))