first commit

IamZLT
2024-08-05 11:19:19 +08:00
commit 8b2e804ccc
39 changed files with 2795 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
checkpoint
dataset

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 z0911k
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

60
README.md Normal file

@@ -0,0 +1,60 @@
# Deep Semantic Graph Transformer for Multi-view 3D Human Pose Estimation [AAAI 2024]
<p align="center"><img src="framework.png" width="65%" alt="" /></p>
> **Deep Semantic Graph Transformer for Multi-view 3D Human Pose Estimation**,
> Lijun Zhang, Kangkang Zhou, Feng Lu, Xiang-Dong Zhou, Yu Shi,
> *The 38th Annual AAAI Conference on Artificial Intelligence (AAAI), 2024*
## TODO
- The paper will be released soon!
- Test code and model weights will be released soon!
## Release
- [14/12/2023] We released the model and training code for SGraFormer.
## Installation
- Create a conda environment: ```conda create -n SGraFormer python=3.7```
- Download cudatoolkit=11.0 from [here](https://developer.nvidia.com/cuda-11.0-download-archive) and install it
- ```pip3 install torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html```
- ```pip3 install -r requirements.txt```
## Dataset Setup
Please download the dataset from the [Human3.6M](http://vision.imar.ro/human3.6m/) website and follow [VideoPose3D](https://github.com/facebookresearch/VideoPose3D) to set up the Human3.6M dataset in the './dataset' directory.
Or you can download the processed data from [here](https://drive.google.com/drive/folders/1F_qbuZTwLJGUSib1oBUTYfOrLB6-MKrM?usp=sharing).
```bash
${POSE_ROOT}/
|-- dataset
| |-- data_3d_h36m.npz
| |-- data_2d_h36m_gt.npz
| |-- data_2d_h36m_cpn_ft_h36m_dbb.npz
```
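For a quick sanity check after downloading, here is a minimal sketch (assuming the three `.npz` files above sit under `./dataset/`) that reads the files the same way the training code does:
```python
import numpy as np

# 3D poses: a dict of subject -> action -> per-frame joint positions
data_3d = np.load('dataset/data_3d_h36m.npz', allow_pickle=True)['positions_3d'].item()
print(sorted(data_3d.keys()))                              # subjects, e.g. ['S1', 'S11', 'S5', ...]

# 2D keypoints: 'positions_2d' plus 'metadata' with the left/right symmetry lists
data_2d = np.load('dataset/data_2d_h36m_cpn_ft_h36m_dbb.npz', allow_pickle=True)
print(data_2d['metadata'].item()['keypoints_symmetry'])    # (left indices, right indices)
print(list(data_2d['positions_2d'].item()['S1'].keys()))   # actions available for subject S1
```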
## Quick Start
To train a model on Human3.6M:
```bash
python main.py --frames 27 --batch_size 1024 --nepoch 50 --lr 0.0002
```
## Citation
If you find our work useful in your research, please consider citing:
    @inproceedings{zhang2024sgraformer,
      author    = {Lijun Zhang and Kangkang Zhou and Feng Lu and Xiang-Dong Zhou and Yu Shi},
      title     = {Deep Semantic Graph Transformer for Multi-view 3D Human Pose Estimation},
      booktitle = {The 38th Annual AAAI Conference on Artificial Intelligence (AAAI)},
      year      = {2024},
    }
## Acknowledgement
Our code extends the following repositories. We thank the authors for releasing their code.
- [PoseFormer](https://github.com/zczcwh/PoseFormer)
- [VideoPose3D](https://github.com/facebookresearch/VideoPose3D)

360
common/Mydataset.py Normal file

@@ -0,0 +1,360 @@
import torch
import numpy as np
import torch.utils.data as data
from common.cameras import normalize_screen_coordinates
class ChunkedGenerator:
def __init__(self, batch_size, cameras, poses_3d, poses_2d,
chunk_length=1, pad=0, causal_shift=0,
shuffle=False, random_seed=1234,
augment=False, reverse_aug=False, kps_left=None, kps_right=None, joints_left=None, joints_right=None,
endless=False, out_all=False):
assert poses_3d is None or len(poses_3d) == len(poses_2d), (len(poses_3d), len(poses_2d))
assert cameras is None or len(cameras) == len(poses_2d)
pairs = []
self.saved_index = {}
start_index = 0
for key in poses_2d.keys():
assert poses_3d is None or poses_2d[key].shape[0] == poses_3d[key].shape[0]
n_chunks = (poses_2d[key].shape[0] + chunk_length - 1) // chunk_length
offset = (n_chunks * chunk_length - poses_2d[key].shape[0]) // 2
bounds = np.arange(n_chunks + 1) * chunk_length - offset
            augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
            reverse_augment_vector = np.full(len(bounds) - 1, False, dtype=bool)
            keys = np.tile(np.array(key).reshape([1, 2]), (len(bounds) - 1, 1))
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, reverse_augment_vector))
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], augment_vector, ~reverse_augment_vector))
if augment:
if reverse_aug:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, ~reverse_augment_vector))
else:
pairs += list(zip(keys, bounds[:-1], bounds[1:], ~augment_vector, reverse_augment_vector))
end_index = start_index + poses_3d[key].shape[0]
self.saved_index[key] = [start_index, end_index]
start_index = start_index + poses_3d[key].shape[0]
if cameras is not None:
self.batch_cam = np.empty((batch_size, cameras[key].shape[-1]))
if poses_3d is not None:
self.batch_3d = np.empty((batch_size, chunk_length, poses_3d[key].shape[-2], poses_3d[key].shape[-1]))
self.batch_2d = np.empty(
(batch_size, chunk_length + 2 * pad, poses_2d[key].shape[-3], poses_2d[key].shape[-2],
poses_2d[key].shape[-1]))
self.num_batches = (len(pairs) + batch_size - 1) // batch_size
self.batch_size = batch_size
self.random = np.random.RandomState(random_seed)
self.pairs = pairs
self.shuffle = shuffle
self.pad = pad
self.causal_shift = causal_shift
self.endless = endless
self.state = None
self.cameras = cameras
if cameras is not None:
self.cameras = cameras
self.poses_3d = poses_3d
self.poses_2d = poses_2d
self.augment = augment
self.kps_left = kps_left
self.kps_right = kps_right
self.joints_left = joints_left
self.joints_right = joints_right
self.out_all = out_all
def num_frames(self):
return self.num_batches * self.batch_size
def random_state(self):
return self.random
def set_random_state(self, random):
self.random = random
def augment_enabled(self):
return self.augment
def next_pairs(self):
if self.state is None:
if self.shuffle:
pairs = self.random.permutation(self.pairs)
else:
pairs = self.pairs
return 0, pairs
else:
return self.state
def get_batch(self, seq_i, start_3d, end_3d, flip, reverse):
subject, action = seq_i
seq_name = (subject, action)
        start_2d = start_3d - self.pad - self.causal_shift  # start position of the 2D window
end_2d = end_3d + self.pad - self.causal_shift
seq_2d = self.poses_2d[seq_name].copy()
low_2d = max(start_2d, 0)
high_2d = min(end_2d, seq_2d.shape[0])
pad_left_2d = low_2d - start_2d
pad_right_2d = end_2d - high_2d
if pad_left_2d != 0 or pad_right_2d != 0:
self.batch_2d = np.pad(seq_2d[low_2d:high_2d], ((pad_left_2d, pad_right_2d), (0, 0), (0, 0), (0, 0)),
'edge')
else:
self.batch_2d = seq_2d[low_2d:high_2d]
if flip:
self.batch_2d[:, :, :, 0] *= -1
self.batch_2d[:, :, self.kps_left + self.kps_right] = self.batch_2d[:, :, self.kps_right + self.kps_left]
if reverse:
self.batch_2d = self.batch_2d[::-1].copy()
if self.poses_3d is not None:
seq_3d = self.poses_3d[seq_name].copy()
if self.out_all:
low_3d = low_2d
high_3d = high_2d
pad_left_3d = pad_left_2d
pad_right_3d = pad_right_2d
else:
low_3d = max(start_3d, 0)
high_3d = min(end_3d, seq_3d.shape[0])
pad_left_3d = low_3d - start_3d
pad_right_3d = end_3d - high_3d
if pad_left_3d != 0 or pad_right_3d != 0:
self.batch_3d = np.pad(seq_3d[low_3d:high_3d],
((pad_left_3d, pad_right_3d), (0, 0), (0, 0)), 'edge')
else:
self.batch_3d = seq_3d[low_3d:high_3d]
if flip:
self.batch_3d[:, :, 0] *= -1
self.batch_3d[:, self.joints_left + self.joints_right] = \
self.batch_3d[:, self.joints_right + self.joints_left]
if reverse:
self.batch_3d = self.batch_3d[::-1].copy()
if self.poses_3d is None and self.cameras is None:
return None, None, self.batch_2d.copy(), action, subject
elif self.poses_3d is not None and self.cameras is None:
return np.zeros(9), self.batch_3d.copy(), self.batch_2d.copy(), action, subject, low_2d, high_2d
elif self.poses_3d is None:
return self.batch_cam, None, self.batch_2d.copy(), action, subject
else:
return self.batch_cam, self.batch_3d.copy(), self.batch_2d.copy(), action, subject
class Fusion(data.Dataset):
def __init__(self, opt, dataset, root_path, train=True):
self.hop1 = torch.tensor([[0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])
self.hop2 = torch.tensor([[0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]])
self.hop3 = torch.tensor([[0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])
self.hop4 = torch.tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0]])
self.data_type = opt.dataset
self.train = train
self.keypoints_name = opt.keypoints
self.root_path = root_path
self.train_list = opt.subjects_train.split(',')
self.test_list = opt.subjects_test.split(',')
self.action_filter = None if opt.actions == '*' else opt.actions.split(',')
self.downsample = opt.downsample
self.subset = opt.subset
self.stride = opt.stride
self.crop_uv = opt.crop_uv
self.test_aug = opt.test_augmentation
self.pad = opt.pad
if self.train:
self.keypoints = self.prepare_data(dataset, self.train_list)
self.cameras_train, self.poses_train, self.poses_train_2d = self.fetch(dataset, self.train_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_train, self.poses_train,
self.poses_train_2d, self.stride, pad=self.pad,
augment=opt.data_augmentation, reverse_aug=opt.reverse_augmentation,
kps_left=self.kps_left, kps_right=self.kps_right,
joints_left=self.joints_left,
joints_right=self.joints_right, out_all=opt.out_all)
print('INFO: Training on {} frames'.format(self.generator.num_frames()))
else:
self.keypoints = self.prepare_data(dataset, self.test_list)
self.cameras_test, self.poses_test, self.poses_test_2d = self.fetch(dataset, self.test_list,
subset=self.subset)
self.generator = ChunkedGenerator(opt.batch_size // opt.stride, self.cameras_test, self.poses_test,
self.poses_test_2d,
pad=self.pad, augment=False, kps_left=self.kps_left,
kps_right=self.kps_right, joints_left=self.joints_left,
joints_right=self.joints_right)
self.key_index = self.generator.saved_index
print('INFO: Testing on {} frames'.format(self.generator.num_frames()))
def prepare_data(self, dataset, folder_list):
for subject in folder_list:
for action in dataset[subject].keys():
dataset[subject][action]['positions'][:, 1:] -= dataset[subject][action]['positions'][:, :1]
keypoints = np.load(self.root_path + 'data_2d_' + self.data_type + '_' + self.keypoints_name + '.npz',
allow_pickle=True)
keypoints_symmetry = keypoints['metadata'].item()['keypoints_symmetry']
self.kps_left, self.kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
self.joints_left, self.joints_right = list(dataset.skeleton().joints_left()), list(
dataset.skeleton().joints_right())
keypoints = keypoints['positions_2d'].item()
for subject in folder_list:
for action in dataset[subject].keys():
mocap_length = dataset[subject][action]['positions'].shape[0]
for cam_idx in range(len(keypoints[subject][action])):
assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length
if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]
for subject in keypoints.keys():
for action in keypoints[subject]:
for cam_idx, kps in enumerate(keypoints[subject][action]):
cam = dataset.cameras()[subject][cam_idx]
if self.crop_uv == 0:
kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
keypoints[subject][action][cam_idx] = kps
for subject in folder_list:
for action in dataset[subject].keys():
positions_2d_pairs = []
for cam_idx in range(len(keypoints[subject][action])):
positions_2d_pairs.append(keypoints[subject][action][cam_idx])
keypoints[subject][action].append(
np.array(positions_2d_pairs).transpose((1, 0, 2,3)))
return keypoints
def fetch(self, dataset, subjects, subset=1, ):
out_poses_3d = {}
out_poses_2d = {}
out_camera_params = {}
for subject in subjects:
for action in self.keypoints[subject].keys():
poses_2d = self.keypoints[subject][action][4]
out_poses_2d[(subject, action)] = poses_2d
poses_3d = dataset[subject][action]['positions']
out_poses_3d[(subject, action)] = poses_3d
if len(out_camera_params) == 0:
out_camera_params = None
downsample = 1
if downsample:
pass
return out_camera_params, out_poses_3d, out_poses_2d
def hop_normalize(self, x1, x2, x3, x4):
        x1 = x1 / torch.sum(x1, dim=1)
        x2 = x2 / torch.sum(x2, dim=1)
        x3 = x3 / torch.sum(x3, dim=1)
        x4 = x4 / torch.sum(x4, dim=1)
return torch.cat((x1.unsqueeze(0), x2.unsqueeze(0), x3.unsqueeze(0), x4.unsqueeze(0)), dim=0)
def __len__(self):
return len(self.generator.pairs)
def __getitem__(self, index):
seq_name, start_3d, end_3d, flip, reverse = self.generator.pairs[index]
cam, gt_3D, input_2D, action, subject, low_2d, high_2d = self.generator.get_batch(seq_name, start_3d, end_3d,
False, False)
if self.train == False and self.test_aug:
_, _, input_2D_aug, _, _, _, _ = self.generator.get_batch(seq_name, start_3d, end_3d, flip=False,
reverse=False)
input_2D = np.concatenate((np.expand_dims(input_2D, axis=0), np.expand_dims(input_2D_aug, axis=0)), 0)
bb_box = np.array([0, 0, 1, 1])
input_2D_update = input_2D
hops = self.hop_normalize(self.hop1, self.hop2, self.hop3, self.hop4)
scale = np.float64(1.0)
return cam, gt_3D, input_2D_update, action, subject, scale, bb_box, low_2d, high_2d, hops
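A minimal usage sketch for `Fusion`, mirroring how `main.py` wires it into a `DataLoader` (here `opt` is assumed to come from `common/opt.py` via `opts().parse()`, exactly as in `main.py`):
```python
import torch
from common.opt import opts
from common.h36m_dataset import Human36mDataset
from common.Mydataset import Fusion

opt = opts().parse()
root_path = opt.root_path
dataset = Human36mDataset(root_path + 'data_3d_' + opt.dataset + '.npz', opt)

train_data = Fusion(opt=opt, train=True, dataset=dataset, root_path=root_path)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size,
                                           shuffle=True, num_workers=int(opt.workers),
                                           pin_memory=True)

# __getitem__ returns: cam, gt_3D, input_2D, action, subject, scale, bb_box, low_2d, high_2d, hops
cam, gt_3D, input_2D, action, subject, scale, bb_box, low, high, hops = next(iter(train_loader))
print(input_2D.shape)  # expected (batch, frames, 4 views, 17 joints, 2)
print(hops.shape)      # expected (batch, 4, 17, 17)
```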

10 binary files not shown.

258
common/cameras.py Normal file

@@ -0,0 +1,258 @@
import sys
import numpy as np
import torch
def normalize_screen_coordinates(X, w, h):
assert X.shape[-1] == 2
return X / w * 2 - [1, h / w]
def world_to_camera(X, R, t): # https://blog.csdn.net/Hurt_Town/article/details/125071279
Rt = wrap(qinverse, R)
# return wrap(qrot, np.tile(Rt, (*X.shape[:-1], 1)), X - t)
return wrap(qrot, Rt.repeat(*X.shape[:-1], 1), X - t)
def camera_to_world(X, R, t):
return wrap(qrot, np.tile(R, (*X.shape[:-1], 1)), X) + t
def wrap(func, *args, unsqueeze=False):
args = list(args)
for i, arg in enumerate(args):
if type(arg) == np.ndarray:
args[i] = torch.from_numpy(arg)
if unsqueeze:
args[i] = args[i].unsqueeze(0)
result = func(*args)
if isinstance(result, tuple):
result = list(result)
for i, res in enumerate(result):
if type(res) == torch.Tensor:
if unsqueeze:
res = res.squeeze(0)
result[i] = res.numpy()
return tuple(result)
elif type(result) == torch.Tensor:
if unsqueeze:
result = result.squeeze(0)
# return result.numpy()
return result
else:
return result
def qrot(q, v):
assert q.shape[-1] == 4
assert v.shape[-1] == 3
assert q.shape[:-1] == v.shape[:-1]
qvec = q[..., 1:]
uv = torch.cross(qvec, v, dim=len(q.shape) - 1)
uuv = torch.cross(qvec, uv, dim=len(q.shape) - 1)
return (v + 2 * (q[..., :1] * uv + uuv))
def qinverse(q, inplace=False):
if inplace:
q[..., 1:] *= -1
return q
else:
w = q[..., :1]
xyz = q[..., 1:]
return torch.cat((w, -xyz), dim=len(q.shape) - 1)
h36m_cameras_intrinsic_params = [
{
'id': '54138969',
'center': [512.54150390625, 515.4514770507812],
'focal_length': [1145.0494384765625, 1143.7811279296875],
'radial_distortion': [-0.20709891617298126, 0.24777518212795258, -0.0030751503072679043],
'tangential_distortion': [-0.0009756988729350269, -0.00142447161488235],
'res_w': 1000,
'res_h': 1002,
'azimuth': 70,
},
{
'id': '55011271',
'center': [508.8486328125, 508.0649108886719],
'focal_length': [1149.6756591796875, 1147.5916748046875],
'radial_distortion': [-0.1942136287689209, 0.2404085397720337, 0.006819975562393665],
'tangential_distortion': [-0.0016190266469493508, -0.0027408944442868233],
'res_w': 1000,
'res_h': 1000,
'azimuth': -70,
},
{
'id': '58860488',
'center': [519.8158569335938, 501.40264892578125],
'focal_length': [1149.1407470703125, 1148.7989501953125],
'radial_distortion': [-0.2083381861448288, 0.25548800826072693, -0.0024604974314570427],
'tangential_distortion': [0.0014843869721516967, -0.0007599993259645998],
'res_w': 1000,
'res_h': 1000,
'azimuth': 110,
},
{
'id': '60457274',
'center': [514.9682006835938, 501.88201904296875],
'focal_length': [1145.5113525390625, 1144.77392578125],
'radial_distortion': [-0.198384091258049, 0.21832367777824402, -0.008947807364165783],
'tangential_distortion': [-0.0005872055771760643, -0.0018133620033040643],
'res_w': 1000,
'res_h': 1002,
'azimuth': -110,
},
]
h36m_cameras_extrinsic_params = {
'S1': [
{
'orientation': [0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088],
'translation': [1841.1070556640625, 4955.28466796875, 1563.4454345703125],
},
{
'orientation': [0.6157187819480896, -0.764836311340332, -0.14833825826644897, 0.11794740706682205],
'translation': [1761.278564453125, -5078.0068359375, 1606.2650146484375],
},
{
'orientation': [0.14651472866535187, -0.14647851884365082, 0.7653023600578308, -0.6094175577163696],
'translation': [-1846.7777099609375, 5215.04638671875, 1491.972412109375],
},
{
'orientation': [0.5834008455276489, -0.7853162288665771, 0.14548823237419128, -0.14749594032764435],
'translation': [-1794.7896728515625, -3722.698974609375, 1574.8927001953125],
},
],
'S2': [
{},
{},
{},
{},
],
'S3': [
{},
{},
{},
{},
],
'S4': [
{},
{},
{},
{},
],
'S5': [
{
'orientation': [0.1467377245426178, -0.162370964884758, -0.7551892995834351, 0.6178938746452332],
'translation': [2097.3916015625, 4880.94482421875, 1605.732421875],
},
{
'orientation': [0.6159758567810059, -0.7626792192459106, -0.15728192031383514, 0.1189815029501915],
'translation': [2031.7008056640625, -5167.93310546875, 1612.923095703125],
},
{
'orientation': [0.14291371405124664, -0.12907841801643372, 0.7678384780883789, -0.6110143065452576],
'translation': [-1620.5948486328125, 5171.65869140625, 1496.43701171875],
},
{
'orientation': [0.5920479893684387, -0.7814217805862427, 0.1274748593568802, -0.15036417543888092],
'translation': [-1637.1737060546875, -3867.3173828125, 1547.033203125],
},
],
'S6': [
{
'orientation': [0.1337897777557373, -0.15692396461963654, -0.7571090459823608, 0.6198879480361938],
'translation': [1935.4517822265625, 4950.24560546875, 1618.0838623046875],
},
{
'orientation': [0.6147197484970093, -0.7628812789916992, -0.16174767911434174, 0.11819244921207428],
'translation': [1969.803955078125, -5128.73876953125, 1632.77880859375],
},
{
'orientation': [0.1529948115348816, -0.13529130816459656, 0.7646096348762512, -0.6112781167030334],
'translation': [-1769.596435546875, 5185.361328125, 1476.993408203125],
},
{
'orientation': [0.5916101336479187, -0.7804774045944214, 0.12832270562648773, -0.1561593860387802],
'translation': [-1721.668701171875, -3884.13134765625, 1540.4879150390625],
},
],
'S7': [
{
'orientation': [0.1435241848230362, -0.1631336808204651, -0.7548328638076782, 0.6188824772834778],
'translation': [1974.512939453125, 4926.3544921875, 1597.8326416015625],
},
{
'orientation': [0.6141672730445862, -0.7638262510299683, -0.1596645563840866, 0.1177929937839508],
'translation': [1937.0584716796875, -5119.7900390625, 1631.5665283203125],
},
{
'orientation': [0.14550060033798218, -0.12874816358089447, 0.7660516500473022, -0.6127139329910278],
'translation': [-1741.8111572265625, 5208.24951171875, 1464.8245849609375],
},
{
'orientation': [0.5912848114967346, -0.7821764349937439, 0.12445473670959473, -0.15196487307548523],
'translation': [-1734.7105712890625, -3832.42138671875, 1548.5830078125],
},
],
'S8': [
{
'orientation': [0.14110587537288666, -0.15589867532253265, -0.7561917304992676, 0.619644045829773],
'translation': [2150.65185546875, 4896.1611328125, 1611.9046630859375],
},
{
'orientation': [0.6169601678848267, -0.7647668123245239, -0.14846350252628326, 0.11158157885074615],
'translation': [2219.965576171875, -5148.453125, 1613.0440673828125],
},
{
'orientation': [0.1471444070339203, -0.13377119600772858, 0.7670128345489502, -0.6100369691848755],
'translation': [-1571.2215576171875, 5137.0185546875, 1498.1761474609375],
},
{
'orientation': [0.5927824378013611, -0.7825870513916016, 0.12147816270589828, -0.14631995558738708],
'translation': [-1476.913330078125, -3896.7412109375, 1547.97216796875],
},
],
'S9': [
{
'orientation': [0.15540587902069092, -0.15548215806484222, -0.7532095313072205, 0.6199594736099243],
'translation': [2044.45849609375, 4935.1171875, 1481.2275390625],
},
{
'orientation': [0.618784487247467, -0.7634735107421875, -0.14132238924503326, 0.11933968216180801],
'translation': [1990.959716796875, -5123.810546875, 1568.8048095703125],
},
{
'orientation': [0.13357827067375183, -0.1367100477218628, 0.7689454555511475, -0.6100738644599915],
'translation': [-1670.9921875, 5211.98583984375, 1528.387939453125],
},
{
'orientation': [0.5879399180412292, -0.7823407053947449, 0.1427614390850067, -0.14794869720935822],
'translation': [-1696.04345703125, -3827.099853515625, 1591.4127197265625],
},
],
'S11': [
{
'orientation': [0.15232472121715546, -0.15442320704460144, -0.7547563314437866, 0.6191070079803467],
'translation': [2098.440185546875, 4926.5546875, 1500.278564453125],
},
{
'orientation': [0.6189449429512024, -0.7600917220115662, -0.15300633013248444, 0.1255258321762085],
'translation': [2083.182373046875, -4912.1728515625, 1561.07861328125],
},
{
'orientation': [0.14943228662014008, -0.15650227665901184, 0.7681233882904053, -0.6026304364204407],
'translation': [-1609.8153076171875, 5177.3359375, 1537.896728515625],
},
{
'orientation': [0.5894251465797424, -0.7818877100944519, 0.13991211354732513, -0.14715361595153809],
'translation': [-1590.738037109375, -3854.1689453125, 1578.017578125],
},
],
}
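A small sketch of how `normalize_screen_coordinates` maps pixel coordinates into the [-1, 1] range used throughout the code, using the intrinsics of camera '54138969' (1000 x 1002 pixels):
```python
import numpy as np
from common.cameras import normalize_screen_coordinates, h36m_cameras_intrinsic_params

cam = h36m_cameras_intrinsic_params[0]        # camera '54138969'
kps = np.array([[500.0, 501.0], [0.0, 0.0]])  # (N, 2) pixel coordinates
norm = normalize_screen_coordinates(kps, w=cam['res_w'], h=cam['res_h'])
print(norm)  # [[0.0, 0.0], [-1.0, -1.002]] -- the image centre maps to roughly the origin
```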

171
common/h36m_dataset.py Normal file

@@ -0,0 +1,171 @@
import numpy as np
import copy
from common.cameras import h36m_cameras_intrinsic_params, h36m_cameras_extrinsic_params, \
normalize_screen_coordinates
class Skeleton:
def __init__(self, parents, joints_left, joints_right):
assert len(joints_left) == len(joints_right)
self._parents = np.array(parents)
self._joints_left = joints_left
self._joints_right = joints_right
self._compute_metadata()
def num_joints(self):
return len(self._parents)
def parents(self):
return self._parents
def has_children(self):
return self._has_children
def children(self):
return self._children
def remove_joints(self, joints_to_remove):
valid_joints = []
for joint in range(len(self._parents)):
if joint not in joints_to_remove:
valid_joints.append(joint)
for i in range(len(self._parents)):
while self._parents[i] in joints_to_remove:
self._parents[i] = self._parents[self._parents[i]]
index_offsets = np.zeros(len(self._parents), dtype=int)
new_parents = []
for i, parent in enumerate(self._parents):
if i not in joints_to_remove:
new_parents.append(parent - index_offsets[parent])
else:
index_offsets[i:] += 1
self._parents = np.array(new_parents)
if self._joints_left is not None:
new_joints_left = []
for joint in self._joints_left:
if joint in valid_joints:
new_joints_left.append(joint - index_offsets[joint])
self._joints_left = new_joints_left
if self._joints_right is not None:
new_joints_right = []
for joint in self._joints_right:
if joint in valid_joints:
new_joints_right.append(joint - index_offsets[joint])
self._joints_right = new_joints_right
self._compute_metadata()
return valid_joints
def joints_left(self):
return self._joints_left
def joints_right(self):
return self._joints_right
def _compute_metadata(self):
self._has_children = np.zeros(len(self._parents)).astype(bool)
for i, parent in enumerate(self._parents):
if parent != -1:
self._has_children[parent] = True
self._children = []
for i, parent in enumerate(self._parents):
self._children.append([])
for i, parent in enumerate(self._parents):
if parent != -1:
self._children[parent].append(i)
h36m_skeleton = Skeleton(parents=[-1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 9, 0, 11, 12, 13, 14, 12,
                         16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],  # parent-array representation of the skeleton tree
joints_left=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
joints_right=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31])
class MocapDataset:
def __init__(self, fps, skeleton):
self._skeleton = skeleton
self._fps = fps
self._data = None
self._cameras = None
def remove_joints(self, joints_to_remove):
kept_joints = self._skeleton.remove_joints(joints_to_remove)
for subject in self._data.keys():
for action in self._data[subject].keys():
s = self._data[subject][action]
s['positions'] = s['positions'][:, kept_joints]
def __getitem__(self, key):
return self._data[key]
def subjects(self):
return self._data.keys()
def fps(self):
return self._fps
def skeleton(self):
return self._skeleton
def cameras(self):
return self._cameras
def supports_semi_supervised(self):
return False
class Human36mDataset(MocapDataset):
def __init__(self, path, opt, remove_static_joints=True):
super().__init__(fps=50, skeleton=h36m_skeleton)
self.train_list = ['S1', 'S5', 'S6', 'S7', 'S8']
self.test_list = ['S9', 'S11']
self._cameras = copy.deepcopy(h36m_cameras_extrinsic_params)
for cameras in self._cameras.values():
for i, cam in enumerate(cameras):
cam.update(h36m_cameras_intrinsic_params[i])
for k, v in cam.items():
if k not in ['id', 'res_w', 'res_h']:
cam[k] = np.array(v, dtype='float32')
if opt.crop_uv == 0:
cam['center'] = normalize_screen_coordinates(cam['center'], w=cam['res_w'], h=cam['res_h']).astype(
'float32')
cam['focal_length'] = cam['focal_length'] / cam['res_w'] * 2
if 'translation' in cam:
cam['translation'] = cam['translation'] / 1000
cam['intrinsic'] = np.concatenate((cam['focal_length'],
cam['center'],
cam['radial_distortion'],
cam['tangential_distortion']))
data = np.load(path, allow_pickle=True)['positions_3d'].item()
self._data = {}
for subject, actions in data.items():
self._data[subject] = {}
for action_name, positions in actions.items():
self._data[subject][action_name] = {
'positions': positions,
'cameras': self._cameras[subject],
}
if remove_static_joints:
self.remove_joints([4, 5, 9, 10, 11, 16, 20, 21, 22, 23, 24, 28, 29, 30, 31])
self._skeleton._parents[11] = 8
self._skeleton._parents[14] = 8
def supports_semi_supervised(self):
return True
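A minimal sketch of loading the dataset and inspecting the reduced skeleton. Since only `opt.crop_uv` is read in the constructor, a bare namespace is used here as a stand-in for `opts().parse()` (an assumption for illustration only):
```python
from types import SimpleNamespace
from common.h36m_dataset import Human36mDataset

opt = SimpleNamespace(crop_uv=0)  # assumption: __init__ only reads opt.crop_uv
dataset = Human36mDataset('./dataset/data_3d_h36m.npz', opt)

skel = dataset.skeleton()
print(skel.num_joints())                 # 17 after remove_joints()
print(skel.parents())                    # parent index of each joint (-1 for the root)
print(skel.joints_left(), skel.joints_right())
print(sorted(dataset.subjects()))        # typically ['S1', 'S11', 'S5', 'S6', 'S7', 'S8', 'S9']
```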

211
common/utils.py Normal file

@@ -0,0 +1,211 @@
import torch
import numpy as np
import hashlib
from torch.autograd import Variable
import os
def deterministic_random(min_value, max_value, data):
digest = hashlib.sha256(data.encode()).digest()
raw_value = int.from_bytes(digest[:4], byteorder='little', signed=False)
return int(raw_value / (2 ** 32 - 1) * (max_value - min_value)) + min_value
def mpjpe_cal(predicted, target):
assert predicted.shape == target.shape
return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))
def test_calculation(predicted, target, action, error_sum, data_type, subject):
error_sum = mpjpe_by_action_p1(predicted, target, action, error_sum)
error_sum = mpjpe_by_action_p2(predicted, target, action, error_sum)
return error_sum
def mpjpe_by_action_p1(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
dist = torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1), dim=len(target.shape) - 2)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p1'].update(torch.mean(dist).item() * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p1'].update(dist[i].item(), 1)
return action_error_sum
def mpjpe_by_action_p2(predicted, target, action, action_error_sum):
assert predicted.shape == target.shape
num = predicted.size(0)
pred = predicted.detach().cpu().numpy().reshape(-1, predicted.shape[-2], predicted.shape[-1])
gt = target.detach().cpu().numpy().reshape(-1, target.shape[-2], target.shape[-1])
dist = p_mpjpe(pred, gt)
if len(set(list(action))) == 1:
end_index = action[0].find(' ')
if end_index != -1:
action_name = action[0][:end_index]
else:
action_name = action[0]
action_error_sum[action_name]['p2'].update(np.mean(dist) * num, num)
else:
for i in range(num):
end_index = action[i].find(' ')
if end_index != -1:
action_name = action[i][:end_index]
else:
action_name = action[i]
action_error_sum[action_name]['p2'].update(np.mean(dist), 1)
return action_error_sum
def p_mpjpe(predicted, target):
assert predicted.shape == target.shape
muX = np.mean(target, axis=1, keepdims=True)
muY = np.mean(predicted, axis=1, keepdims=True)
X0 = target - muX
Y0 = predicted - muY
normX = np.sqrt(np.sum(X0 ** 2, axis=(1, 2), keepdims=True))
normY = np.sqrt(np.sum(Y0 ** 2, axis=(1, 2), keepdims=True))
X0 /= normX
Y0 /= normY
H = np.matmul(X0.transpose(0, 2, 1), Y0)
U, s, Vt = np.linalg.svd(H)
V = Vt.transpose(0, 2, 1)
R = np.matmul(V, U.transpose(0, 2, 1))
sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1))
V[:, :, -1] *= sign_detR
s[:, -1] *= sign_detR.flatten()
R = np.matmul(V, U.transpose(0, 2, 1))
tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2)
a = tr * normX / normY
t = muX - a * np.matmul(muY, R)
predicted_aligned = a * np.matmul(predicted, R) + t
return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1), axis=len(target.shape) - 2)
def define_actions(action):
actions = ["Directions", "Discussion", "Eating", "Greeting",
"Phoning", "Photo", "Posing", "Purchases",
"Sitting", "SittingDown", "Smoking", "Waiting",
"WalkDog", "Walking", "WalkTogether"]
if action == "All" or action == "all" or action == '*':
return actions
    if action not in actions:
        raise ValueError("Unrecognized action: %s" % action)
return [action]
def define_error_list(actions):
error_sum = {}
error_sum.update({actions[i]:
{'p1': AccumLoss(), 'p2': AccumLoss()}
for i in range(len(actions))})
return error_sum
class AccumLoss(object):
def __init__(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val
self.count += n
self.avg = self.sum / self.count
def get_varialbe(split, target):
num = len(target)
var = []
if split == 'train':
for i in range(num):
temp = Variable(target[i], requires_grad=False).contiguous().type(torch.cuda.FloatTensor)
var.append(temp)
else:
for i in range(num):
temp = Variable(target[i]).contiguous().cuda().type(torch.cuda.FloatTensor)
var.append(temp)
return var
def print_error(data_type, action_error_sum, is_train):
mean_error_p1, mean_error_p2 = print_error_action(action_error_sum, is_train)
return mean_error_p1, mean_error_p2
def print_error_action(action_error_sum, is_train):
mean_error_each = {'p1': 0.0, 'p2': 0.0}
mean_error_all = {'p1': AccumLoss(), 'p2': AccumLoss()}
if is_train == 0:
print("{0:=^12} {1:=^10} {2:=^8}".format("Action", "p#1 mm", "p#2 mm"))
for action, value in action_error_sum.items():
if is_train == 0:
print("{0:<12} ".format(action), end="")
mean_error_each['p1'] = action_error_sum[action]['p1'].avg * 1000.0
mean_error_all['p1'].update(mean_error_each['p1'], 1)
mean_error_each['p2'] = action_error_sum[action]['p2'].avg * 1000.0
mean_error_all['p2'].update(mean_error_each['p2'], 1)
if is_train == 0:
print("{0:>6.2f} {1:>10.2f}".format(mean_error_each['p1'], mean_error_each['p2']))
if is_train == 0:
print("{0:<12} {1:>6.2f} {2:>10.2f}".format("Average", mean_error_all['p1'].avg, mean_error_all['p2'].avg))
return mean_error_all['p1'].avg, mean_error_all['p2'].avg
def save_model(previous_name, save_dir, epoch, data_threshold, model):
if os.path.exists(previous_name):
os.remove(previous_name)
torch.save(model.state_dict(), '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100))
previous_name = '%s/model_%d_%d.pth' % (save_dir, epoch, data_threshold * 100)
return previous_name
def save_model_epoch(save_dir, epoch, model):
torch.save(model.state_dict(), '%s/epoch_%d.pth' % (save_dir, epoch))
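A short sketch of the metric helpers: `mpjpe_cal` expects prediction and target tensors of the same shape (e.g. `(batch, frames, joints, 3)`), and `define_error_list` builds one `AccumLoss` pair (P1/P2) per action:
```python
import torch
from common.utils import mpjpe_cal, define_actions, define_error_list

pred = torch.rand(2, 1, 17, 3)
gt = torch.rand(2, 1, 17, 3)
print(mpjpe_cal(pred, gt).item())                # mean per-joint position error (MPJPE)

error_sum = define_error_list(define_actions('*'))
error_sum['Walking']['p1'].update(0.05 * 2, 2)   # accumulate a batch of 2 with mean error 0.05
print(error_sum['Walking']['p1'].avg)            # 0.05
```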

BIN
framework.png Normal file

Binary file not shown.

Size: 1.4 MiB

0
get_2D_skletons.py Normal file

315
main.py Normal file

@@ -0,0 +1,315 @@
import os
import torch
import logging
import random
import torch.optim as optim
from tqdm import tqdm
# from torch.utils.tensorboard import SummaryWriter
from common.utils import *
from common.opt import opts
from common.h36m_dataset import Human36mDataset
from common.Mydataset import Fusion
from model.SGraFormer import sgraformer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
# os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
CUDA_ID = [0]
device = torch.device("cuda")
def visualize_skeletons(input_2D, output_3D, gt_3D, idx=0, output_dir='./output'):
# Ensure the tensors are on the CPU and convert them to numpy arrays
input_2D = input_2D.cpu().numpy()
output_3D = output_3D.cpu().numpy()
gt_3D = gt_3D.cpu().numpy()
# print("====> input_2D: ", input_2D[-1])
# Get the first action and first sample from the batch
input_sample = input_2D[idx, 0]
output_sample = output_3D[idx, 0]
gt_3D_sample = gt_3D[idx, 0]
print(f'\ninput_sample shape: {input_sample.shape}')
print(f'output_sample shape: {output_sample.shape}')
fig = plt.figure(figsize=(25, 5))
# Define the connections (bones) between joints
bones = [
(0, 1), (1, 2), (2, 3), # Left leg
(0, 4), (4, 5), (5, 6), # Right leg
(0, 7), (7, 8), (8, 9), (9, 10), # Spine
(7, 11), (11, 12), (12, 13), # Right arm
(7, 14), (14, 15), (15, 16) # Left arm
]
# Colors for different parts
bone_colors = {
"leg": 'green',
"spine": 'blue',
"arm": 'red'
}
# Function to get bone color based on index
def get_bone_color(start, end):
if (start in [1, 2, 3] or end in [1, 2, 3] or
start in [4, 5, 6] or end in [4, 5, 6]):
return bone_colors["leg"]
elif start in [7, 8, 9, 10] or end in [7, 8, 9, 10]:
return bone_colors["spine"]
else:
return bone_colors["arm"]
# Plotting 2D skeletons from different angles
for i in range(4):
ax = fig.add_subplot(1, 7, i + 1)
ax.set_title(f'2D angle {i+1}')
ax.scatter(input_sample[i, :, 0], input_sample[i, :, 1], color='blue')
# Draw the bones
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([input_sample[i, start, 0], input_sample[i, end, 0]],
[input_sample[i, start, 1], input_sample[i, end, 1]], color=bone_color)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_xlim(np.min(input_sample[:, :, 0]) - 1, np.max(input_sample[:, :, 0]) + 1)
ax.set_ylim(np.min(input_sample[:, :, 1]) - 1, np.max(input_sample[:, :, 1]) + 1)
ax.grid()
# Plotting predicted 3D skeleton
ax = fig.add_subplot(1, 7, 5, projection='3d')
ax.set_title('3D Predicted Skeleton')
ax.scatter(output_sample[:, 0], output_sample[:, 1], output_sample[:, 2], color='red', label='Predicted')
# Draw the bones in 3D for output_sample
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([output_sample[start, 0], output_sample[end, 0]],
[output_sample[start, 1], output_sample[end, 1]],
[output_sample[start, 2], output_sample[end, 2]], color=bone_color)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_xlim(np.min(output_sample[:, 0]) - 1, np.max(output_sample[:, 0]) + 1)
ax.set_ylim(np.min(output_sample[:, 1]) - 1, np.max(output_sample[:, 1]) + 1)
ax.set_zlim(np.min(output_sample[:, 2]) - 1, np.max(output_sample[:, 2]) + 1)
ax.legend()
# Plotting ground truth 3D skeleton
ax = fig.add_subplot(1, 7, 6, projection='3d')
ax.set_title('3D Ground Truth Skeleton')
ax.scatter(gt_3D_sample[:, 0], gt_3D_sample[:, 1], gt_3D_sample[:, 2], color='blue', label='Ground Truth')
# Draw the bones in 3D for gt_3D_sample
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([gt_3D_sample[start, 0], gt_3D_sample[end, 0]],
[gt_3D_sample[start, 1], gt_3D_sample[end, 1]],
[gt_3D_sample[start, 2], gt_3D_sample[end, 2]], color=bone_color, linestyle='--')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_xlim(np.min(gt_3D_sample[:, 0]) - 1, np.max(gt_3D_sample[:, 0]) + 1)
ax.set_ylim(np.min(gt_3D_sample[:, 1]) - 1, np.max(gt_3D_sample[:, 1]) + 1)
ax.set_zlim(np.min(gt_3D_sample[:, 2]) - 1, np.max(gt_3D_sample[:, 2]) + 1)
ax.legend()
plt.grid()
# Save the figure
plt.tight_layout()
plt.savefig(f'{output_dir}/skeletons_visualization.png')
plt.show()
def train(opt, actions, train_loader, model, optimizer, epoch, writer, adaptive_weight=None):
return step('train', opt, actions, train_loader, model, optimizer, epoch, writer, adaptive_weight)
def val(opt, actions, val_loader, model):
with torch.no_grad():
return step('test', opt, actions, val_loader, model)
def step(split, opt, actions, dataLoader, model, optimizer=None, epoch=None, writer=None, adaptive_weight=None):
loss_all = {'loss': AccumLoss()}
action_error_sum = define_error_list(actions)
if split == 'train':
model.train()
else:
model.eval()
TQDM = tqdm(enumerate(dataLoader), total=len(dataLoader), ncols=100)
for i, data in TQDM:
batch_cam, gt_3D, input_2D, action, subject, scale, bb_box, start, end, hops = data
[input_2D, gt_3D, batch_cam, scale, bb_box, hops] = get_varialbe(split, [input_2D, gt_3D, batch_cam, scale, bb_box, hops])
if split == 'train':
output_3D = model(input_2D, hops)
elif split == 'test':
# input_2D = input_2D.to(device)
# model = model.to(device)
# hops = hops.to(device)
input_2D, output_3D = input_augmentation(input_2D, hops, model)
visualize_skeletons(input_2D, output_3D, gt_3D)
out_target = gt_3D.clone()
out_target[:, :, 0] = 0
if split == 'train':
loss = mpjpe_cal(output_3D, out_target)
TQDM.set_description(f'Epoch [{epoch}/{opt.nepoch}]')
TQDM.set_postfix({"l": loss.item()})
N = input_2D.size(0)
loss_all['loss'].update(loss.detach().cpu().numpy() * N, N)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# writer.add_scalars(main_tag='scalars1/train_loss',
# tag_scalar_dict={'trianloss': loss.item()},
# global_step=(epoch - 1) * len(dataLoader) + i)
elif split == 'test':
if output_3D.shape[1] != 1:
output_3D = output_3D[:, opt.pad].unsqueeze(1)
output_3D[:, :, 1:, :] -= output_3D[:, :, :1, :]
output_3D[:, :, 0, :] = 0
action_error_sum = test_calculation(output_3D, out_target, action, action_error_sum, opt.dataset, subject)
if split == 'train':
return loss_all['loss'].avg
elif split == 'test':
p1, p2 = print_error(opt.dataset, action_error_sum, opt.train)
return p1, p2
def input_augmentation(input_2D, hops, model):
input_2D_non_flip = input_2D[:, 0]
output_3D_non_flip = model(input_2D_non_flip, hops)
return input_2D_non_flip, output_3D_non_flip
if __name__ == '__main__':
opt = opts().parse()
root_path = opt.root_path
opt.manualSeed = 1
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
if opt.train:
logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S',
filename=os.path.join(opt.checkpoint, 'train.log'), level=logging.INFO)
root_path = opt.root_path
dataset_path = root_path + 'data_3d_' + opt.dataset + '.npz'
dataset = Human36mDataset(dataset_path, opt)
actions = define_actions(opt.actions)
if opt.train:
train_data = Fusion(opt=opt, train=True, dataset=dataset, root_path=root_path)
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size,
shuffle=True, num_workers=int(opt.workers), pin_memory=True)
test_data = Fusion(opt=opt, train=False, dataset=dataset, root_path=root_path)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=opt.batch_size,
shuffle=False, num_workers=int(opt.workers), pin_memory=True)
model = sgraformer(num_frame=opt.frames, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None, drop_path_rate=0.1)
# model = FuseModel()
if torch.cuda.device_count() > 1:
print("Let's use", torch.cuda.device_count(), "GPUs!")
model = torch.nn.DataParallel(model, device_ids=CUDA_ID).to(device)
else:
model = model.to(device)
    # Helper to strip the 'module.' prefix that DataParallel adds to state_dict keys
def remove_module_prefix(state_dict):
new_state_dict = {}
for k, v in state_dict.items():
            name = k[7:] if k.startswith('module.') else k  # strip 'module.'
new_state_dict[name] = v
return new_state_dict
model_dict = model.state_dict()
if opt.previous_dir != '':
print('pretrained model path:', opt.previous_dir)
model_path = opt.previous_dir
pre_dict = torch.load(model_path)
# print("=====> pre_dict:", pre_dict.keys())
        # Strip the 'module.' prefix
state_dict = remove_module_prefix(pre_dict)
# print("=====> state_dict:", state_dict.keys())
        # Keep only keys that exist in the current model's state dict
state_dict = {k: v for k, v in state_dict.items() if k in model_dict.keys()}
        # Merge the pretrained weights into the model state dict
model_dict.update(state_dict)
        # Load the merged state dict
model.load_state_dict(model_dict)
all_param = []
lr = opt.lr
all_param += list(model.parameters())
optimizer = optim.AdamW(all_param, lr=lr, weight_decay=0.1)
## tensorboard
# writer = SummaryWriter("runs/nin")
writer = None
flag = 0
for epoch in range(1, opt.nepoch + 1):
p1, p2 = val(opt, actions, test_dataloader, model)
print("=====> p1, p2", p1, p2)
if opt.train:
loss = train(opt, actions, train_dataloader, model, optimizer, epoch, writer)
if opt.train:
save_model_epoch(opt.checkpoint, epoch, model)
if p1 < opt.previous_best_threshold:
opt.previous_name = save_model(opt.previous_name, opt.checkpoint, epoch, p1, model)
opt.previous_best_threshold = p1
if opt.train == 0:
print('p1: %.2f, p2: %.2f' % (p1, p2))
break
else:
logging.info('epoch: %d, lr: %.7f, loss: %.4f, p1: %.2f, p2: %.2f' % (epoch, lr, loss, p1, p2))
print('e: %d, lr: %.7f, loss: %.4f, p1: %.2f, p2: %.2f' % (epoch, lr, loss, p1, p2))
if epoch % opt.large_decay_epoch == 0:
for param_group in optimizer.param_groups:
param_group['lr'] *= opt.lr_decay_large
lr *= opt.lr_decay_large
else:
for param_group in optimizer.param_groups:
param_group['lr'] *= opt.lr_decay
lr *= opt.lr_decay
print(opt.checkpoint)

19
md5.py Normal file

@@ -0,0 +1,19 @@
import hashlib
def calculate_md5(file_path):
    # Create a new MD5 hash object
md5_hash = hashlib.md5()
    # Open the file for reading in binary mode
with open(file_path, "rb") as f:
        # Read the file in chunks so large files do not exhaust memory
for chunk in iter(lambda: f.read(4096), b""):
md5_hash.update(chunk)
    # Return the MD5 digest as a hexadecimal string
return md5_hash.hexdigest()
# File path
file_path = "/home/zlt/Documents/SGraFormer-master/checkpoint/epoch_50.pth"
md5_value = calculate_md5(file_path)
print(f"MD5: {md5_value}")

176
model/SGraFormer.py Normal file

@@ -0,0 +1,176 @@
## Our model was revised from https://github.com/zczcwh/PoseFormer/blob/main/common/model_poseformer.py
import torch
import torch.nn as nn
from functools import partial
from einops import rearrange
from timm.models.layers import DropPath
from common.opt import opts
from model.Spatial_encoder import First_view_Spatial_features, Spatial_features
from model.Temporal_encoder import Temporal__features
opt = opts().parse()
#######################################################################################################################
class sgraformer(nn.Module):
def __init__(self, num_frame=9, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, norm_layer=None):
""" ##########hybrid_backbone=None, representation_size=None,
Args:
num_frame (int, tuple): input frame number
num_joints (int, tuple): joints number
in_chans (int): number of input channels, 2D joints have 2 channels: (x,y)
embed_dim_ratio (int): embedding dimension ratio
depth (int): depth of transformer
num_heads (int): number of attention heads
mlp_ratio (int): ratio of mlp hidden dim to embedding dim
qkv_bias (bool): enable bias for qkv if True
qk_scale (float): override default qk scale of head_dim ** -0.5 if set
drop_rate (float): dropout rate
attn_drop_rate (float): attention dropout rate
drop_path_rate (float): stochastic depth rate
norm_layer: (nn.Module): normalization layer
"""
super().__init__()
embed_dim = embed_dim_ratio * num_joints
out_dim = num_joints * 3 #### output dimension is num_joints * 3
##Spatial_features
self.SF1 = First_view_Spatial_features(num_frame, num_joints, in_chans, embed_dim_ratio, depth,
num_heads, mlp_ratio, qkv_bias, qk_scale,
drop_rate, attn_drop_rate, drop_path_rate, norm_layer)
self.SF2 = Spatial_features(num_frame, num_joints, in_chans, embed_dim_ratio, depth,
num_heads, mlp_ratio, qkv_bias, qk_scale,
drop_rate, attn_drop_rate, drop_path_rate, norm_layer)
self.SF3 = Spatial_features(num_frame, num_joints, in_chans, embed_dim_ratio, depth,
num_heads, mlp_ratio, qkv_bias, qk_scale,
drop_rate, attn_drop_rate, drop_path_rate, norm_layer)
self.SF4 = Spatial_features(num_frame, num_joints, in_chans, embed_dim_ratio, depth,
num_heads, mlp_ratio, qkv_bias, qk_scale,
drop_rate, attn_drop_rate, drop_path_rate, norm_layer)
## MVF
self.view_pos_embed = nn.Parameter(torch.zeros(1, 4, num_frame, embed_dim))
self.pos_drop = nn.Dropout(p=0.)
self.conv = nn.Sequential(
nn.BatchNorm2d(4, momentum=0.1),
nn.Conv2d(4, 1, kernel_size=opt.mvf_kernel, stride=1, padding=int(opt.mvf_kernel // 2), bias=False),
nn.ReLU(inplace=True),
)
self.conv_hop = nn.Sequential(
nn.BatchNorm2d(4, momentum=0.1),
nn.Conv2d(4, 1, kernel_size=opt.mvf_kernel, stride=1, padding=int(opt.mvf_kernel // 2), bias=False),
nn.ReLU(inplace=True),
)
self.conv_norm = nn.LayerNorm(embed_dim)
self.conv_hop_norm = nn.LayerNorm(embed_dim)
# Time Serial
self.TF = Temporal__features(num_frame, num_joints, in_chans, embed_dim_ratio, depth,
num_heads, mlp_ratio, qkv_bias, qk_scale,
drop_rate, attn_drop_rate, drop_path_rate, norm_layer)
self.head = nn.Sequential(
nn.LayerNorm(embed_dim),
nn.Linear(embed_dim, out_dim),
)
self.hop_w0 = nn.Parameter(torch.ones(17, 17))
self.hop_w1 = nn.Parameter(torch.ones(17, 17))
self.hop_w2 = nn.Parameter(torch.ones(17, 17))
self.hop_w3 = nn.Parameter(torch.ones(17, 17))
self.hop_w4 = nn.Parameter(torch.ones(17, 17))
self.hop_global = nn.Parameter(torch.ones(17, 17))
self.linear_hop = nn.Linear(8, 2)
# self.max_pool = nn.MaxPool1d(2)
self.edge_embedding = nn.Linear(17*17*4, 17*17)
def forward(self, x, hops):
b, f, v, j, c = x.shape
edge_embedding = self.edge_embedding(hops[0].reshape(1, -1))
        ############### global feature #################
x_hop_global = x.unsqueeze(3).repeat(1, 1, 1, 17, 1, 1)
x_hop_global = x_hop_global - x_hop_global.permute(0, 1, 2, 4, 3, 5)
x_hop_global = torch.sum(x_hop_global ** 2, dim=-1)
hop_global = x_hop_global / torch.sum(x_hop_global, dim=-1).unsqueeze(-1)
hops = hops.unsqueeze(1).unsqueeze(2).repeat(1, f, v, 1, 1, 1)
hops1 = hop_global * hops[:, :, :, 0]
hops2 = hop_global * hops[:, :, :, 1]
hops3 = hop_global * hops[:, :, :, 2]
hops4 = hop_global * hops[:, :, :, 3]
# hops = torch.cat((hops1,hops2,hops3,hops4), dim=-1)
hops = torch.cat((hops1,hops2,hops3,hops4), dim=-1)
x1 = x[:, :, 0]
x2 = x[:, :, 1]
x3 = x[:, :, 2]
x4 = x[:, :, 3]
x1 = x1.permute(0, 3, 1, 2)
x2 = x2.permute(0, 3, 1, 2)
x3 = x3.permute(0, 3, 1, 2)
x4 = x4.permute(0, 3, 1, 2)
hop1 = hops[:, :, 0]
hop2 = hops[:, :, 1]
hop3 = hops[:, :, 2]
hop4 = hops[:, :, 3]
hop1 = hop1.permute(0, 3, 1, 2)
hop2 = hop2.permute(0, 3, 1, 2)
hop3 = hop3.permute(0, 3, 1, 2)
hop4 = hop4.permute(0, 3, 1, 2)
### Semantic graph transformer encoder
x1, hop1, MSA1, MSA2, MSA3, MSA4 = self.SF1(x1, hop1, edge_embedding)
x2, hop2, MSA1, MSA2, MSA3, MSA4 = self.SF2(x2, hop2, MSA1, MSA2, MSA3, MSA4, edge_embedding)
x3, hop3, MSA1, MSA2, MSA3, MSA4 = self.SF3(x3, hop3, MSA1, MSA2, MSA3, MSA4, edge_embedding)
x4, hop4, MSA1, MSA2, MSA3, MSA4 = self.SF4(x4, hop4, MSA1, MSA2, MSA3, MSA4, edge_embedding)
### Multi-view cross-channel fusion
x = torch.cat((x1.unsqueeze(1), x2.unsqueeze(1), x3.unsqueeze(1), x4.unsqueeze(1)), dim=1) + self.view_pos_embed
x = self.pos_drop(x)
x = self.conv(x).squeeze(1) + x1 + x2 + x3 + x4
x = self.conv_norm(x)
hop = torch.cat((hop1.unsqueeze(1), hop2.unsqueeze(1), hop3.unsqueeze(1), hop4.unsqueeze(1)), dim=1) + self.view_pos_embed
hop = self.pos_drop(hop)
# hop = self.conv_hop(hop).squeeze(1) + hop1 + hop2 + hop3 + hop4
# hop = self.conv_hop_norm(hop)
hop = self.conv(hop).squeeze(1) + hop1 + hop2 + hop3 + hop4
hop = self.conv_norm(hop)
x = x * hop
### Temporal transformer encoder
x = self.TF(x)
x = self.head(x)
x = x.view(b, opt.frames, j, -1)
print("=============> x.shape", x.shape)
return x
# x = torch.rand((8, 27, 4, 17 , 2))
# hops = torch.rand((8,4,17,17))
# mvft = sgraformer(num_frame=opt.frames, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
# num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None, drop_path_rate=0.1)
# print(mvft(x, hops).shape)

343
model/Spatial_encoder.py Normal file

@@ -0,0 +1,343 @@
## Our model was revised from https://github.com/zczcwh/PoseFormer/blob/main/common/model_poseformer.py
import torch
import torch.nn as nn
from functools import partial
from einops import rearrange
from timm.models.layers import DropPath
#######################################################################################################################
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
#######################################################################################################################
class Attention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
# NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
self.scale = qk_scale or head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.edge_embedding = nn.Linear(17*17, 17*17)
def forward(self, x, edge_embedding):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
attn = (q @ k.transpose(-2, -1)) * self.scale
edge_embedding = self.edge_embedding(edge_embedding)
edge_embedding = edge_embedding.reshape(1, 17, 17).unsqueeze(0).repeat(B, self.num_heads, 1, 1)
# print(edge_embedding.shape)
attn = attn + edge_embedding
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
#######################################################################################################################
class CVA_Attention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
# NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
self.scale = qk_scale or head_dim ** -0.5
self.Qnorm = nn.LayerNorm(dim)
self.Knorm = nn.LayerNorm(dim)
self.Vnorm = nn.LayerNorm(dim)
self.QLinear = nn.Linear(dim, dim)
self.KLinear = nn.Linear(dim, dim)
self.VLinear = nn.Linear(dim, dim)
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.edge_embedding = nn.Linear(17*17, 17*17)
def forward(self, x, CVA_input, edge_embedding):
B, N, C = x.shape
# CVA_input = self.max_pool(CVA_input)
# print(CVA_input.shape)
q = self.QLinear(self.Qnorm(CVA_input)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
k = self.KLinear(self.Knorm(CVA_input)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
v = self.VLinear(self.Vnorm(x)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
attn = (q @ k.transpose(-2, -1)) * self.scale
edge_embedding = self.edge_embedding(edge_embedding)
edge_embedding = edge_embedding.reshape(1, 17, 17).unsqueeze(0).repeat(B, self.num_heads, 1, 1)
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
#######################################################################################################################
class Block(nn.Module):
def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
def forward(self, x, edge_embedding):
x = x + self.drop_path(self.attn(self.norm1(x), edge_embedding))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
#######################################################################################################################
class Multi_Out_Block(nn.Module):
def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
self.norm_hop1 = norm_layer(dim)
self.norm_hop2 = norm_layer(dim)
self.mlp_hop = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
def forward(self, x, hops, edge_embedding):
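# two-stream update: the attention output is gated element-wise by the normalized hop features, then added back to both the joint stream x and the hop stream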
MSA = self.drop_path(self.attn(self.norm1(x), edge_embedding))
MSA = self.norm_hop1(hops) * MSA
x = x + MSA
x = x + self.drop_path(self.mlp(self.norm2(x)))
hops = hops + MSA
hops = hops + self.drop_path(self.mlp_hop(self.norm_hop2(hops)))
return x, hops, MSA
#######################################################################################################################
class Multi_In_Out_Block(nn.Module):
def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.cva_attn = CVA_Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
# self.max_pool = nn.MaxPool1d(3, stride=1, padding=1, dilation=1, return_indices=False, ceil_mode=False)
self.norm_hop1 = norm_layer(dim)
self.norm_hop2 = norm_layer(dim)
self.mlp_hop = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
def forward(self, x, hops, CVA_input, edge_embedding):
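# same two-stream update as Multi_Out_Block, but the self-attention is replaced by cross-view attention conditioned on CVA_input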
MSA = self.drop_path(self.cva_attn(x, CVA_input, edge_embedding))
MSA = self.norm_hop1(hops) * MSA
x = x + MSA
x = x + self.drop_path(self.mlp(self.norm2(x)))
hops = hops + MSA
hops = hops + self.drop_path(self.mlp_hop(self.norm_hop2(hops)))
return x, hops, MSA
#######################################################################################################################
class First_view_Spatial_features(nn.Module):
def __init__(self, num_frame=9, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, norm_layer=None):
super().__init__()
norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
### spatial patch embedding
self.Spatial_patch_to_embedding = nn.Linear(in_chans, embed_dim_ratio)
self.Spatial_pos_embed = nn.Parameter(torch.zeros(1, num_joints, embed_dim_ratio))
self.hop_to_embedding = nn.Linear(68, embed_dim_ratio)
self.hop_pos_embed = nn.Parameter(torch.zeros(1, num_joints, embed_dim_ratio))
self.pos_drop = nn.Dropout(p=drop_rate)
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
self.block1 = Multi_Out_Block(dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[0],
norm_layer=norm_layer)
self.block2 = Multi_Out_Block(dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[1],
norm_layer=norm_layer)
self.block3 = Multi_Out_Block(dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[2],
norm_layer=norm_layer)
self.block4 = Multi_Out_Block(dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[3],
norm_layer=norm_layer)
self.Spatial_norm = norm_layer(embed_dim_ratio)
self.hop_norm = norm_layer(embed_dim_ratio)
def forward(self, x, hops, edge_embedding):
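# x: 2D keypoints shaped (b, c, f, p); hops: per-joint graph hop features with 68 channels; both are embedded per joint, processed by four Multi_Out_Block layers, and the intermediate MSA outputs are returned for reuse by Spatial_features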
b, _, f, p = x.shape ##### b is batch size, f is number of frames, p is number of joints
x = rearrange(x, 'b c f p -> (b f) p c', )
x = self.Spatial_patch_to_embedding(x)
x += self.Spatial_pos_embed
x = self.pos_drop(x)
hops = rearrange(hops, 'b c f p -> (b f) p c', )
hops = self.hop_to_embedding(hops)
hops += self.hop_pos_embed
hops = self.pos_drop(hops)
x, hops, MSA1 = self.block1(x, hops, edge_embedding)
x, hops, MSA2 = self.block2(x, hops, edge_embedding)
x, hops, MSA3 = self.block3(x, hops, edge_embedding)
x, hops, MSA4 = self.block4(x, hops, edge_embedding)
x = self.Spatial_norm(x)
x = rearrange(x, '(b f) w c -> b f (w c)', f=f)
hops = self.hop_norm(hops)
hops = rearrange(hops, '(b f) w c -> b f (w c)', f=f)
return x, hops, MSA1, MSA2, MSA3, MSA4
#######################################################################################################################
class Spatial_features(nn.Module):
def __init__(self, num_frame=9, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, norm_layer=None):
super().__init__()
norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
### spatial patch embedding
self.Spatial_patch_to_embedding = nn.Linear(in_chans, embed_dim_ratio)
self.Spatial_pos_embed = nn.Parameter(torch.zeros(1, num_joints, embed_dim_ratio))
self.hop_to_embedding = nn.Linear(68, embed_dim_ratio)
self.hop_pos_embed = nn.Parameter(torch.zeros(1, num_joints, embed_dim_ratio))
self.pos_drop = nn.Dropout(p=drop_rate)
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
self.block1 = Multi_In_Out_Block(
dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[0], norm_layer=norm_layer)
self.block2 = Multi_In_Out_Block(
dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[1], norm_layer=norm_layer)
self.block3 = Multi_In_Out_Block(
dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[2], norm_layer=norm_layer)
self.block4 = Multi_In_Out_Block(
dim=embed_dim_ratio, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[3], norm_layer=norm_layer)
self.Spatial_norm = norm_layer(embed_dim_ratio)
self.hop_norm = norm_layer(embed_dim_ratio)
def forward(self, x, hops, MSA1, MSA2, MSA3, MSA4, edge_embedding):
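# same embedding pipeline as First_view_Spatial_features, but each block additionally consumes the matching MSA output (MSA1..MSA4) as its cross-view input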
b, _, f, p = x.shape ##### b is batch size, f is number of frames, p is number of joints
x = rearrange(x, 'b c f p -> (b f) p c', )
x = self.Spatial_patch_to_embedding(x)
x += self.Spatial_pos_embed
x = self.pos_drop(x)
hops = rearrange(hops, 'b c f p -> (b f) p c', )
hops = self.hop_to_embedding(hops)
hops += self.hop_pos_embed
hops = self.pos_drop(hops)
x, hops, MSA1 = self.block1(x, hops, MSA1, edge_embedding)
x, hops, MSA2 = self.block2(x, hops, MSA2, edge_embedding)
x, hops, MSA3 = self.block3(x, hops, MSA3, edge_embedding)
x, hops, MSA4 = self.block4(x, hops, MSA4, edge_embedding)
x = self.Spatial_norm(x)
x = rearrange(x, '(b f) w c -> b f (w c)', f=f)
hops = self.hop_norm(hops)
hops = rearrange(hops, '(b f) w c -> b f (w c)', f=f)
return x, hops, MSA1, MSA2, MSA3, MSA4

159
model/Temporal_encoder.py Normal file
View File

@ -0,0 +1,159 @@
## Our model was revised from https://github.com/zczcwh/PoseFormer/blob/main/common/model_poseformer.py
import torch
import torch.nn as nn
from functools import partial
from einops import rearrange
from timm.models.layers import DropPath
from common.opt import opts
opt = opts().parse()
#######################################################################################################################
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
#######################################################################################################################
class Attention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
# NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
self.scale = qk_scale or head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
attn = (q @ k.transpose(-2, -1)) * self.scale
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
#######################################################################################################################
class CVA_Attention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
# NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
self.scale = qk_scale or head_dim ** -0.5
self.Qnorm = nn.LayerNorm(dim)
self.Knorm = nn.LayerNorm(dim)
self.Vnorm = nn.LayerNorm(dim)
self.QLinear = nn.Linear(dim, dim)
self.KLinear = nn.Linear(dim, dim)
self.VLinear = nn.Linear(dim, dim)
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x, CVA_input):
B, N, C = x.shape
q = self.QLinear(self.Qnorm(CVA_input)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
k = self.KLinear(self.Knorm(CVA_input)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
v = self.VLinear(self.Vnorm(x)).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)
attn = (q @ k.transpose(-2, -1)) * self.scale
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
#######################################################################################################################
class Block(nn.Module):
def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
def forward(self, x):
x = x + self.drop_path(self.attn(self.norm1(x)))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
#######################################################################################################################
class Temporal__features(nn.Module):
def __init__(self, num_frame=9, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, norm_layer=None):
super().__init__()
norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
embed_dim = embed_dim_ratio * num_joints #### temporal embed_dim is num_joints * spatial embedding dim ratio
out_dim = num_joints * 3 #### output dimension is num_joints * 3
### Temporal patch embedding
self.Temporal_pos_embed = nn.Parameter(torch.zeros(1, num_frame, embed_dim))
self.pos_drop = nn.Dropout(p=drop_rate)
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
self.blocks = nn.ModuleList([
Block(
dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer)
for i in range(depth)])
self.Temporal_norm = norm_layer(embed_dim)
####### An easy way to implement the weighted mean
self.weighted_mean = torch.nn.Conv1d(in_channels=num_frame, out_channels=1, kernel_size=1)
def forward(self, x):
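# x: (b, num_frame, embed_dim) per-frame pose embeddings; plain self-attention blocks mix information across frames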
b = x.shape[0]
x += self.Temporal_pos_embed
x = self.pos_drop(x)
for blk in self.blocks:
x = blk(x)
x = self.Temporal_norm(x)
##### x size [b, f, emb_dim]; the weighted mean over the frame dimension is disabled here, so features for all opt.frames frames are returned
# x = self.weighted_mean(x)
x = x.view(b, opt.frames, -1)
return x


12
requirements.txt Normal file
View File

@ -0,0 +1,12 @@
opencv-python
tqdm
yacs
numba
scikit-image
filterpy
ipython
einops
tensorboard
timm==0.4.5
matplotlib==2.2.2
tensorboardX

256
test.py Normal file
View File

@ -0,0 +1,256 @@
import os
import torch
import logging
import random
import torch.optim as optim
from tqdm import tqdm
# from torch.utils.tensorboard import SummaryWriter
from common.utils import *
from common.opt import opts
from common.h36m_dataset import Human36mDataset
from common.Mydataset import Fusion
from model.SGraFormer import sgraformer
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
CUDA_ID = [0]
device = torch.device("cuda")
def visualize_skeletons(input_2D, output_3D, gt_3D, idx=5, output_dir='./output'):
# Ensure the tensors are on the CPU and convert them to numpy arrays
input_2D = input_2D.cpu().numpy()
output_3D = output_3D.cpu().numpy()
gt_3D = gt_3D.cpu().numpy()
# Get the first action and first sample from the batch
input_sample = input_2D[idx, 0]
output_sample = output_3D[idx, 0]
gt_3D_sample = gt_3D[idx, 0]
print(f'\ninput_sample shape: {input_sample.shape}')
print(f'output_sample shape: {output_sample.shape}')
fig = plt.figure(figsize=(25, 5))
# Define the connections (bones) between joints
bones = [
(0, 1), (1, 2), (2, 3), # Right leg
(0, 4), (4, 5), (5, 6), # Left leg
(0, 7), (7, 8), (8, 9), (9, 10), # Spine
(7, 11), (11, 12), (12, 13), # Left arm
(7, 14), (14, 15), (15, 16) # Right arm
]
# Colors for different parts
bone_colors = {
"leg": 'green',
"spine": 'blue',
"arm": 'red'
}
# Function to get bone color based on index
def get_bone_color(start, end):
if (start in [1, 2, 3] or end in [1, 2, 3] or
start in [4, 5, 6] or end in [4, 5, 6]):
return bone_colors["leg"]
elif start in [7, 8, 9, 10] or end in [7, 8, 9, 10]:
return bone_colors["spine"]
else:
return bone_colors["arm"]
# Plotting 2D skeletons from different angles
for i in range(4):
ax = fig.add_subplot(1, 7, i + 1)
ax.set_title(f'2D angle {i+1}')
ax.scatter(input_sample[i, :, 0], input_sample[i, :, 1], color='blue')
# Draw the bones
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([input_sample[i, start, 0], input_sample[i, end, 0]],
[input_sample[i, start, 1], input_sample[i, end, 1]], color=bone_color)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_xlim(np.min(input_sample[:, :, 0]) - 1, np.max(input_sample[:, :, 0]) + 1)
ax.set_ylim(np.min(input_sample[:, :, 1]) - 1, np.max(input_sample[:, :, 1]) + 1)
ax.grid()
# Plotting predicted 3D skeleton
ax = fig.add_subplot(1, 7, 5, projection='3d')
ax.set_title('3D Predicted Skeleton')
ax.scatter(output_sample[:, 0], output_sample[:, 1], output_sample[:, 2], color='red', label='Predicted')
# Draw the bones in 3D for output_sample
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([output_sample[start, 0], output_sample[end, 0]],
[output_sample[start, 1], output_sample[end, 1]],
[output_sample[start, 2], output_sample[end, 2]], color=bone_color)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_xlim(np.min(output_sample[:, 0]) - 1, np.max(output_sample[:, 0]) + 1)
ax.set_ylim(np.min(output_sample[:, 1]) - 1, np.max(output_sample[:, 1]) + 1)
ax.set_zlim(np.min(output_sample[:, 2]) - 1, np.max(output_sample[:, 2]) + 1)
ax.legend()
# Plotting ground truth 3D skeleton
ax = fig.add_subplot(1, 7, 6, projection='3d')
ax.set_title('3D Ground Truth Skeleton')
ax.scatter(gt_3D_sample[:, 0], gt_3D_sample[:, 1], gt_3D_sample[:, 2], color='blue', label='Ground Truth')
# Draw the bones in 3D for gt_3D_sample
for start, end in bones:
bone_color = get_bone_color(start, end)
ax.plot([gt_3D_sample[start, 0], gt_3D_sample[end, 0]],
[gt_3D_sample[start, 1], gt_3D_sample[end, 1]],
[gt_3D_sample[start, 2], gt_3D_sample[end, 2]], color=bone_color, linestyle='--')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_xlim(np.min(gt_3D_sample[:, 0]) - 1, np.max(gt_3D_sample[:, 0]) + 1)
ax.set_ylim(np.min(gt_3D_sample[:, 1]) - 1, np.max(gt_3D_sample[:, 1]) + 1)
ax.set_zlim(np.min(gt_3D_sample[:, 2]) - 1, np.max(gt_3D_sample[:, 2]) + 1)
ax.legend()
plt.grid()
# Save the figure
plt.tight_layout()
plt.savefig(f'{output_dir}/skeletons_visualization.png')
plt.show()
def val(opt, actions, val_loader, model):
with torch.no_grad():
return step('test', opt, actions, val_loader, model)
def step(split, opt, actions, dataLoader, model, optimizer=None, epoch=None, writer=None, adaptive_weight=None):
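# evaluation loop: accumulates per-action errors over the dataloader and reports the two standard protocols (p1, p2)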
loss_all = {'loss': AccumLoss()}
action_error_sum = define_error_list(actions)
model.eval()
TQDM = tqdm(enumerate(dataLoader), total=len(dataLoader), ncols=100)
for i, data in TQDM:
batch_cam, gt_3D, input_2D, action, subject, scale, bb_box, start, end, hops = data
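# each batch carries multi-view 2D inputs, 3D ground truth, camera parameters, scale/bbox info and precomputed hop features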
[input_2D, gt_3D, batch_cam, scale, bb_box, hops] = get_varialbe(split, [input_2D, gt_3D, batch_cam, scale, bb_box, hops])
# print("\n======> input_2D: ", input_2D.shape)
# print("======> gt_3D: ", gt_3D.shape)
if split == 'train':
output_3D = model(input_2D, hops)
elif split == 'test':
input_2D, output_3D = input_augmentation(input_2D, hops, model)
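# evaluate root-relative poses: the root joint (index 0) is zeroed for both the target and the prediction below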
out_target = gt_3D.clone()
out_target[:, :, 0] = 0
# print("======> output_3D: ", output_3D.shape)
# visualize_skeletons(input_2D, output_3D, gt_3D)
if output_3D.shape[1] != 1:
output_3D = output_3D[:, opt.pad].unsqueeze(1)
output_3D[:, :, 1:, :] -= output_3D[:, :, :1, :]
output_3D[:, :, 0, :] = 0
action_error_sum = test_calculation(output_3D, out_target, action, action_error_sum, opt.dataset, subject)
p1, p2 = print_error(opt.dataset, action_error_sum, opt.train)
# print("======> p1, p2: ", p1, p2)
if split == 'train':
return loss_all['loss'].avg
elif split == 'test':
p1, p2 = print_error(opt.dataset, action_error_sum, opt.train)
return p1, p2
def input_augmentation(input_2D, hops, model):
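# test-time path without flip augmentation: only the non-flipped inputs (index 0) are fed to the model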
input_2D_non_flip = input_2D[:, 0]
output_3D_non_flip = model(input_2D_non_flip, hops)
# print("======> input_2D_non_flip: ", input_2D_non_flip.shape)
# print("======> output_3D_non_flip: ", output_3D_non_flip.shape)
# visualize_skeletons(input_2D_non_flip, output_3D_non_flip)
return input_2D_non_flip, output_3D_non_flip
if __name__ == '__main__':
opt = opts().parse()
root_path = opt.root_path
opt.manualSeed = 1
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
dataset_path = root_path + 'data_3d_' + opt.dataset + '.npz'
dataset = Human36mDataset(dataset_path, opt)
actions = define_actions(opt.actions)
train_data = Fusion(opt=opt, train=True, dataset=dataset, root_path=root_path)
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=opt.batch_size,
shuffle=True, num_workers=int(opt.workers), pin_memory=True)
test_data = Fusion(opt=opt, train=False, dataset=dataset, root_path=root_path)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=opt.batch_size,
shuffle=False, num_workers=int(opt.workers), pin_memory=True)
model = sgraformer(num_frame=opt.frames, num_joints=17, in_chans=2, embed_dim_ratio=32, depth=4,
num_heads=8, mlp_ratio=2., qkv_bias=True, qk_scale=None, drop_path_rate=0.1)
# model = FuseModel()
if torch.cuda.device_count() > 1:
print("Let's use", torch.cuda.device_count(), "GPUs!")
model = torch.nn.DataParallel(model, device_ids=CUDA_ID).to(device)
model = model.to(device)
model_path = '/home/zlt/Documents/SGraFormer-master/checkpoint/epoch_50.pth'
pre_dict = torch.load(model_path)
model_dict = model.state_dict()
# keep only the checkpoint entries whose names match the current model's state dict
state_dict = {k: v for k, v in pre_dict.items() if k in model_dict.keys()}
model_dict.update(state_dict)
model.load_state_dict(model_dict)
all_param = []
lr = opt.lr
all_param += list(model.parameters())
optimizer = optim.AdamW(all_param, lr=lr, weight_decay=0.1)
## tensorboard
# writer = SummaryWriter("runs/nin")
writer = None
flag = 0
p1, p2 = val(opt, actions, test_dataloader, model)
print('p1: %.2f, p2: %.2f' % (p1, p2))

209
vs Normal file
View File

@ -0,0 +1,209 @@
====> input_2D: [[[[-0.17325002 -0.17840627]
[-0.16224998 -0.18938544]
[-0.1595 0.00549481]
...
[-0.12924999 -0.38701043]
[-0.12924999 -0.28819796]
[-0.23925 -0.26623955]]
[[ 0.11524999 -0.21262503]
[ 0.06274998 -0.21262503]
[ 0.05225003 -0.02362502]
...
[ 0.04174995 -0.40162498]
[ 0.00762498 -0.28875 ]
[ 0.07587504 -0.28875 ]]
[[ 0.126382 -0.27275002]
[ 0.17350698 -0.28324997]
[ 0.18659711 -0.10212499]
...
[ 0.1892153 -0.47224998]
[ 0.22063196 -0.36725003]
[ 0.16303468 -0.41974998]]
[[ 0.11715972 -0.28818753]
[ 0.10402083 -0.27506253]
[ 0.07774305 -0.03881248]
...
[ 0.08759713 -0.5277187 ]
[ 0.08759713 -0.37678126]
[ 0.22884035 -0.3669375 ]]]
[[[-0.17236805 -0.17731252]
[-0.16139579 -0.18827084]
[-0.15865278 0.00623951]
...
[-0.12847918 -0.38552085]
[-0.12573606 -0.28415623]
[-0.23351866 -0.263042 ]]
[[ 0.11031246 -0.21437502]
[ 0.06462502 -0.2116875 ]
[ 0.05387497 -0.02356249]
...
[ 0.04312503 -0.4051875 ]
[ 0.01087499 -0.28693748]
[ 0.07537496 -0.2815625 ]]
[[ 0.12379158 -0.2751823 ]
[ 0.17327082 -0.27778125]
[ 0.18889582 -0.10105205]
...
[ 0.18889582 -0.47530204]
[ 0.21754158 -0.36354685]
[ 0.15504158 -0.41552603]]
[[ 0.12193751 -0.28907296]
[ 0.10231245 -0.27596876]
[ 0.0826875 -0.04009375]
...
[ 0.08595836 -0.5216719 ]
[ 0.09904158 -0.37752607]
[ 0.23314583 -0.35459378]]]
[[[-0.1726042 -0.17912498]
[-0.1616875 -0.17912498]
[-0.1562292 0.00645826]
...
[-0.12893748 -0.3865417 ]
[-0.12893748 -0.28829166]
[-0.23810416 -0.2555417 ]]
[[ 0.11325002 -0.20937502]
[ 0.063375 -0.20674998]
[ 0.05025005 -0.02037501]
...
[ 0.03974998 -0.403625 ]
[ 0.00825 -0.28812498]
[ 0.07912505 -0.28025 ]]
[[ 0.126382 -0.27367705]
[ 0.17350698 -0.28415626]
[ 0.18659711 -0.10339063]
...
[ 0.1892153 -0.47278124]
[ 0.22063196 -0.36798954]
[ 0.16303468 -0.40990627]]
[[ 0.12304163 -0.28884378]
[ 0.1034584 -0.27578124]
[ 0.07408333 -0.03739062]
...
[ 0.08713889 -0.52070314]
[ 0.08713889 -0.37048438]
[ 0.23074996 -0.34435937]]]
...
[[[-0.15943056 -0.18728128]
[-0.15660417 -0.19010422]
[-0.15660417 0.00185416]
...
[-0.148125 -0.39617708]
[-0.1452986 -0.29172918]
[-0.23856944 -0.25503126]]
[[ 0.09365964 -0.20290625]
[ 0.05642354 -0.20290625]
[ 0.05110407 -0.02192706]
...
[ 0.03248608 -0.40251565]
[-0.01006943 -0.28008854]
[ 0.08568048 -0.2375052 ]]
[[ 0.14960408 -0.274875 ]
[ 0.18345833 -0.27747917]
[ 0.19387496 -0.10560417]
...
[ 0.20168746 -0.47279167]
[ 0.23554158 -0.3660208 ]
[ 0.14960408 -0.3582083 ]]
[[ 0.10611105 -0.2872292 ]
[ 0.09300005 -0.28068748]
[ 0.07988894 -0.03210416]
...
[ 0.10611105 -0.5292708 ]
[ 0.07988894 -0.36572918]
[ 0.20116663 -0.31012502]]]
[[[-0.15781945 -0.17923954]
[-0.15502083 -0.18764582]
[-0.15502083 0.00289581]
...
[-0.14382643 -0.40060422]
[-0.13822919 -0.2997292 ]
[-0.23338199 -0.25489584]]
[[ 0.09533334 -0.20466667]
[ 0.05533338 -0.20200002]
[ 0.04999995 -0.02066666]
...
[ 0.03133333 -0.402 ]
[-0.01133329 -0.2793333 ]
[ 0.08466661 -0.23666668]]
[[ 0.1532222 -0.27335936]
[ 0.18455553 -0.2785781 ]
[ 0.1976111 -0.09853125]
...
[ 0.1976111 -0.47428125]
[ 0.22894442 -0.36990625]
[ 0.15061104 -0.35946876]]
[[ 0.09612501 -0.29290107]
[ 0.09612501 -0.27979687]
[ 0.07977092 -0.03409376]
...
[ 0.10593748 -0.5320521 ]
[ 0.07977092 -0.36169794]
[ 0.2007916 -0.3125573 ]]]
[[[-0.15511107 -0.18240628]
[-0.15511107 -0.18796876]
[-0.15233332 0.00393746]
...
[-0.144 -0.39934376]
[-0.144 -0.30478123]
[-0.23288894 -0.25471875]]
[[ 0.08933342 -0.20692188]
[ 0.05466664 -0.20426041]
[ 0.04666662 -0.02061981]
...
[ 0.03333342 -0.40653127]
[-0.00666666 -0.28676564]
[ 0.08666658 -0.23619795]]
[[ 0.15420842 -0.2762708 ]
[ 0.18285418 -0.278875 ]
[ 0.19847918 -0.10179168]
...
[ 0.19587505 -0.4741875 ]
[ 0.22712505 -0.3648125 ]
[ 0.1456331 -0.36036688]]
[[ 0.09231246 -0.28507295]
[ 0.09885418 -0.28507295]
[ 0.07595837 -0.02626565]
...
[ 0.10212505 -0.530776 ]
[ 0.08577085 -0.36369792]
[ 0.20679164 -0.30800518]]]]
=======> hops: tensor([[0.0000, 0.0014, 0.0000, ..., 0.0000, 0.0445, 0.0000],
[0.0009, 0.0000, 0.0185, ..., 0.0504, 0.0000, 0.0000],
[0.0000, 0.0094, 0.0000, ..., 0.0000, 0.0000, 0.0000],
...,
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000]],
device='cuda:0')

223
vs_3090 Normal file
View File

@ -0,0 +1,223 @@
input_2D: [[[[-0.17325002 -0.17840627]
[-0.16224998 -0.18938544]
[-0.1595 0.00549481]
...
[-0.12924999 -0.38701043]
[-0.12924999 -0.28819796]
[-0.23925 -0.26623955]]
[[ 0.11524999 -0.21262503]
[ 0.06274998 -0.21262503]
[ 0.05225003 -0.02362502]
...
[ 0.04174995 -0.40162498]
[ 0.00762498 -0.28875 ]
[ 0.07587504 -0.28875 ]]
[[ 0.126382 -0.27275002]
[ 0.17350698 -0.28324997]
[ 0.18659711 -0.10212499]
...
[ 0.1892153 -0.47224998]
[ 0.22063196 -0.36725003]
[ 0.16303468 -0.41974998]]
[[ 0.11715972 -0.28818753]
[ 0.10402083 -0.27506253]
[ 0.07774305 -0.03881248]
...
[ 0.08759713 -0.5277187 ]
[ 0.08759713 -0.37678126]
[ 0.22884035 -0.3669375 ]]]
[[[-0.17236805 -0.17731252]
[-0.16139579 -0.18827084]
[-0.15865278 0.00623951]
...
[-0.12847918 -0.38552085]
[-0.12573606 -0.28415623]
[-0.23351866 -0.263042 ]]
[[ 0.11031246 -0.21437502]
[ 0.06462502 -0.2116875 ]
[ 0.05387497 -0.02356249]
...
[ 0.04312503 -0.4051875 ]
[ 0.01087499 -0.28693748]
[ 0.07537496 -0.2815625 ]]
[[ 0.12379158 -0.2751823 ]
[ 0.17327082 -0.27778125]
[ 0.18889582 -0.10105205]
...
[ 0.18889582 -0.47530204]
[ 0.21754158 -0.36354685]
[ 0.15504158 -0.41552603]]
[[ 0.12193751 -0.28907296]
[ 0.10231245 -0.27596876]
[ 0.0826875 -0.04009375]
...
[ 0.08595836 -0.5216719 ]
[ 0.09904158 -0.37752607]
[ 0.23314583 -0.35459378]]]
[[[-0.1726042 -0.17912498]
[-0.1616875 -0.17912498]
[-0.1562292 0.00645826]
...
[-0.12893748 -0.3865417 ]
[-0.12893748 -0.28829166]
[-0.23810416 -0.2555417 ]]
[[ 0.11325002 -0.20937502]
[ 0.063375 -0.20674998]
[ 0.05025005 -0.02037501]
...
[ 0.03974998 -0.403625 ]
[ 0.00825 -0.28812498]
[ 0.07912505 -0.28025 ]]
[[ 0.126382 -0.27367705]
[ 0.17350698 -0.28415626]
[ 0.18659711 -0.10339063]
...
[ 0.1892153 -0.47278124]
[ 0.22063196 -0.36798954]
[ 0.16303468 -0.40990627]]
[[ 0.12304163 -0.28884378]
[ 0.1034584 -0.27578124]
[ 0.07408333 -0.03739062]
...
[ 0.08713889 -0.52070314]
[ 0.08713889 -0.37048438]
[ 0.23074996 -0.34435937]]]
...
[[[-0.15943056 -0.18728128]
[-0.15660417 -0.19010422]
[-0.15660417 0.00185416]
...
[-0.148125 -0.39617708]
[-0.1452986 -0.29172918]
[-0.23856944 -0.25503126]]
[[ 0.09365964 -0.20290625]
[ 0.05642354 -0.20290625]
[ 0.05110407 -0.02192706]
...
[ 0.03248608 -0.40251565]
[-0.01006943 -0.28008854]
[ 0.08568048 -0.2375052 ]]
[[ 0.14960408 -0.274875 ]
[ 0.18345833 -0.27747917]
[ 0.19387496 -0.10560417]
...
[ 0.20168746 -0.47279167]
[ 0.23554158 -0.3660208 ]
[ 0.14960408 -0.3582083 ]]
[[ 0.10611105 -0.2872292 ]
[ 0.09300005 -0.28068748]
[ 0.07988894 -0.03210416]
...
[ 0.10611105 -0.5292708 ]
[ 0.07988894 -0.36572918]
[ 0.20116663 -0.31012502]]]
[[[-0.15781945 -0.17923954]
[-0.15502083 -0.18764582]
[-0.15502083 0.00289581]
...
[-0.14382643 -0.40060422]
[-0.13822919 -0.2997292 ]
[-0.23338199 -0.25489584]]
[[ 0.09533334 -0.20466667]
[ 0.05533338 -0.20200002]
[ 0.04999995 -0.02066666]
...
[ 0.03133333 -0.402 ]
[-0.01133329 -0.2793333 ]
[ 0.08466661 -0.23666668]]
[[ 0.1532222 -0.27335936]
[ 0.18455553 -0.2785781 ]
[ 0.1976111 -0.09853125]
...
[ 0.1976111 -0.47428125]
[ 0.22894442 -0.36990625]
[ 0.15061104 -0.35946876]]
[[ 0.09612501 -0.29290107]
[ 0.09612501 -0.27979687]
[ 0.07977092 -0.03409376]
...
[ 0.10593748 -0.5320521 ]
[ 0.07977092 -0.36169794]
[ 0.2007916 -0.3125573 ]]]
[[[-0.15511107 -0.18240628]
[-0.15511107 -0.18796876]
[-0.15233332 0.00393746]
...
[-0.144 -0.39934376]
[-0.144 -0.30478123]
[-0.23288894 -0.25471875]]
[[ 0.08933342 -0.20692188]
[ 0.05466664 -0.20426041]
[ 0.04666662 -0.02061981]
...
[ 0.03333342 -0.40653127]
[-0.00666666 -0.28676564]
[ 0.08666658 -0.23619795]]
[[ 0.15420842 -0.2762708 ]
[ 0.18285418 -0.278875 ]
[ 0.19847918 -0.10179168]
...
[ 0.19587505 -0.4741875 ]
[ 0.22712505 -0.3648125 ]
[ 0.1456331 -0.36036688]]
[[ 0.09231246 -0.28507295]
[ 0.09885418 -0.28507295]
[ 0.07595837 -0.02626565]
...
[ 0.10212505 -0.530776 ]
[ 0.08577085 -0.36369792]
[ 0.20679164 -0.30800518]]]]
=======> hops: tensor([[0.0000, 0.0014, 0.0000, ..., 0.0000, 0.0445, 0.0000],
[0.0009, 0.0000, 0.0185, ..., 0.0504, 0.0000, 0.0000],
[0.0000, 0.0094, 0.0000, ..., 0.0000, 0.0000, 0.0000],
...,
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000]],
device='cuda:0')
=======> hops: tensor([[0.0000e+00, 4.9881e-05, 0.0000e+00, ..., 0.0000e+00, 6.5165e-03,
0.0000e+00],
[3.2784e-05, 0.0000e+00, 2.7641e-02, ..., 7.3896e-02, 0.0000e+00,
0.0000e+00],
[0.0000e+00, 1.1971e-02, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,
0.0000e+00],
...,
[0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,
0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,
0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00, ..., 0.0000e+00, 0.0000e+00,
0.0000e+00]], device='cuda:1')