Update ParsingGait (#160)

* Update ParsingGait

* Clear up the confusion

Clear up the confusion about gait3d and gait3d-parsing.

* Update 0.get_started.md

* Add BaseParsingCuttingTransform

* Update gcn.py

* Create gaitbase_gait3d_parsing_btz32x2_fixed.yaml

* Add gait3d_parsing config file

* Update 1.model_zoo.md

Update Gait3D-Parsing checkpoints

* Update 1.model_zoo.md

add configuration

* Update 1.model_zoo.md

center text

---------

Co-authored-by: Junhao Liang <43094337+darkliang@users.noreply.github.com>
This commit is contained in:
Zzier
2023-10-29 21:53:02 +08:00
committed by GitHub
parent 44fb2414f2
commit 609aa0e9aa
17 changed files with 1422 additions and 3 deletions
@@ -0,0 +1,103 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
# data_in_use: [true, false]
num_workers: 1
remove_no_gallery: false # Remove probe if no gallery for it
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 120000
save_name: GaitBase_btz32x2_fixed
eval_func: evaluate_Gait3D
sampler:
batch_shuffle: false
batch_size: 4
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
metric: euc # cos
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: Baseline
backbone_cfg:
type: ResNet9
block: BasicBlock
channels: # Layers configuration for automatically model construction
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3000
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.1
momentum: 0.9
solver: SGD
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones: # Learning Rate Reduction at each milestones
- 40000
- 80000
- 100000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true # half-precision float for memory reduction and speedup
fix_BN: false
with_test: false
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
save_iter: 40000
save_name: GaitBase_btz32x2_fixed
sync_BN: true
total_iter: 120000
sampler:
batch_shuffle: true
batch_size:
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
- 2 # batch_size[1] indicates the number of sample sequences for each Identity
frames_num_fixed: 30 # fixed frames number for training
frames_num_max: 50 # max frames number for unfixed training
frames_num_min: 10 # min frames number for unfixed training
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
+74
View File
@@ -0,0 +1,74 @@
# Note : *** the batch_size should be equal to the gpus number at the test phase!!! ***
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
num_workers: 1
remove_no_gallery: false
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: false
restore_ckpt_strict: true
restore_hint: 180000
save_name: GaitGL
eval_func: evaluate_Gait3D
metric: cos
sampler:
batch_size: 4
sample_type: all_ordered
type: InferenceSampler
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 1
type: CrossEntropyLoss
log_accuracy: true
label_smooth: false
log_prefix: softmax
model_cfg:
model: GaitGL
channels: [32, 64, 128]
class_num: 3000
optimizer_cfg:
lr: 1.0e-3
momentum: 0.9
solver: Adam
weight_decay: 5.0e-4
scheduler_cfg:
gamma: 0.1
milestones:
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
with_test: false
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
save_iter: 30000
save_name: GaitGL
sync_BN: true
total_iter: 180000
sampler:
batch_shuffle: true
batch_size:
- 32
- 4
frames_num_fixed: 30
frames_skip_num: 0
sample_type: fixed_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,86 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
num_workers: 1
remove_no_gallery: false
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: false
restore_ckpt_strict: true
restore_hint: 180000
save_name: GaitPart
eval_func: evaluate_Gait3D
metric: cos
sampler:
batch_size: 16
sample_type: all_ordered
type: InferenceSampler
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
model_cfg:
model: GaitPart
backbone_cfg:
in_channels: 1
layers_cfg:
- BC-32
- BC-32
- M
- FC-64-2
- FC-64-2
- M
- FC-128-3
- FC-128-3
type: Plain
SeparateFCs:
in_channels: 128
out_channels: 128
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.001
momentum: 0.9
solver: Adam
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
log_iter: 100
with_test: false
restore_ckpt_strict: true
restore_hint: 0
save_iter: 30000
save_name: GaitPart
sync_BN: false
total_iter: 180000
sampler:
batch_shuffle: false
batch_size:
- 32
- 4
frames_num_fixed: 30
frames_num_max: 50
frames_num_min: 25
frames_skip_num: 10
sample_type: fixed_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,81 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
num_workers: 1
remove_no_gallery: false
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: false
restore_ckpt_strict: true
restore_hint: 180000
save_name: GaitSet
eval_func: evaluate_Gait3D
metric: cos
sampler:
batch_size: 16
sample_type: all_ordered
type: InferenceSampler
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
model_cfg:
model: GaitSet
in_channels:
- 1
- 32
- 64
- 128
SeparateFCs:
in_channels: 128
out_channels: 256
parts_num: 62
bin_num:
- 16
- 8
- 4
- 2
- 1
optimizer_cfg:
lr: 0.001
momentum: 0.9
solver: Adam
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
log_iter: 100
with_test: false
restore_ckpt_strict: true
restore_hint: 0
save_iter: 30000
save_name: GaitSet
sync_BN: false
total_iter: 180000
sampler:
batch_shuffle: false
batch_size:
- 32
- 4
frames_num_fixed: 30
frames_num_max: 50
frames_num_min: 25
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,97 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
num_workers: 1
cache: false
remove_no_gallery: false
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_distributed: true
enable_float16: false
restore_ckpt_strict: true
restore_hint: 180000
save_name: GLN_P1
eval_func: evaluate_Gait3D
metric: cos
sampler:
batch_size: 8
sample_type: all_ordered
type: InferenceSampler
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
# - loss_term_weight: 0.1
# scale: 1
# type: CrossEntropyLoss
# log_prefix: softmax
model_cfg:
model: GLN
class_num: 3000
lateral_dim: 256
hidden_dim: 256
dropout: 0.9
in_channels:
- 1
- 32
- 64
- 128
SeparateFCs:
in_channels: 256
out_channels: 256
parts_num: 93
bin_num:
- 16
- 8
- 4
- 2
- 1
Lateral_pretraining: true
optimizer_cfg:
lr: 0.001
momentum: 0.9
solver: Adam
weight_decay: 0.0005
# lr: 0.0001
# momentum: 0.9
# solver: Adam
# weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_layers: false
with_test: false
log_iter: 100
optimizer_reset: false
restore_ckpt_strict: true
restore_hint: 0
save_iter: 30000
save_name: GLN_P1
sync_BN: true
total_iter: 180000
sampler:
batch_shuffle: false
batch_size:
- 32
- 4
frames_num_fixed: 30
frames_skip_num: 0
sample_type: fixed_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,93 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
num_workers: 1
remove_no_gallery: false
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_distributed: true
enable_float16: false
restore_ckpt_strict: true
restore_hint: 180000
save_name: GLN_P2
eval_func: evaluate_Gait3D
metric: cos
sampler:
batch_size: 4
sample_type: all_ordered
type: InferenceSampler
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 0.1
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
model_cfg:
model: GLN
class_num: 3000
lateral_dim: 256
hidden_dim: 256
dropout: 0.9
in_channels:
- 1
- 32
- 64
- 128
SeparateFCs:
in_channels: 256
out_channels: 256
parts_num: 93
bin_num:
- 16
- 8
- 4
- 2
- 1
Lateral_pretraining: false
optimizer_cfg:
lr: 0.001
momentum: 0.9
solver: Adam
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_distributed: true
enable_float16: true
fix_layers: false
log_iter: 100
optimizer_reset: true
scheduler_reset: true
restore_ckpt_strict: false
restore_hint: output/Gait3D-Parsing/GLN/GLN_P1/checkpoints/GLN_P1-180000.pt
save_iter: 30000
save_name: GLN_P2
sync_BN: true
total_iter: 180000
sampler:
batch_shuffle: false
batch_size:
- 32
- 4
frames_num_fixed: 30
frames_skip_num: 0
sample_type: fixed_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,109 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
# data_in_use: [true, false]
num_workers: 1
remove_no_gallery: false # Remove probe if no gallery for it
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 120000
save_name: ParsingGait
eval_func: evaluate_Gait3D
sampler:
batch_shuffle: false
batch_size: 4
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
metric: euc # cos
transform:
- type: BaseParsingCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ParsingGait
backbone_cfg:
type: ResNet9
block: BasicBlock
channels: # Layers configuration for automatically model construction
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 21
SeparateBNNecks:
class_num: 3000
in_channels: 256
parts_num: 21
bin_num:
- 16
gcn_cfg:
fine_parts: 11
coarse_parts: 5
only_fine_graph: false
only_coarse_graph: true
combine_fine_coarse_graph: false
optimizer_cfg:
lr: 0.1
momentum: 0.9
solver: SGD
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones: # Learning Rate Reduction at each milestones
- 40000
- 80000
- 100000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true # half-precision float for memory reduction and speedup
fix_BN: false
with_test: true
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
save_iter: 40000
save_name: ParsingGait
sync_BN: true
total_iter: 120000
sampler:
batch_shuffle: true
batch_size:
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
- 2 # batch_size[1] indicates the number of sample sequences for each Identity
frames_num_fixed: 30 # fixed frames number for training
frames_num_max: 50 # max frames number for unfixed training
frames_num_min: 10 # min frames number for unfixed training
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
@@ -0,0 +1,107 @@
data_cfg:
dataset_name: Gait3D-Parsing
dataset_root: your_path
dataset_partition: datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
data_in_use: [true, false, true]
num_workers: 1
remove_no_gallery: false # Remove probe if no gallery for it
test_dataset_name: Gait3D-Parsing
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 180000
save_name: SMPLGait
eval_func: evaluate_Gait3D
metric: cos # cos euc
sampler:
batch_shuffle: false
batch_size: 4
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
transform:
- type: BaseParsingCuttingTransform
img_w: 64
- type: NoOperation
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 0.1
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: SMPLGait
backbone_cfg:
in_channels: 1
layers_cfg: # Layers configuration for automatically model construction
- BC-64
- BC-64
- M
- BC-128
- BC-128
- M
- BC-256
- BC-256
# - M
# - BC-512
# - BC-512
type: Plain
SeparateFCs:
in_channels: 256
out_channels: 256
parts_num: 31
SeparateBNNecks:
class_num: 3000
in_channels: 256
parts_num: 31
bin_num:
- 16
- 8
- 4
- 2
- 1
optimizer_cfg:
lr: 0.001
momentum: 0.9
solver: Adam
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones: # Learning Rate Reduction at each milestones
- 30000
- 90000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true # half-precision float for memory reduction and speedup
fix_BN: false
log_iter: 100
with_test: false
restore_ckpt_strict: true
restore_hint: 0
save_iter: 30000
save_name: SMPLGait
sync_BN: true
total_iter: 180000
sampler:
batch_shuffle: true
batch_size:
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
- 4 # batch_size[1] indicates the number of sample sequences for each Identity
frames_num_fixed: 30 # fixed frames number for training
frames_num_max: 50 # max frames number for unfixed training
frames_num_min: 25 # min frames number for unfixed training
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
type: TripletSampler
transform:
- type: BaseParsingCuttingTransform
img_w: 64
- type: NoOperation
+43
View File
@@ -0,0 +1,43 @@
# Gait3D-Parsing
This is the pre-processing instructions for the Gait3D-Parsing dataset. The original dataset can be found [here](https://gait3d.github.io/gait3d-parsing-hp/). The original dataset is not publicly available. You need to request access to the dataset in order to download it. This README explains how to extract the original dataset and convert it to a format suitable for OpenGait.
## Data Preparation
https://github.com/Gait3D/Gait3D-Benchmark#data-preparation
## Data Pretreatment
```python
python datasets/Gait3D-Parsing/pretreatment_gps.py -i 'Gait3D/2D_Parsings' -o 'Gait3D-pars-64-64-pkl' -r 64 -p
```
## Train
### ParsingGait model:
`CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port 12345 --nproc_per_node=4 opengait/main.py --cfgs ./configs/parsinggait/parsinggait_gait3d_parsing.yaml --phase train`
## Citation
If you use this dataset in your research, please cite the following paper:
```
@inproceedings{zheng2023parsinggait,
title={Parsing is All You Need for Accurate Gait Recognition in the Wild},
author={Jinkai Zheng and Xinchen Liu and Shuai Wang and Lihao Wang and Chenggang Yan and Wu Liu},
booktitle={ACM International Conference on Multimedia (ACM MM)},
year={2023}
}
@inproceedings{zheng2022gait3d,
title={Gait Recognition in the Wild with Dense 3D Representations and A Benchmark},
author={Jinkai Zheng and Xinchen Liu and Wu Liu and Lingxiao He and Chenggang Yan and Tao Mei},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2022}
}
```
If you think the re-implementation of OpenGait is useful, please cite the following paper:
```
@misc{fan2022opengait,
title={OpenGait: Revisiting Gait Recognition Toward Better Practicality},
author={Chao Fan and Junhao Liang and Chuanfu Shen and Saihui Hou and Yongzhen Huang and Shiqi Yu},
year={2022},
eprint={2211.06597},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
## Acknowledgements
This dataset was collected by [Zheng et al.](https://gait3d.github.io/). The pre-processing instructions are adapted from the [Gait3D-Benchmark](https://github.com/Gait3D/Gait3D-Benchmark) repository.
+164
View File
@@ -0,0 +1,164 @@
# This source is based on https://github.com/AbnerHqC/GaitSet/blob/master/pretreatment.py
import argparse
import logging
import multiprocessing as mp
import os
import pickle
from collections import defaultdict
from functools import partial
from pathlib import Path
from typing import Tuple
import cv2
import numpy as np
from tqdm import tqdm
def imgs2pickle(img_groups: Tuple, output_path: Path, img_size: int = 64, verbose: bool = False, parsing: bool = False, dataset='CASIAB') -> None:
    """Reads a group of images and saves the data in pickle format.

    Args:
        img_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
        output_path (Path): Output path.
        img_size (int, optional): Image resizing size. Defaults to 64.
        verbose (bool, optional): Display debug info. Defaults to False.
        parsing (bool, optional): Treat inputs as parsing maps: a binarized
            silhouette copy is used for cropping/centering and resizing uses
            nearest-neighbor so part labels are not blended. Defaults to False.
        dataset (str, optional): Dataset name; 'GREW' frames are stored as-is
            without cropping or alignment. Defaults to 'CASIAB'.
    """
    sinfo = img_groups[0]
    img_paths = img_groups[1]
    to_pickle = []
    for img_file in sorted(img_paths):
        if verbose:
            logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {img_file}')

        img = cv2.imread(str(img_file), cv2.IMREAD_GRAYSCALE)

        if dataset == 'GREW':
            # GREW frames are pre-aligned; keep them unchanged.
            to_pickle.append(img.astype('uint8'))
            continue

        if parsing:
            # Any non-zero parsing label counts as foreground for cropping.
            img_sil = (img > 0).astype('uint8') * 255
        else:
            img_sil = img

        if img_sil.sum() <= 10000:
            if verbose:
                logging.debug(f'Image sum: {img_sil.sum()}')
            logging.warning(f'{img_file} has no data.')
            continue

        # Get the upper and lower points of the foreground.
        y_sum = img_sil.sum(axis=1)
        y_top = (y_sum != 0).argmax(axis=0)
        y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0)
        img = img[y_top: y_btm + 1, :]
        img_sil = img_sil[y_top: y_btm + 1, :]

        # As the height of a person is larger than the width,
        # use the height to calculate resize ratio.
        ratio = img.shape[1] / img.shape[0]
        ratio_sil = img_sil.shape[1] / img_sil.shape[0]
        assert ratio == ratio_sil
        if parsing:
            # Nearest-neighbor keeps parsing labels discrete.
            img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_NEAREST)
            img_sil = cv2.resize(img_sil, (int(img_size * ratio), img_size), interpolation=cv2.INTER_NEAREST)
        else:
            img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)
            img_sil = cv2.resize(img_sil, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)

        # Get the median of the x-axis and take it as the person's x-center.
        x_csum = img_sil.sum(axis=0).cumsum()
        x_center = None
        for idx, csum in enumerate(x_csum):
            if csum > img_sil.sum() / 2:
                x_center = idx
                break

        # BUG FIX: `if not x_center` also rejected a legitimate center at
        # column 0; only a missing center should be skipped.
        if x_center is None:
            logging.warning(f'{img_file} has no center.')
            continue

        # Get the left and right crop boundaries; pad with zeros when the
        # crop window would fall outside the image.
        half_width = img_size // 2
        left = x_center - half_width
        right = x_center + half_width
        if left <= 0 or right >= img.shape[1]:
            left += half_width
            right += half_width
            _ = np.zeros((img.shape[0], half_width))
            img = np.concatenate([_, img, _], axis=1)

        to_pickle.append(img[:, left: right].astype('uint8'))

    if to_pickle:
        to_pickle = np.asarray(to_pickle)
        dst_path = os.path.join(output_path, *sinfo)
        os.makedirs(dst_path, exist_ok=True)
        pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
        if verbose:
            logging.debug(f'Saving {pkl_path}...')
        pickle.dump(to_pickle, open(pkl_path, 'wb'))
        logging.info(f'Saved {len(to_pickle)} valid frames to {pkl_path}.')

    if len(to_pickle) < 5:
        logging.warning(f'{sinfo} has less than 5 valid data.')
def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: int = 4, verbose: bool = False, parsing: bool = False, dataset: str = 'CASIAB') -> None:
    """Reads a dataset and saves the data in pickle format.

    Args:
        input_path (Path): Dataset root path.
        output_path (Path): Output path.
        img_size (int, optional): Image resizing size. Defaults to 64.
        workers (int, optional): Number of pool workers. Defaults to 4.
        verbose (bool, optional): Display debug info. Defaults to False.
        parsing (bool, optional): Forwarded to imgs2pickle. Defaults to False.
        dataset (str, optional): Dataset name forwarded to imgs2pickle.
    """
    img_groups = defaultdict(list)
    logging.info(f'Listing {input_path}')
    total_files = 0
    for img_path in input_path.rglob('*.png'):
        if 'gei.png' in img_path.as_posix():
            continue  # skip precomputed gait-energy images
        if verbose:
            logging.debug(f'Adding {img_path}')
        # Path layout assumed: .../sid/seq/view/frame.png — TODO confirm
        *_, sid, seq, view, _ = img_path.as_posix().split('/')
        img_groups[(sid, seq, view)].append(img_path)
        total_files += 1

    logging.info(f'Total files listed: {total_files}')
    progress = tqdm(total=len(img_groups), desc='Pretreating', unit='folder')
    worker_fn = partial(imgs2pickle, output_path=output_path, img_size=img_size,
                        verbose=verbose, parsing=parsing, dataset=dataset)
    with mp.Pool(workers) as pool:
        logging.info(f'Start pretreating {input_path}')
        for _ in pool.imap_unordered(worker_fn, img_groups.items()):
            progress.update(1)
    logging.info('Done')
if __name__ == '__main__':
    # CLI entry point: parse arguments, configure logging, run pretreatment.
    parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
    parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
    parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
    parser.add_argument('-l', '--log_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
    parser.add_argument('-n', '--n_workers', default=4, type=int, help='Number of thread workers. Default: 4')
    parser.add_argument('-r', '--img_size', default=64, type=int, help='Image resizing size. Default 64')
    parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
    parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
    # BUG FIX: the help text for -p was copy-pasted from --verbose.
    parser.add_argument('-p', '--parsing', default=False, action='store_true', help='Treat inputs as parsing maps (e.g. Gait3D-Parsing).')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w',
                        format='[%(asctime)s - %(levelname)s]: %(message)s')
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.info('Verbose mode is on.')
        for k, v in args.__dict__.items():
            logging.debug(f'{k}: {v}')

    print(f"parsing: {args.parsing}")
    pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path),
             img_size=args.img_size, workers=args.n_workers, verbose=args.verbose,
             parsing=args.parsing, dataset=args.dataset)
+1 -1
View File
@@ -5,7 +5,7 @@ https://github.com/Gait3D/Gait3D-Benchmark#data-preparation
## Data Pretreatment ## Data Pretreatment
```python ```python
python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --output_path 'Gait3D-sils-64-64-pkl' python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --output_path 'Gait3D-sils-64-64-pkl'
python datasets/pretreatment_smpl.py --input_path 'Gait3D/3D_SMPLs' --output_path 'Gait3D-smpls-pkl' python datasets/Gait3D/pretreatment_smpl.py --input_path 'Gait3D/3D_SMPLs' --output_path 'Gait3D-smpls-pkl'
(optional) python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --img_size 128 --output_path 'Gait3D-sils-128-128-pkl' (optional) python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --img_size 128 --output_path 'Gait3D-sils-128-128-pkl'
+83
View File
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
Author : jinkai Zheng
date 2021/10/30
E-mail: zhengjinkai3@qq.com
"""
import os.path as osp
import time
import os
import threading
import itertools
import numpy as np
import pickle
import argparse
# Module-level CLI options. NOTE(review): parse_args() runs at import time,
# so this script cannot be imported as a library without valid sys.argv.
# BUG FIX: replaced the placeholder description 'Test' with a real one.
parser = argparse.ArgumentParser(description='Gait3D SMPL pretreatment')
parser.add_argument('-i', '--input_path', default='', type=str,
                    help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='', type=str,
                    help='Root path for output.')
opt = parser.parse_args()
def get_pickle(thread_id, id_list, save_dir):
    """Convert per-frame SMPL .npz files into one pickle per sequence.

    For each id/cam/seq directory, stacks per-frame SMPL parameters
    (cam 3-D + poses 72-D + betas 10-D = 85-D per frame) into an array and
    dumps it to save_dir/id/cam/seq/seq.pkl.

    Args:
        thread_id: Numeric id of the worker thread (used for logging only).
        id_list: Subject ids (directory names under the global `data_dir`).
        save_dir: Output root directory.

    NOTE(review): reads the module-level global `data_dir` assigned in
    __main__ — not usable before that assignment.
    """
    for id in sorted(id_list):
        print(f"Process threadID-PID: {thread_id}-{id}")
        cam_list = sorted(os.listdir(osp.join(data_dir, id)))
        for cam in cam_list:
            seq_list = sorted(os.listdir(osp.join(data_dir, id, cam)))
            for seq in seq_list:
                npz_list = sorted(os.listdir(osp.join(data_dir, id, cam, seq)))
                smpl_paras_fras = []
                for npz in npz_list:
                    npz_path = osp.join(data_dir, id, cam, seq, npz)
                    frame = np.load(npz_path, allow_pickle=True)['results'][()][0]
                    smpl_cam = frame['cam']      # 3-D
                    smpl_pose = frame['poses']   # 72-D
                    smpl_shape = frame['betas']  # 10-D
                    smpl_paras = np.concatenate((smpl_cam, smpl_pose, smpl_shape), 0)
                    smpl_paras_fras.append(smpl_paras)
                smpl_paras_fras = np.asarray(smpl_paras_fras)
                out_dir = osp.join(save_dir, id, cam, seq)
                # BUG FIX: exist_ok avoids a crash when re-running on a
                # partially processed output directory.
                os.makedirs(out_dir, exist_ok=True)
                smpl_paras_fras_pkl = os.path.join(out_dir, '{}.pkl'.format(seq))
                pickle.dump(smpl_paras_fras, open(smpl_paras_fras_pkl, 'wb'))
if __name__ == '__main__':
    data_dir = opt.input_path
    save_dir = opt.output_path
    start_time = time.time()
    maxnum_thread = 8

    all_ids = sorted(os.listdir(data_dir))
    num_ids = len(all_ids)

    # Round-robin split of subject ids across worker threads.
    proces = []
    for thread_id in range(maxnum_thread):
        indices = itertools.islice(range(num_ids), thread_id, num_ids, maxnum_thread)
        id_list = [all_ids[i] for i in indices]
        thread_func = threading.Thread(target=get_pickle, args=(thread_id, id_list, save_dir))
        thread_func.start()
        proces.append(thread_func)
    for proc in proces:
        proc.join()

    time_elapsed = time.time() - start_time
    # BUG FIX: message said "Training complete" although this script only
    # pre-processes SMPL data.
    print('Pretreatment complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600,
        (time_elapsed - (time_elapsed // 3600) * 3600) // 60,
        time_elapsed % 60))
+3 -2
View File
@@ -13,15 +13,16 @@
- tqdm - tqdm
- py7zr - py7zr
- kornia - kornia
- einops
Install dependenices by [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html): Install dependenices by [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html):
``` ```
conda install tqdm pyyaml tensorboard opencv kornia -c conda-forge conda install tqdm pyyaml tensorboard opencv kornia einops -c conda-forge
conda install pytorch==1.10 torchvision -c pytorch conda install pytorch==1.10 torchvision -c pytorch
``` ```
Or, Install dependenices by pip: Or, Install dependenices by pip:
``` ```
pip install tqdm pyyaml tensorboard opencv-python kornia pip install tqdm pyyaml tensorboard opencv-python kornia einops
pip install torch==1.10 torchvision==0.11 pip install torch==1.10 torchvision==0.11
``` ```
## Prepare dataset ## Prepare dataset
+13
View File
@@ -62,6 +62,19 @@
| [DeepGaitV2-P3D](https://arxiv.org/pdf/2303.03301.pdf) | 74.4 | - | 64x44 | - | - | | [DeepGaitV2-P3D](https://arxiv.org/pdf/2303.03301.pdf) | 74.4 | - | 64x44 | - | - |
| [SwinGait(Transformer-based)](https://arxiv.org/pdf/2303.03301.pdf) | 75.0 | - | 64x44 | - | - | | [SwinGait(Transformer-based)](https://arxiv.org/pdf/2303.03301.pdf) | 75.0 | - | 64x44 | - | - |
## [Gait3D-Parsing](https://github.com/Gait3D/Gait3D-Benchmark)
| Model | `Rank@1` | `mAP` | Configuration | Input Size | Download checkpoint |
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:----------:|:---------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| [GaitSet(AAAI2019)](https://arxiv.org/pdf/1811.06186.pdf) | 55.9 | 46.7 | [gaitset_gait3d_parsing.yaml](../configs/gaitset/gaitset_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitSet/GaitSet/checkpoints/GaitSet-180000.pt) |
| [GaitPart(CVPR2020)](https://openaccess.thecvf.com/content_CVPR_2020/papers/Fan_GaitPart_Temporal_Part-Based_Model_for_Gait_Recognition_CVPR_2020_paper.pdf) | 43.0 | 33.9 | [gaitpart_gait3d_parsing.yaml](../configs/gaitpart/gaitpart_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitPart/GaitPart/checkpoints/GaitPart-180000.pt) |
| [GLN(ECCV2020)](http://home.ustc.edu.cn/~saihui/papers/eccv2020_gln.pdf) | 45.7 | 38.6 | [gln_phase1_gait3d_parsing.yaml](../configs/gln/gln_phase1_gait3d_parsing.yaml), [gln_phase2_gait3d_parsing.yaml](../configs/gln/gln_phase2_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/blob/main/Gait3D-Parsing/GLN/GLN/checkpoints/GLN_P2-180000.pt) |
| [GaitGL(ICCV2021)](https://openaccess.thecvf.com/content/ICCV2021/papers/Lin_Gait_Recognition_via_Effective_Global-Local_Feature_Representation_and_Local_Temporal_ICCV_2021_paper.pdf) | 47.7 | 36.2 | [gaitgl_gait3d_parsing.yaml](../configs/gaitgl/gaitgl_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitGL/GaitGL/checkpoints/GaitGL-180000.pt) |
| [SMPLGait(CVPR 2022)](https://gait3d.github.io/) | 60.6 | 52.3 | [smplgait_gait3d_parsing.yaml](../configs/smplgait/smplgait_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/SMPLGait/SMPLGait/checkpoints/SMPLGait-180000.pt) |
| [GaitBase(CVPR2023)](https://openaccess.thecvf.com/content/CVPR2023/papers/Fan_OpenGait_Revisiting_Gait_Recognition_Towards_Better_Practicality_CVPR_2023_paper.pdf) | 71.2 | 64.1 | [gaitbase_gait3d_parsing_btz32x2_fixed.yaml](../configs/gaitbase/gaitbase_gait3d_parsing_btz32x2_fixed.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/Baseline/GaitBase_btz32x2_fixed/checkpoints/GaitBase_btz32x2_fixed-120000.pt) |
| [ParsingGait(ACMMM2023)](https://arxiv.org/abs/2308.16739) | 76.2 | 68.2 | [parsinggait_gait3d_parsing.yaml](../configs/parsinggait/parsinggait_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/ParsingGait/ParsingGait/checkpoints/ParsingGait-120000.pt) |
## [CASIA-E](https://www.scidb.cn/en/detail?dataSetId=57be0e918db743279baf44a38d013a06) ## [CASIA-E](https://www.scidb.cn/en/detail?dataSetId=57be0e918db743279baf44a38d013a06)
| Model | `Rank@1.NM` | `Rank@1.BG` | `Rank@1.CL` | Input size| Configuration | | Model | `Rank@1.NM` | `Rank@1.BG` | `Rank@1.CL` | Input size| Configuration |
+17
View File
@@ -25,6 +25,23 @@ class BaseSilTransform():
return x / self.divsor return x / self.divsor
class BaseParsingCuttingTransform():
    """Cuts left/right margins off parsing maps and normalizes pixel values.

    The cut width defaults to 10 px per 64 px of image width on each side
    (e.g. 64x128 -> 64x88). Values are divided by `divsor` only when the
    input looks like an un-normalized image (max value 255); otherwise the
    data is assumed already scaled and is returned unchanged in value.
    """

    def __init__(self, divsor=255.0, cutting=None):
        # divsor: divisor applied to 0-255 inputs (typo kept for
        # backward-compatibility with existing configs/callers).
        # cutting: explicit per-side cut width; None -> infer from width.
        self.divsor = divsor
        self.cutting = cutting

    def __call__(self, x):
        if self.cutting is not None:
            cutting = self.cutting
        else:
            cutting = int(x.shape[-1] // 64) * 10
        # BUG FIX: when the inferred cutting is 0 (width < 64), the slice
        # x[..., 0:-0] == x[..., 0:0] produced an EMPTY array; only cut
        # when there is actually something to cut.
        if cutting > 0:
            x = x[..., cutting:-cutting]
        if x.max() == 255:  # 255 == 255.0, so one comparison covers both
            return x / self.divsor
        else:
            return x / 1.0
class BaseSilCuttingTransform(): class BaseSilCuttingTransform():
def __init__(self, divsor=255.0, cutting=None): def __init__(self, divsor=255.0, cutting=None):
self.divsor = divsor self.divsor = divsor
+80
View File
@@ -0,0 +1,80 @@
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.parameter import Parameter
import math
class Normalize(nn.Module):
    """L-p normalisation along dim 1 (default p=2: unit vectors)."""

    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        # ||x||_p along dim 1, kept as a singleton dim so broadcasting
        # divides every row by its own norm.
        scale = x.pow(self.power).sum(1, keepdim=True).pow(1.0 / self.power)
        return x.div(scale)
class GraphConvolution(nn.Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    Computes BN(adj @ (input @ W) + b), where the BatchNorm1d runs over
    the flattened (adj_size * out_features) axis of each sample.
    """

    def __init__(self, in_features, out_features, adj_size=9, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.adj_size = adj_size
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        # BatchNorm over all node features of a sample flattened together.
        self.bn = nn.BatchNorm1d(out_features * adj_size)

    def reset_parameters(self):
        # Uniform init scaled by 1/sqrt(fan_out), as in the reference code.
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)
        if self.bias is not None:
            self.bias.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        # Project node features, then aggregate over the graph neighbourhood.
        projected = torch.matmul(input, self.weight)
        aggregated = torch.bmm(adj, projected)
        if self.bias is not None:
            aggregated = aggregated + self.bias
        # Flatten (nodes, features) for BatchNorm1d, then restore the shape.
        normalized = self.bn(aggregated.view(aggregated.size(0), -1))
        return normalized.view(aggregated.size(0), aggregated.size(1), aggregated.size(2))

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
            + str(self.in_features) + ' -> ' \
            + str(self.out_features) + ')'
class GCN(nn.Module):
    """Two-layer graph head: (dropout -> GraphConvolution -> ReLU) twice."""

    def __init__(self, adj_size, nfeat, nhid, isMeanPooling=True):
        super(GCN, self).__init__()
        self.adj_size = adj_size
        self.nhid = nhid
        # NOTE(review): isMeanPooling is stored but never read in forward().
        self.isMeanPooling = isMeanPooling
        self.gc1 = GraphConvolution(nfeat, nhid, adj_size)
        self.gc2 = GraphConvolution(nhid, nhid, adj_size)

    def forward(self, x, adj):
        # Dropout (p=0.5) is active only while self.training is True.
        hidden = F.relu(self.gc1(F.dropout(x, 0.5, training=self.training), adj))
        hidden = F.relu(self.gc2(F.dropout(hidden, 0.5, training=self.training), adj))
        return hidden
+268
View File
@@ -0,0 +1,268 @@
import torch
from ..base_model import BaseModel
from ..modules import SetBlockWrapper, HorizontalPoolingPyramid, PackSequenceWrapper, SeparateFCs, SeparateBNNecks
from torch.nn import functional as F
import numpy as np
from ..backbones.gcn import GCN
def L_Matrix(adj_npy, adj_size):
    """Symmetrically normalise an adjacency matrix: L = D^-1/2 A D^-1/2.

    D is diagonal with D_ii = deg(i)^(-1/2), where deg(i) counts entries
    equal to 1 in row i; rows with deg == 0 keep D_ii = 0.
    """
    degrees = (adj_npy == 1).sum(axis=1)
    inv_sqrt = np.zeros(adj_size)
    connected = degrees > 0
    inv_sqrt[connected] = degrees[connected] ** (-1.0 / 2)
    D = np.diag(inv_sqrt)
    return np.matmul(np.matmul(D, adj_npy), D)
def get_fine_adj_npy():
    """Return the normalised adjacency matrix of the 11-part fine graph.

    Nodes are the fine parsing parts (head, torso, arms, hands, legs,
    feet, dress); a 1 marks a pair of physically adjacent parts.
    """
    #               1  2  3  4  5  6  7  8  9 10 11
    raw = np.array([
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],  # 1
        [1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0],  # 2
        [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1],  # 3
        [0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1],  # 4
        [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0],  # 5
        [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],  # 6
        [0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1],  # 7
        [0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1],  # 8
        [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0],  # 9
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],  # 10
        [1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1],  # 11
    ])
    # len() gives the number of rows, i.e. the node count.
    return L_Matrix(raw, len(raw))
def get_coarse_adj_npy():
    """Return the normalised adjacency matrix of the 5-part coarse graph.

    Node 1 (head/torso/dress) connects to every limb group; the four
    limb groups connect only to node 1 and to themselves.
    """
    #               1  2  3  4  5
    raw = np.array([
        [1, 1, 1, 1, 1],  # 1
        [1, 1, 0, 0, 0],  # 2
        [1, 0, 1, 0, 0],  # 3
        [1, 0, 0, 1, 0],  # 4
        [1, 0, 0, 0, 1],  # 5
    ])
    # len() gives the number of rows, i.e. the node count.
    return L_Matrix(raw, len(raw))
class ParsingGait(BaseModel):
    """ParsingGait (ACM MM 2023, arXiv:2308.16739).

    A GaitBase-style silhouette pipeline augmented with a GCN branch that
    reasons over human-parsing part features. Depending on `gcn_cfg`, the
    branch uses the fine 11-part graph, the coarse 5-part graph, or both.
    """

    def build_network(self, model_cfg):
        """Build the backbone, pooling/embedding heads and the part GCN(s)."""
        self.Backbone = self.get_backbone(model_cfg['backbone_cfg'])
        self.Backbone = SetBlockWrapper(self.Backbone)
        self.FCs = SeparateFCs(**model_cfg['SeparateFCs'])
        self.BNNecks = SeparateBNNecks(**model_cfg['SeparateBNNecks'])
        self.TP = PackSequenceWrapper(torch.max)
        self.HPP = HorizontalPoolingPyramid(bin_num=model_cfg['bin_num'])

        nfeat = model_cfg['SeparateFCs']['in_channels']
        gcn_cfg = model_cfg['gcn_cfg']
        self.fine_parts = gcn_cfg['fine_parts']
        coarse_parts = gcn_cfg['coarse_parts']
        self.only_fine_graph = gcn_cfg['only_fine_graph']
        self.only_coarse_graph = gcn_cfg['only_coarse_graph']
        self.combine_fine_coarse_graph = gcn_cfg['combine_fine_coarse_graph']
        # The gammas blend part-masked features against their complement
        # (see ParsPartfor*Graph); 0.75 initially favours the part itself.
        if self.only_fine_graph:
            fine_adj_npy = get_fine_adj_npy()
            self.fine_adj_npy = torch.from_numpy(fine_adj_npy).float()
            self.gcn_fine = GCN(self.fine_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_fine = torch.nn.Parameter(torch.ones(self.fine_parts) * 0.75)
        elif self.only_coarse_graph:
            coarse_adj_npy = get_coarse_adj_npy()
            self.coarse_adj_npy = torch.from_numpy(coarse_adj_npy).float()
            self.gcn_coarse = GCN(coarse_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_coarse = torch.nn.Parameter(torch.ones(coarse_parts) * 0.75)
        elif self.combine_fine_coarse_graph:
            fine_adj_npy = get_fine_adj_npy()
            self.fine_adj_npy = torch.from_numpy(fine_adj_npy).float()
            self.gcn_fine = GCN(self.fine_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_fine = torch.nn.Parameter(torch.ones(self.fine_parts) * 0.75)
            coarse_adj_npy = get_coarse_adj_npy()
            self.coarse_adj_npy = torch.from_numpy(coarse_adj_npy).float()
            self.gcn_coarse = GCN(coarse_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_coarse = torch.nn.Parameter(torch.ones(coarse_parts) * 0.75)
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")

    def PPforGCN(self, x):
        """
        Part Pooling for GCN: mean + max over the spatial dims.
        x   : [n, p, c, h, w]
        ret : [n, p, c]
        """
        n, p, c, h, w = x.size()
        z = x.view(n, p, c, -1)        # [n, p, c, h*w]
        z = z.mean(-1) + z.max(-1)[0]  # [n, p, c]
        return z

    def ParsPartforFineGraph(self, mask_resize, z):
        """
        Split features into the 11 fine part groups.
        mask_resize: [n*s, h, w] integer parsing labels
        z          : [n*s, c, h, w] backbone features
        return     : [n*s, 11, c, h, w]
        ***Fine Parts:
        # 0: Background,
        1: Head,
        2: Torso,
        3: Left-arm,
        4: Right-arm,
        5: Left-hand,
        6: Right-hand,
        7: Left-leg,
        8: Right-leg,
        9: Left-foot,
        10: Right-foot,
        11: Dress
        """
        fine_mask_list = list()
        for i in range(1, self.fine_parts + 1):
            fine_mask_list.append((mask_resize.long() == i))  # boolean mask per class
        fine_z_list = list()
        for i in range(len(fine_mask_list)):
            mask = fine_mask_list[i].unsqueeze(1)
            # Blend part vs. non-part features with the learnable gamma_i.
            fine_z_list.append((mask.float() * z * self.gammas_fine[i] + (~mask).float() * z * (1.0 - self.gammas_fine[i])).unsqueeze(1))
        fine_z_feat = torch.cat(fine_z_list, dim=1)  # [n*s, 11, c, h, w]
        return fine_z_feat

    def ParsPartforCoarseGraph(self, mask_resize, z):
        """
        Split features into the 5 coarse part groups.
        mask_resize: [n*s, h, w] integer parsing labels
        z          : [n*s, c, h, w] backbone features
        return     : [n*s, 5, c, h, w]
        ***Coarse Parts:
        1: [1, 2, 11] Head, Torso, Dress
        2: [3, 5] Left-arm, Left-hand
        3: [4, 6] Right-arm, Right-hand
        4: [7, 9] Left-leg, Left-foot
        5: [8, 10] Right-leg, Right-foot
        """
        coarse_mask_list = list()
        coarse_parts = [[1, 2, 11], [3, 5], [4, 6], [7, 9], [8, 10]]
        for coarse_part in coarse_parts:
            # Start from an all-False mask, then OR in each member class
            # (bool tensor addition acts as logical OR).
            part = mask_resize.long() == -1
            for i in coarse_part:
                part += (mask_resize.long() == i)
            coarse_mask_list.append(part)
        coarse_z_list = list()
        for i in range(len(coarse_mask_list)):
            mask = coarse_mask_list[i].unsqueeze(1)
            # Blend part vs. non-part features with the learnable gamma_i.
            coarse_z_list.append((mask.float() * z * self.gammas_coarse[i] + (~mask).float() * z * (1.0 - self.gammas_coarse[i])).unsqueeze(1))
        coarse_z_feat = torch.cat(coarse_z_list, dim=1)  # [n*s, 5, c, h, w]
        return coarse_z_feat

    def ParsPartforGCN(self, x, pars):
        """
        x    : [n, c, s, h, w] backbone features
        pars : [n, 1, s, H, W] parsing maps
        return: (fine, coarse) part features — [n*s, 11, c, h, w] and/or
                [n*s, 5, c, h, w], with None for a disabled branch.
        """
        n, c, s, h, w = x.size()
        # Downsample the parsing map to the feature resolution; 'nearest'
        # keeps the integer class labels intact.
        mask_resize = F.interpolate(input=pars.squeeze(1), size=(h, w), mode='nearest')
        mask_resize = mask_resize.view(n*s, h, w)
        z = x.transpose(1, 2).reshape(n*s, c, h, w)
        if self.only_fine_graph:
            return self.ParsPartforFineGraph(mask_resize, z), None
        elif self.only_coarse_graph:
            return None, self.ParsPartforCoarseGraph(mask_resize, z)
        elif self.combine_fine_coarse_graph:
            fine_z_feat = self.ParsPartforFineGraph(mask_resize, z)
            coarse_z_feat = self.ParsPartforCoarseGraph(mask_resize, z)
            return fine_z_feat, coarse_z_feat
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")

    def get_gcn_feat(self, n, input, adj_np, is_cuda, seqL):
        """Run part features through the matching GCN and pool over time.

        input : [n*s, p, c, h, w] part feature maps
        adj_np: [p, p] normalised adjacency (torch tensor, CPU)
        return: [n, p, c]
        """
        input_ps = self.PPforGCN(input)  # [n*s, p, c]
        n_s, p, c = input_ps.size()
        # Fix: `adj` was previously assigned only on the CUDA path, which
        # raised NameError during CPU inference.
        if is_cuda:
            adj = adj_np.cuda()
        else:
            adj = adj_np
        adj = adj.repeat(n_s, 1, 1)
        if p == 11:
            output_ps = self.gcn_fine(input_ps, adj)    # [n*s, 11, c]
        elif p == 5:
            output_ps = self.gcn_coarse(input_ps, adj)  # [n*s, 5, c]
        else:
            raise ValueError(f"The parsing parts should be 11 or 5, but got {p}")
        output_ps = output_ps.view(n, n_s//n, p, c)  # [n, s, p, c]
        # Temporal max-pooling over the sequence dimension.
        output_ps = self.TP(output_ps, seqL, dim=1, options={"dim": 1})[0]  # [n, p, c]
        return output_ps

    def forward(self, inputs):
        """Standard OpenGait forward: returns training/visual/inference feats."""
        ipts, labs, _, _, seqL = inputs

        pars = ipts[0]
        if len(pars.size()) == 4:
            # Add the channel dim: [n, s, h, w] -> [n, 1, s, h, w].
            pars = pars.unsqueeze(1)
        del ipts
        outs = self.Backbone(pars)  # [n, c, s, h, w]
        outs_n = outs.size(0)

        # Split features by parsing classes:
        # outs_ps_fine:   [n*s, 11, c, h, w]
        # outs_ps_coarse: [n*s, 5, c, h, w]
        outs_ps_fine, outs_ps_coarse = self.ParsPartforGCN(outs, pars)

        is_cuda = pars.is_cuda
        if self.only_fine_graph:
            outs_ps = self.get_gcn_feat(outs_n, outs_ps_fine, self.fine_adj_npy, is_cuda, seqL)      # [n, 11, c]
        elif self.only_coarse_graph:
            outs_ps = self.get_gcn_feat(outs_n, outs_ps_coarse, self.coarse_adj_npy, is_cuda, seqL)  # [n, 5, c]
        elif self.combine_fine_coarse_graph:
            outs_fine = self.get_gcn_feat(outs_n, outs_ps_fine, self.fine_adj_npy, is_cuda, seqL)       # [n, 11, c]
            outs_coarse = self.get_gcn_feat(outs_n, outs_ps_coarse, self.coarse_adj_npy, is_cuda, seqL)  # [n, 5, c]
            outs_ps = torch.cat([outs_fine, outs_coarse], 1)  # [n, 16, c]
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")
        outs_ps = outs_ps.transpose(1, 2).contiguous()  # [n, c, ps]

        # Temporal Pooling, TP
        outs = self.TP(outs, seqL, options={"dim": 2})[0]  # [n, c, h, w]
        # Horizontal Pooling Matching, HPM
        feat = self.HPP(outs)  # [n, c, p]

        # Concatenate horizontal-strip features with graph part features.
        feat = torch.cat([feat, outs_ps], dim=-1)  # [n, c, p+ps]

        embed_1 = self.FCs(feat)  # [n, c, p+ps]
        embed_2, logits = self.BNNecks(embed_1)  # [n, c, p+ps]
        embed = embed_1

        n, _, s, h, w = pars.size()
        retval = {
            'training_feat': {
                'triplet': {'embeddings': embed_1, 'labels': labs},
                'softmax': {'logits': logits, 'labels': labs}
            },
            'visual_summary': {
                'image/pars': pars.view(n*s, 1, h, w)
            },
            'inference_feat': {
                'embeddings': embed
            }
        }
        return retval