Update ParsingGait (#160)
* Update ParsingGait * Clear up the confusion Clear up the confusion about gait3d and gait3d-parsing. * Update 0.get_started.md * Add BaseParsingCuttingTransform * Update gcn.py * Create gaitbase_gait3d_parsing_btz32x2_fixed.yaml * Add gait3d_parsing config file * Update 1.model_zoo.md Update Gait3D-Parsing checkpoints * Update 1.model_zoo.md add configuration * Update 1.model_zoo.md center text --------- Co-authored-by: Junhao Liang <43094337+darkliang@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,103 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
# data_in_use: [true, false]
|
||||
num_workers: 1
|
||||
remove_no_gallery: false # Remove probe if no gallery for it
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 120000
|
||||
save_name: GaitBase_btz32x2_fixed
|
||||
eval_func: evaluate_Gait3D
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 4
|
||||
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
|
||||
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
|
||||
metric: euc # cos
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: Baseline
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
channels: # Layers configuration for automatically model construction
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 16
|
||||
SeparateBNNecks:
|
||||
class_num: 3000
|
||||
in_channels: 256
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones: # Learning Rate Reduction at each milestones
|
||||
- 40000
|
||||
- 80000
|
||||
- 100000
|
||||
scheduler: MultiStepLR
|
||||
trainer_cfg:
|
||||
enable_float16: true # half-precision float for memory reduction and speedup
|
||||
fix_BN: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 40000
|
||||
save_name: GaitBase_btz32x2_fixed
|
||||
sync_BN: true
|
||||
total_iter: 120000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
|
||||
- 2 # batch_size[1] indicates Samples sequence for each Identity
|
||||
frames_num_fixed: 30 # fixed frames number for training
|
||||
frames_num_max: 50 # max frames number for unfixed training
|
||||
frames_num_min: 10 # min frames number for unfixed training
|
||||
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,74 @@
|
||||
# Note : *** the batch_size should be equal to the gpus number at the test phase!!! ***
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: GaitGL
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos
|
||||
sampler:
|
||||
batch_size: 4
|
||||
sample_type: all_ordered
|
||||
type: InferenceSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 1
|
||||
type: CrossEntropyLoss
|
||||
log_accuracy: true
|
||||
label_smooth: false
|
||||
log_prefix: softmax
|
||||
|
||||
model_cfg:
|
||||
model: GaitGL
|
||||
channels: [32, 64, 128]
|
||||
class_num: 3000
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 1.0e-3
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 5.0e-4
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 30000
|
||||
save_name: GaitGL
|
||||
sync_BN: true
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 32
|
||||
- 4
|
||||
frames_num_fixed: 30
|
||||
frames_skip_num: 0
|
||||
sample_type: fixed_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,86 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: GaitPart
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos
|
||||
sampler:
|
||||
batch_size: 16
|
||||
sample_type: all_ordered
|
||||
type: InferenceSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
|
||||
model_cfg:
|
||||
model: GaitPart
|
||||
backbone_cfg:
|
||||
in_channels: 1
|
||||
layers_cfg:
|
||||
- BC-32
|
||||
- BC-32
|
||||
- M
|
||||
- FC-64-2
|
||||
- FC-64-2
|
||||
- M
|
||||
- FC-128-3
|
||||
- FC-128-3
|
||||
type: Plain
|
||||
SeparateFCs:
|
||||
in_channels: 128
|
||||
out_channels: 128
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.001
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
log_iter: 100
|
||||
with_test: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 30000
|
||||
save_name: GaitPart
|
||||
sync_BN: false
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size:
|
||||
- 32
|
||||
- 4
|
||||
frames_num_fixed: 30
|
||||
frames_num_max: 50
|
||||
frames_num_min: 25
|
||||
frames_skip_num: 10
|
||||
sample_type: fixed_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,81 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: GaitSet
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos
|
||||
sampler:
|
||||
batch_size: 16
|
||||
sample_type: all_ordered
|
||||
type: InferenceSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
|
||||
model_cfg:
|
||||
model: GaitSet
|
||||
in_channels:
|
||||
- 1
|
||||
- 32
|
||||
- 64
|
||||
- 128
|
||||
SeparateFCs:
|
||||
in_channels: 128
|
||||
out_channels: 256
|
||||
parts_num: 62
|
||||
bin_num:
|
||||
- 16
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.001
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
log_iter: 100
|
||||
with_test: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 30000
|
||||
save_name: GaitSet
|
||||
sync_BN: false
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size:
|
||||
- 32
|
||||
- 4
|
||||
frames_num_fixed: 30
|
||||
frames_num_max: 50
|
||||
frames_num_min: 25
|
||||
sample_type: fixed_unordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,97 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
num_workers: 1
|
||||
cache: false
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_distributed: true
|
||||
enable_float16: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: GLN_P1
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos
|
||||
sampler:
|
||||
batch_size: 8
|
||||
sample_type: all_ordered
|
||||
type: InferenceSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
# - loss_term_weight: 0.1
|
||||
# scale: 1
|
||||
# type: CrossEntropyLoss
|
||||
# log_prefix: softmax
|
||||
|
||||
model_cfg:
|
||||
model: GLN
|
||||
class_num: 3000
|
||||
lateral_dim : 256
|
||||
hidden_dim : 256
|
||||
dropout : 0.9
|
||||
in_channels:
|
||||
- 1
|
||||
- 32
|
||||
- 64
|
||||
- 128
|
||||
SeparateFCs:
|
||||
in_channels: 256
|
||||
out_channels: 256
|
||||
parts_num: 93
|
||||
bin_num:
|
||||
- 16
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
Lateral_pretraining: true
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.001
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 0.0005
|
||||
# lr: 0.0001
|
||||
# momentum: 0.9
|
||||
# solver: Adam
|
||||
# weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
fix_layers: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
optimizer_reset: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 30000
|
||||
save_name: GLN_P1
|
||||
sync_BN: true
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size:
|
||||
- 32
|
||||
- 4
|
||||
frames_num_fixed: 30
|
||||
frames_skip_num: 0
|
||||
sample_type: fixed_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,93 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_distributed: true
|
||||
enable_float16: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: GLN_P2
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos
|
||||
sampler:
|
||||
batch_size: 4
|
||||
sample_type: all_ordered
|
||||
type: InferenceSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 0.1
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
|
||||
model_cfg:
|
||||
model: GLN
|
||||
class_num: 3000
|
||||
lateral_dim : 256
|
||||
hidden_dim : 256
|
||||
dropout : 0.9
|
||||
in_channels:
|
||||
- 1
|
||||
- 32
|
||||
- 64
|
||||
- 128
|
||||
SeparateFCs:
|
||||
in_channels: 256
|
||||
out_channels: 256
|
||||
parts_num: 93
|
||||
bin_num:
|
||||
- 16
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
Lateral_pretraining: false
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.001
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_distributed: true
|
||||
enable_float16: true
|
||||
fix_layers: false
|
||||
log_iter: 100
|
||||
optimizer_reset: true
|
||||
scheduler_reset: true
|
||||
restore_ckpt_strict: false
|
||||
restore_hint: output/Gait3D-Parsing/GLN/GLN_P1/checkpoints/GLN_P1-180000.pt
|
||||
save_iter: 30000
|
||||
save_name: GLN_P2
|
||||
sync_BN: true
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size:
|
||||
- 32
|
||||
- 4
|
||||
frames_num_fixed: 30
|
||||
frames_skip_num: 0
|
||||
sample_type: fixed_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,109 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: ./datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
# data_in_use: [true, false]
|
||||
num_workers: 1
|
||||
remove_no_gallery: false # Remove probe if no gallery for it
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 120000
|
||||
save_name: ParsingGait
|
||||
eval_func: evaluate_Gait3D
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 4
|
||||
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
|
||||
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
|
||||
metric: euc # cos
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: ParsingGait
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
channels: # Layers configuration for automatically model construction
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 21
|
||||
SeparateBNNecks:
|
||||
class_num: 3000
|
||||
in_channels: 256
|
||||
parts_num: 21
|
||||
bin_num:
|
||||
- 16
|
||||
gcn_cfg:
|
||||
fine_parts: 11
|
||||
coarse_parts: 5
|
||||
only_fine_graph: false
|
||||
only_coarse_graph: true
|
||||
combine_fine_coarse_graph: false
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones: # Learning Rate Reduction at each milestones
|
||||
- 40000
|
||||
- 80000
|
||||
- 100000
|
||||
scheduler: MultiStepLR
|
||||
trainer_cfg:
|
||||
enable_float16: true # half-precision float for memory reduction and speedup
|
||||
fix_BN: false
|
||||
with_test: True
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 40000
|
||||
save_name: ParsingGait
|
||||
sync_BN: true
|
||||
total_iter: 120000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
|
||||
- 2 # batch_size[1] indicates Samples sequence for each Identity
|
||||
frames_num_fixed: 30 # fixed frames number for training
|
||||
frames_num_max: 50 # max frames number for unfixed training
|
||||
frames_num_min: 10 # min frames number for unfixed training
|
||||
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
@@ -0,0 +1,107 @@
|
||||
data_cfg:
|
||||
dataset_name: Gait3D-Parsing
|
||||
dataset_root: your_path
|
||||
dataset_partition: datasets/Gait3D/Gait3D.json # Uses the same Gait3D.json as Gait3D dataset
|
||||
data_in_use: [true, false, true]
|
||||
num_workers: 1
|
||||
remove_no_gallery: false # Remove probe if no gallery for it
|
||||
test_dataset_name: Gait3D-Parsing
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 180000
|
||||
save_name: SMPLGait
|
||||
eval_func: evaluate_Gait3D
|
||||
metric: cos # cos euc
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 4
|
||||
sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options: fixed_unordered
|
||||
frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
img_w: 64
|
||||
- type: NoOperation
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 0.1
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: SMPLGait
|
||||
backbone_cfg:
|
||||
in_channels: 1
|
||||
layers_cfg: # Layers configuration for automatically model construction
|
||||
- BC-64
|
||||
- BC-64
|
||||
- M
|
||||
- BC-128
|
||||
- BC-128
|
||||
- M
|
||||
- BC-256
|
||||
- BC-256
|
||||
# - M
|
||||
# - BC-512
|
||||
# - BC-512
|
||||
type: Plain
|
||||
SeparateFCs:
|
||||
in_channels: 256
|
||||
out_channels: 256
|
||||
parts_num: 31
|
||||
SeparateBNNecks:
|
||||
class_num: 3000
|
||||
in_channels: 256
|
||||
parts_num: 31
|
||||
bin_num:
|
||||
- 16
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.001
|
||||
momentum: 0.9
|
||||
solver: Adam
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones: # Learning Rate Reduction at each milestones
|
||||
- 30000
|
||||
- 90000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true # half-precision float for memory reduction and speedup
|
||||
fix_BN: false
|
||||
log_iter: 100
|
||||
with_test: false
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 30000
|
||||
save_name: SMPLGait
|
||||
sync_BN: true
|
||||
total_iter: 180000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 32 # TripletSampler, batch_size[0] indicates Number of Identity
|
||||
- 4 # batch_size[1] indicates Samples sequence for each Identity
|
||||
frames_num_fixed: 30 # fixed frames number for training
|
||||
frames_num_max: 50 # max frames number for unfixed training
|
||||
frames_num_min: 25 # min frames number for unfixed training
|
||||
sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseParsingCuttingTransform
|
||||
img_w: 64
|
||||
- type: NoOperation
|
||||
@@ -0,0 +1,43 @@
|
||||
# Gait3D-Parsing
|
||||
This is the pre-processing instructions for the Gait3D-Parsing dataset. The original dataset can be found [here](https://gait3d.github.io/gait3d-parsing-hp/). The original dataset is not publicly available. You need to request access to the dataset in order to download it. This README explains how to extract the original dataset and convert it to a format suitable for OpenGait.
|
||||
## Data Preparation
|
||||
https://github.com/Gait3D/Gait3D-Benchmark#data-preparation
|
||||
## Data Pretreatment
|
||||
```python
|
||||
python datasets/Gait3D-Parsing/pretreatment_gps.py -i 'Gait3D/2D_Parsings' -o 'Gait3D-pars-64-64-pkl' -r 64 -p
|
||||
```
|
||||
|
||||
## Train
|
||||
### ParsingGait model:
|
||||
`CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --master_port 12345 --nproc_per_node=4 opengait/main.py --cfgs ./configs/parsinggait/parsinggait_gait3d_parsing.yaml --phase train`
|
||||
|
||||
## Citation
|
||||
If you use this dataset in your research, please cite the following paper:
|
||||
```
|
||||
@inproceedings{zheng2023parsinggait,
|
||||
title={Parsing is All You Need for Accurate Gait Recognition in the Wild},
|
||||
author={Jinkai Zheng, Xinchen Liu, Shuai Wang, Lihao Wang, Chenggang Yan, Wu Liu},
|
||||
booktitle={ACM International Conference on Multimedia (ACM MM)},
|
||||
year={2023}
|
||||
}
|
||||
|
||||
@inproceedings{zheng2022gait3d,
|
||||
title={Gait Recognition in the Wild with Dense 3D Representations and A Benchmark},
|
||||
author={Jinkai Zheng, Xinchen Liu, Wu Liu, Lingxiao He, Chenggang Yan, Tao Mei},
|
||||
booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
If you think the re-implementation of OpenGait is useful, please cite the following paper:
|
||||
```
|
||||
@misc{fan2022opengait,
|
||||
title={OpenGait: Revisiting Gait Recognition Toward Better Practicality},
|
||||
author={Chao Fan and Junhao Liang and Chuanfu Shen and Saihui Hou and Yongzhen Huang and Shiqi Yu},
|
||||
year={2022},
|
||||
eprint={2211.06597},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
## Acknowledgements
|
||||
This dataset was collected by [Zheng et al.](https://gait3d.github.io/). The pre-processing instructions are modified from the [Gait3D-Benchmark repository](https://github.com/Gait3D/Gait3D-Benchmark).
|
||||
@@ -0,0 +1,164 @@
|
||||
# This source is based on https://github.com/AbnerHqC/GaitSet/blob/master/pretreatment.py
|
||||
import argparse
|
||||
import logging
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def imgs2pickle(img_groups: Tuple, output_path: Path, img_size: int = 64, verbose: bool = False, parsing: bool = False, dataset='CASIAB') -> None:
    """Reads a group of images and saves the data in pickle format.

    Args:
        img_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
        output_path (Path): Output path.
        img_size (int, optional): Image resizing size. Defaults to 64.
        verbose (bool, optional): Display debug info. Defaults to False.
        parsing (bool, optional): Treat inputs as parsing (part-label) maps:
            the silhouette is derived by thresholding, and nearest-neighbor
            interpolation is used when resizing so labels are not mixed.
            Defaults to False.
        dataset (str, optional): Dataset name; 'GREW' frames are stored as-is
            without cropping/centering. Defaults to 'CASIAB'.
    """
    sinfo = img_groups[0]
    img_paths = img_groups[1]
    to_pickle = []
    for img_file in sorted(img_paths):
        if verbose:
            logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {img_file}')

        img = cv2.imread(str(img_file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None on unreadable files instead of raising.
            logging.warning(f'{img_file} could not be read.')
            continue

        if dataset == 'GREW':
            to_pickle.append(img.astype('uint8'))
            continue

        if parsing:
            # Any non-zero pixel of a parsing map belongs to the body.
            img_sil = (img > 0).astype('uint8') * 255
        else:
            img_sil = img
        if img_sil.sum() <= 10000:
            if verbose:
                logging.debug(f'Image sum: {img_sil.sum()}')
            logging.warning(f'{img_file} has no data.')
            continue

        # Get the upper and lower points of the silhouette.
        y_sum = img_sil.sum(axis=1)
        y_top = (y_sum != 0).argmax(axis=0)
        y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0)
        img = img[y_top: y_btm + 1, :]
        img_sil = img_sil[y_top: y_btm + 1, :]

        # As the height of a person is larger than the width,
        # use the height to calculate resize ratio.
        ratio = img.shape[1] / img.shape[0]
        ratio_sil = img_sil.shape[1] / img_sil.shape[0]
        assert ratio == ratio_sil
        if parsing:
            # INTER_NEAREST keeps the integer part labels intact.
            img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_NEAREST)
            img_sil = cv2.resize(img_sil, (int(img_size * ratio), img_size), interpolation=cv2.INTER_NEAREST)
        else:
            img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)
            img_sil = cv2.resize(img_sil, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)

        # Get the median of the x-axis and take it as the person's x-center.
        x_csum = img_sil.sum(axis=0).cumsum()
        x_center = None
        for idx, csum in enumerate(x_csum):
            if csum > img_sil.sum() / 2:
                x_center = idx
                break

        # BUGFIX: `if not x_center:` also rejected a legitimate center found
        # at column 0; only a truly missing center (None) should be skipped.
        if x_center is None:
            logging.warning(f'{img_file} has no center.')
            continue

        # Get the left and right crop boundaries around the center.
        half_width = img_size // 2
        left = x_center - half_width
        right = x_center + half_width
        if left <= 0 or right >= img.shape[1]:
            # Pad both sides so the crop window always fits.
            left += half_width
            right += half_width
            pad = np.zeros((img.shape[0], half_width))
            img = np.concatenate([pad, img, pad], axis=1)

        to_pickle.append(img[:, left: right].astype('uint8'))

    if to_pickle:
        to_pickle = np.asarray(to_pickle)
        dst_path = os.path.join(output_path, *sinfo)
        os.makedirs(dst_path, exist_ok=True)
        pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
        if verbose:
            logging.debug(f'Saving {pkl_path}...')
        # BUGFIX: use a context manager so the file handle is always closed.
        with open(pkl_path, 'wb') as f:
            pickle.dump(to_pickle, f)
        logging.info(f'Saved {len(to_pickle)} valid frames to {pkl_path}.')

    if len(to_pickle) < 5:
        logging.warning(f'{sinfo} has less than 5 valid data.')
|
||||
def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: int = 4, verbose: bool = False, parsing: bool = False, dataset: str = 'CASIAB') -> None:
    """Reads a dataset and saves the data in pickle format.

    Args:
        input_path (Path): Dataset root path.
        output_path (Path): Output path.
        img_size (int, optional): Image resizing size. Defaults to 64.
        workers (int, optional): Number of pool workers. Defaults to 4.
        verbose (bool, optional): Display debug info. Defaults to False.
        parsing (bool, optional): Forwarded to imgs2pickle (parsing maps).
            Defaults to False.
        dataset (str, optional): Forwarded to imgs2pickle. Defaults to 'CASIAB'.
    """
    img_groups = defaultdict(list)
    logging.info(f'Listing {input_path}')
    total_files = 0
    for img_path in input_path.rglob('*.png'):
        # Skip pre-rendered GEI images; only per-frame pngs are wanted.
        if 'gei.png' in img_path.as_posix():
            continue
        if verbose:
            logging.debug(f'Adding {img_path}')
        # Path layout assumed: .../<sid>/<seq>/<view>/<frame>.png
        *_, sid, seq, view, _ = img_path.as_posix().split('/')
        img_groups[(sid, seq, view)].append(img_path)
        total_files += 1

    logging.info(f'Total files listed: {total_files}')

    progress = tqdm(total=len(img_groups), desc='Pretreating', unit='folder')
    # Bind the fixed arguments once; only the (key, paths) items vary.
    worker_fn = partial(imgs2pickle, output_path=output_path,
                        img_size=img_size, verbose=verbose,
                        parsing=parsing, dataset=dataset)
    with mp.Pool(workers) as pool:
        logging.info(f'Start pretreating {input_path}')
        for _ in pool.imap_unordered(worker_fn, img_groups.items()):
            progress.update(1)
    # BUGFIX: the progress bar was never closed, which can leave a dangling
    # line on stderr and skew nested bars.
    progress.close()
    logging.info('Done')
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line driver: parse options, configure logging, run pretreat().
    parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
    parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
    parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
    parser.add_argument('-l', '--log_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
    parser.add_argument('-n', '--n_workers', default=4, type=int, help='Number of thread workers. Default: 4')
    parser.add_argument('-r', '--img_size', default=64, type=int, help='Image resizing size. Default 64')
    parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
    parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
    # BUGFIX: help text was a copy-paste of --verbose's ("Display debug info.").
    parser.add_argument('-p', '--parsing', default=False, action='store_true', help='Treat inputs as parsing (part-label) maps.')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.info('Verbose mode is on.')
        for k, v in args.__dict__.items():
            logging.debug(f'{k}: {v}')

    print(f"parsing: {args.parsing}")
    pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, parsing=args.parsing, dataset=args.dataset)
|
||||
@@ -5,7 +5,7 @@ https://github.com/Gait3D/Gait3D-Benchmark#data-preparation
|
||||
## Data Pretreatment
|
||||
```python
|
||||
python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --output_path 'Gait3D-sils-64-64-pkl'
|
||||
python datasets/pretreatment_smpl.py --input_path 'Gait3D/3D_SMPLs' --output_path 'Gait3D-smpls-pkl'
|
||||
python datasets/Gait3D/pretreatment_smpl.py --input_path 'Gait3D/3D_SMPLs' --output_path 'Gait3D-smpls-pkl'
|
||||
|
||||
(optional) python datasets/pretreatment.py --input_path 'Gait3D/2D_Silhouettes' --img_size 128 --output_path 'Gait3D-sils-128-128-pkl'
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Author : jinkai Zheng
|
||||
date: 2021/10/30
|
||||
E-mail: zhengjinkai3@qq.com
|
||||
"""
|
||||
|
||||
|
||||
import os.path as osp
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
import itertools
|
||||
import numpy as np
|
||||
import pickle
|
||||
import argparse
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='Test')
|
||||
parser.add_argument('-i', '--input_path', default='', type=str,
|
||||
help='Root path of raw dataset.')
|
||||
parser.add_argument('-o', '--output_path', default='', type=str,
|
||||
help='Root path for output.')
|
||||
opt = parser.parse_args()
|
||||
|
||||
|
||||
def get_pickle(thread_id, id_list, save_dir, data_dir=None):
    """Convert per-frame SMPL ``.npz`` files into one ``.pkl`` per sequence.

    Walks ``data_dir/<id>/<cam>/<seq>/<frame>.npz``, stacks each frame's
    85-D SMPL parameter vector (3-D camera + 72-D pose + 10-D shape) and
    dumps the resulting ``[num_frames, 85]`` array to
    ``save_dir/<id>/<cam>/<seq>/<seq>.pkl``.

    Args:
        thread_id: Worker index, used only for progress logging.
        id_list: Subject ids (sub-directories of ``data_dir``) to process.
        save_dir: Root directory for the output ``.pkl`` files.
        data_dir: Root of the raw ``.npz`` dataset.  Defaults to the
            module-level ``data_dir`` set in ``__main__`` (kept for backward
            compatibility with the original global-based call sites).
    """
    if data_dir is None:
        data_dir = globals()['data_dir']
    # `subject_id` instead of `id` to avoid shadowing the builtin.
    for subject_id in sorted(id_list):
        print(f"Process threadID-PID: {thread_id}-{subject_id}")
        for cam in sorted(os.listdir(osp.join(data_dir, subject_id))):
            for seq in sorted(os.listdir(osp.join(data_dir, subject_id, cam))):
                seq_dir = osp.join(data_dir, subject_id, cam, seq)
                smpl_paras_fras = []
                for npz in sorted(os.listdir(seq_dir)):
                    # 'results' is a pickled 0-d object array holding a dict;
                    # index 0 selects the (single) tracked subject.
                    frame = np.load(osp.join(seq_dir, npz), allow_pickle=True)['results'][()][0]
                    smpl_paras = np.concatenate(
                        (frame['cam'], frame['poses'], frame['betas']), 0)  # 3 + 72 + 10 = 85-D
                    smpl_paras_fras.append(smpl_paras)
                smpl_paras_fras = np.asarray(smpl_paras_fras)

                out_dir = osp.join(save_dir, subject_id, cam, seq)
                # exist_ok so a rerun (or partially finished run) doesn't crash.
                os.makedirs(out_dir, exist_ok=True)
                out_pkl = osp.join(out_dir, '{}.pkl'.format(seq))
                # Context manager: the original leaked the file handle.
                with open(out_pkl, 'wb') as f:
                    pickle.dump(smpl_paras_fras, f)
|
||||
|
||||
|
||||
if __name__ == '__main__':

    data_dir = opt.input_path
    save_dir = opt.output_path

    start_time = time.time()
    maxnum_thread = 8

    all_ids = sorted(os.listdir(data_dir))

    # Round-robin the subject ids over the worker threads.  NOTE: CPython
    # threads do not parallelise CPU-bound work (GIL); the win here is
    # overlapping disk I/O across sequences.
    proces = []
    for thread_id in range(maxnum_thread):
        # Stride slicing is equivalent to the original islice round-robin.
        id_list = all_ids[thread_id::maxnum_thread]
        worker = threading.Thread(target=get_pickle, args=(thread_id, id_list, save_dir))
        worker.start()
        proces.append(worker)

    for proc in proces:
        proc.join()

    time_elapsed = time.time() - start_time
    hours, rem = divmod(time_elapsed, 3600)
    minutes, seconds = divmod(rem, 60)
    # Fix: the original message said "Training complete" although this
    # script only pretreats (packs) the SMPL data.
    print('Pretreatment complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        hours, minutes, seconds))
|
||||
@@ -13,15 +13,16 @@
|
||||
- tqdm
|
||||
- py7zr
|
||||
- kornia
|
||||
- einops
|
||||
|
||||
Install dependencies by [Anaconda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html):
|
||||
```
|
||||
conda install tqdm pyyaml tensorboard opencv kornia -c conda-forge
|
||||
conda install tqdm pyyaml tensorboard opencv kornia einops -c conda-forge
|
||||
conda install pytorch==1.10 torchvision -c pytorch
|
||||
```
|
||||
Or, install dependencies by pip:
|
||||
```
|
||||
pip install tqdm pyyaml tensorboard opencv-python kornia
|
||||
pip install tqdm pyyaml tensorboard opencv-python kornia einops
|
||||
pip install torch==1.10 torchvision==0.11
|
||||
```
|
||||
## Prepare dataset
|
||||
|
||||
@@ -62,6 +62,19 @@
|
||||
| [DeepGaitV2-P3D](https://arxiv.org/pdf/2303.03301.pdf) | 74.4 | - | 64x44 | - | - |
|
||||
| [SwinGait(Transformer-based)](https://arxiv.org/pdf/2303.03301.pdf) | 75.0 | - | 64x44 | - | - |
|
||||
|
||||
|
||||
## [Gait3D-Parsing](https://github.com/Gait3D/Gait3D-Benchmark)
|
||||
| Model | `Rank@1` | `mAP` | Configuration | Input Size | Download checkpoint |
|
||||
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:----------:|:---------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
||||
| [GaitSet(AAAI2019)](https://arxiv.org/pdf/1811.06186.pdf) | 55.9 | 46.7 | [gaitset_gait3d_parsing.yaml](../configs/gaitset/gaitset_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitSet/GaitSet/checkpoints/GaitSet-180000.pt) |
|
||||
| [GaitPart(CVPR2020)](https://openaccess.thecvf.com/content_CVPR_2020/papers/Fan_GaitPart_Temporal_Part-Based_Model_for_Gait_Recognition_CVPR_2020_paper.pdf) | 43.0 | 33.9 | [gaitpart_gait3d_parsing.yaml](../configs/gaitpart/gaitpart_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitPart/GaitPart/checkpoints/GaitPart-180000.pt) |
|
||||
| [GLN(ECCV2020)](http://home.ustc.edu.cn/~saihui/papers/eccv2020_gln.pdf) | 45.7 | 38.6 | [gln_phase1_gait3d_parsing.yaml](../configs/gln/gln_phase1_gait3d_parsing.yaml), [gln_phase2_gait3d_parsing.yaml](../configs/gln/gln_phase2_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/blob/main/Gait3D-Parsing/GLN/GLN/checkpoints/GLN_P2-180000.pt) |
|
||||
| [GaitGL(ICCV2021)](https://openaccess.thecvf.com/content/ICCV2021/papers/Lin_Gait_Recognition_via_Effective_Global-Local_Feature_Representation_and_Local_Temporal_ICCV_2021_paper.pdf) | 47.7 | 36.2 | [gaitgl_gait3d_parsing.yaml](../configs/gaitgl/gaitgl_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/GaitGL/GaitGL/checkpoints/GaitGL-180000.pt) |
|
||||
| [SMPLGait(CVPR 2022)](https://gait3d.github.io/) | 60.6 | 52.3 | [smplgait_gait3d_parsing.yaml](../configs/smplgait/smplgait_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/SMPLGait/SMPLGait/checkpoints/SMPLGait-180000.pt) |
|
||||
| [GaitBase(CVPR2023)](https://openaccess.thecvf.com/content/CVPR2023/papers/Fan_OpenGait_Revisiting_Gait_Recognition_Towards_Better_Practicality_CVPR_2023_paper.pdf) | 71.2 | 64.1 | [gaitbase_gait3d_parsing_btz32x2_fixed.yaml](../configs/gaitbase/gaitbase_gait3d_parsing_btz32x2_fixed.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/Baseline/GaitBase_btz32x2_fixed/checkpoints/GaitBase_btz32x2_fixed-120000.pt) |
|
||||
| [ParsingGait(ACMMM2023)](https://arxiv.org/abs/2308.16739) | 76.2 | 68.2 | [parsinggait_gait3d_parsing.yaml](../configs/parsinggait/parsinggait_gait3d_parsing.yaml) | 64x44 | [model](https://huggingface.co/opengait/OpenGait/resolve/main/Gait3D-Parsing/ParsingGait/ParsingGait/checkpoints/ParsingGait-120000.pt) |
|
||||
|
||||
|
||||
## [CASIA-E](https://www.scidb.cn/en/detail?dataSetId=57be0e918db743279baf44a38d013a06)
|
||||
|
||||
| Model | `Rank@1.NM` | `Rank@1.BG` | `Rank@1.CL` | Input size| Configuration |
|
||||
|
||||
@@ -25,6 +25,23 @@ class BaseSilTransform():
|
||||
return x / self.divsor
|
||||
|
||||
|
||||
class BaseParsingCuttingTransform():
    """Cut left/right margins off parsing maps and normalise to [0, 1].

    Mirrors BaseSilCuttingTransform, but only rescales inputs that are
    actually stored in the 0-255 range; parsing maps whose values are small
    class ids (e.g. 0-11) pass through unscaled so the labels survive.

    Args:
        divsor: Divisor applied when the input looks like 0-255 data.
            (Spelling kept as-is for config/kwargs compatibility.)
        cutting: Columns to cut from each side.  Defaults to 10 columns per
            64 columns of input width (64 -> 44, matching the 64x44 configs).
    """

    def __init__(self, divsor=255.0, cutting=None):
        self.divsor = divsor
        self.cutting = cutting

    def __call__(self, x):
        if self.cutting is not None:
            cutting = self.cutting
        else:
            cutting = int(x.shape[-1] // 64) * 10
        # Fix: when the width is below 64 the computed cutting is 0 and the
        # original slice x[..., 0:-0] silently produced an EMPTY array.
        if cutting != 0:
            x = x[..., cutting:-cutting]
        # 255 == 255.0 in Python, so a single comparison suffices (the
        # original evaluated x.max() twice).
        if x.max() == 255:
            return x / self.divsor
        # Parsing label maps: keep values, but still return floats.
        return x / 1.0
|
||||
|
||||
|
||||
class BaseSilCuttingTransform():
|
||||
def __init__(self, divsor=255.0, cutting=None):
|
||||
self.divsor = divsor
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
from torch.nn.parameter import Parameter
|
||||
import math
|
||||
|
||||
|
||||
class Normalize(nn.Module):
    """L-p normalisation along dim 1: divides each row by its p-norm."""

    def __init__(self, power=2):
        super(Normalize, self).__init__()
        self.power = power

    def forward(self, x):
        p = self.power
        # per-sample p-norm over dim 1, kept for broadcasting
        scale = x.pow(p).sum(1, keepdim=True).pow(1. / p)
        return x.div(scale)
|
||||
|
||||
|
||||
class GraphConvolution(nn.Module):
    """Single GCN layer, similar to https://arxiv.org/abs/1609.02907.

    Computes ``BN(adj @ (input @ W) + b)``, where the batch norm runs over
    the flattened (nodes * features) vector of each sample.
    """

    def __init__(self, in_features, out_features, adj_size=9, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.adj_size = adj_size

        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
        # BatchNorm over the flattened node/feature axes.
        self.bn = nn.BatchNorm1d(out_features * adj_size)

    def reset_parameters(self):
        # Uniform init scaled by 1/sqrt(fan_out), as in the reference code.
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)
        if self.bias is not None:
            self.bias.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        # [n, p, in] @ [in, out] -> [n, p, out], then neighbourhood mixing.
        mixed = torch.bmm(adj, torch.matmul(input, self.weight))
        if self.bias is not None:
            mixed = mixed + self.bias
        n, p, c = mixed.size(0), mixed.size(1), mixed.size(2)
        # Flatten so BatchNorm1d normalises each (node, feature) slot,
        # then restore the [n, p, c] layout.
        normed = self.bn(mixed.view(n, p * c))
        return normed.view(n, p, c)

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
|
||||
|
||||
|
||||
class GCN(nn.Module):
    """Two-layer GCN: dropout -> gc1 -> ReLU -> dropout -> gc2 -> ReLU."""

    def __init__(self, adj_size, nfeat, nhid, isMeanPooling = True):
        super(GCN, self).__init__()
        self.adj_size = adj_size
        self.nhid = nhid
        # Stored for callers; forward() does not consult it.
        self.isMeanPooling = isMeanPooling
        self.gc1 = GraphConvolution(nfeat, nhid, adj_size)
        self.gc2 = GraphConvolution(nhid, nhid, adj_size)

    def forward(self, x, adj):
        # Dropout (p=0.5) before each graph convolution, active only in
        # training mode.
        out = F.relu(self.gc1(F.dropout(x, 0.5, training=self.training), adj))
        out = F.relu(self.gc2(F.dropout(out, 0.5, training=self.training), adj))
        return out
|
||||
|
||||
@@ -0,0 +1,268 @@
|
||||
import torch
|
||||
|
||||
from ..base_model import BaseModel
|
||||
from ..modules import SetBlockWrapper, HorizontalPoolingPyramid, PackSequenceWrapper, SeparateFCs, SeparateBNNecks
|
||||
|
||||
from torch.nn import functional as F
|
||||
import numpy as np
|
||||
from ..backbones.gcn import GCN
|
||||
|
||||
|
||||
def L_Matrix(adj_npy, adj_size):
    """Symmetrically normalise an adjacency matrix: D^{-1/2} A D^{-1/2}.

    Rows with no 1-entries keep a zero degree term, which zeroes the
    corresponding rows/columns of the result instead of dividing by zero.
    """
    degree_inv_sqrt = np.zeros((adj_size, adj_size))
    for row in range(adj_size):
        degree = np.sum(adj_npy[row, :] == 1)
        if degree > 0:
            degree_inv_sqrt[row, row] = degree ** (-1 / 2)

    return np.matmul(np.matmul(degree_inv_sqrt, adj_npy), degree_inv_sqrt)
|
||||
|
||||
def get_fine_adj_npy():
    """Normalised adjacency of the 11-node fine parsing graph.

    Nodes follow the fine-part ids (head, torso, arms, hands, legs,
    feet, dress); 1-entries connect physically adjacent parts, with
    self-loops on the diagonal.
    """
    fine_adj_list = [
        # 1 2 3 4 5 6 7 8 9 10 11
        [ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1], #1
        [ 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0], #2
        [ 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1], #3
        [ 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1], #4
        [ 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], #5
        [ 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0], #6
        [ 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1], #7
        [ 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1], #8
        [ 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0], #9
        [ 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0], #10
        [ 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1]  #11
    ]
    # len() gives the row count, i.e. the number of graph nodes.
    return L_Matrix(np.array(fine_adj_list), len(fine_adj_list))
|
||||
|
||||
def get_coarse_adj_npy():
    """Normalised adjacency of the 5-node coarse parsing graph.

    Node 1 (head/torso/dress) is connected to every limb group;
    self-loops sit on the diagonal.
    """
    coarse_adj_list = [
        # 1 2 3 4 5
        [ 1, 1, 1, 1, 1], #1
        [ 1, 1, 0, 0, 0], #2
        [ 1, 0, 1, 0, 0], #3
        [ 1, 0, 0, 1, 0], #4
        [ 1, 0, 0, 0, 1]  #5
    ]
    # len() gives the row count, i.e. the number of graph nodes.
    return L_Matrix(np.array(coarse_adj_list), len(coarse_adj_list))
|
||||
|
||||
|
||||
class ParsingGait(BaseModel):
    """ParsingGait (ACM MM 2023, https://arxiv.org/abs/2308.16739).

    GaitBase-style backbone plus a GCN branch over human-parsing parts:
    feature maps are split per parsing class, pooled per part, refined by
    a fine (11-part) and/or coarse (5-part) graph convolution, and the
    resulting part features are concatenated to the HPP features.
    """

    def build_network(self, model_cfg):
        """Build backbone, pooling heads and the parsing-GCN branch(es).

        Expects ``model_cfg['gcn_cfg']`` with ``fine_parts``,
        ``coarse_parts`` and exactly one of ``only_fine_graph`` /
        ``only_coarse_graph`` / ``combine_fine_coarse_graph`` enabled.
        """
        self.Backbone = self.get_backbone(model_cfg['backbone_cfg'])
        self.Backbone = SetBlockWrapper(self.Backbone)
        self.FCs = SeparateFCs(**model_cfg['SeparateFCs'])
        self.BNNecks = SeparateBNNecks(**model_cfg['SeparateBNNecks'])
        self.TP = PackSequenceWrapper(torch.max)
        self.HPP = HorizontalPoolingPyramid(bin_num=model_cfg['bin_num'])

        nfeat = model_cfg['SeparateFCs']['in_channels']
        gcn_cfg = model_cfg['gcn_cfg']
        self.fine_parts = gcn_cfg['fine_parts']
        coarse_parts = gcn_cfg['coarse_parts']

        self.only_fine_graph = gcn_cfg['only_fine_graph']
        self.only_coarse_graph = gcn_cfg['only_coarse_graph']
        self.combine_fine_coarse_graph = gcn_cfg['combine_fine_coarse_graph']

        # NOTE(review): the adjacency tensors are plain attributes, not
        # registered buffers, so they stay on CPU and are moved manually
        # in get_gcn_feat(); the per-part gammas blend masked/unmasked
        # features in ParsPartfor*Graph.
        if self.only_fine_graph:
            fine_adj_npy = get_fine_adj_npy()
            self.fine_adj_npy = torch.from_numpy(fine_adj_npy).float()
            self.gcn_fine = GCN(self.fine_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_fine = torch.nn.Parameter(torch.ones(self.fine_parts) * 0.75)
        elif self.only_coarse_graph:
            coarse_adj_npy = get_coarse_adj_npy()
            self.coarse_adj_npy = torch.from_numpy(coarse_adj_npy).float()
            self.gcn_coarse = GCN(coarse_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_coarse = torch.nn.Parameter(torch.ones(coarse_parts) * 0.75)
        elif self.combine_fine_coarse_graph:
            fine_adj_npy = get_fine_adj_npy()
            self.fine_adj_npy = torch.from_numpy(fine_adj_npy).float()
            self.gcn_fine = GCN(self.fine_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_fine = torch.nn.Parameter(torch.ones(self.fine_parts) * 0.75)
            coarse_adj_npy = get_coarse_adj_npy()
            self.coarse_adj_npy = torch.from_numpy(coarse_adj_npy).float()
            self.gcn_coarse = GCN(coarse_parts, nfeat, nfeat, isMeanPooling=True)
            self.gammas_coarse = torch.nn.Parameter(torch.ones(coarse_parts) * 0.75)
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")

    def PPforGCN(self, x):
        """Part Pooling for GCN: mean + max over the spatial axes.

        x : [n, p, c, h, w]
        ret : [n, p, c]
        """
        n, p, c, h, w = x.size()
        z = x.view(n, p, c, -1)  # [n, p, c, h*w]
        z = z.mean(-1) + z.max(-1)[0]  # [n, p, c]
        return z

    def ParsPartforFineGraph(self, mask_resize, z):
        """Split feature maps by the 11 fine parsing classes.

        mask_resize: [n*s, h, w] parsing labels resized to the feature map
        z: [n*s, c, h, w] backbone features
        return [n*s, 11, c, h, w]
        ***Fine Parts:
        # 0: Background,
            1: Head,
            2: Torso,
            3: Left-arm,
            4: Right-arm,
            5: Left-hand,
            6: Right-hand,
            7: Left-leg,
            8: Right-leg,
            9: Left-foot,
            10: Right-foot,
            11: Dress
        """
        fine_mask_list = list()
        for i in range(1, self.fine_parts + 1):
            fine_mask_list.append((mask_resize.long() == i))  # split mask of each class

        fine_z_list = list()
        for i in range(len(fine_mask_list)):
            mask = fine_mask_list[i].unsqueeze(1)
            # Soft split: inside-part pixels weighted by gamma_i, the rest
            # by (1 - gamma_i), so context is attenuated rather than dropped.
            fine_z_list.append((mask.float() * z * self.gammas_fine[i] + (~mask).float() * z * (1.0 - self.gammas_fine[i])).unsqueeze(1))
        fine_z_feat = torch.cat(fine_z_list, dim=1)  # [n*s, 11, c, h, w]

        return fine_z_feat

    def ParsPartforCoarseGraph(self, mask_resize, z):
        """Split feature maps by 5 coarse part groups (unions of fine ids).

        mask_resize: [n*s, h, w] parsing labels resized to the feature map
        z: [n*s, c, h, w] backbone features
        return [n*s, 5, c, h, w]
        ***Coarse Parts:
            1: [1, 2, 11] Head, Torso, Dress
            2: [3, 5] Left-arm, Left-hand
            3: [4, 6] Right-arm, Right-hand
            4: [7, 9] Left-leg, Left-foot
            5: [8, 10] Right-leg, Right-foot
        """
        coarse_mask_list = list()
        coarse_parts = [[1, 2, 11], [3, 5], [4, 6], [7, 9], [8, 10]]
        for coarse_part in coarse_parts:
            # Start from an all-False mask (== -1 never matches) and OR in
            # each member class of the group.
            part = mask_resize.long() == -1
            for i in coarse_part:
                part += (mask_resize.long() == i)
            coarse_mask_list.append(part)

        coarse_z_list = list()
        for i in range(len(coarse_mask_list)):
            mask = coarse_mask_list[i].unsqueeze(1)
            coarse_z_list.append((mask.float() * z * self.gammas_coarse[i] + (~mask).float() * z * (1.0 - self.gammas_coarse[i])).unsqueeze(1))
        coarse_z_feat = torch.cat(coarse_z_list, dim=1)  # [n*s, 5, c, h, w]

        return coarse_z_feat

    def ParsPartforGCN(self, x, pars):
        """Dispatch the per-part feature split according to the graph mode.

        x: [n, c, s, h, w] backbone features
        pars: [n, 1, s, H, W] parsing maps
        return (fine, coarse) where each is [n*s, 11/5, c, h, w] or None
        """
        n, c, s, h, w = x.size()
        # mask_resize: nearest-neighbour keeps the parsing labels discrete.
        mask_resize = F.interpolate(input=pars.squeeze(1), size=(h, w), mode='nearest')
        mask_resize = mask_resize.view(n * s, h, w)

        z = x.transpose(1, 2).reshape(n * s, c, h, w)

        if self.only_fine_graph:
            fine_z_feat = self.ParsPartforFineGraph(mask_resize, z)
            return fine_z_feat, None
        elif self.only_coarse_graph:
            coarse_z_feat = self.ParsPartforCoarseGraph(mask_resize, z)
            return None, coarse_z_feat
        elif self.combine_fine_coarse_graph:
            fine_z_feat = self.ParsPartforFineGraph(mask_resize, z)
            coarse_z_feat = self.ParsPartforCoarseGraph(mask_resize, z)
            return fine_z_feat, coarse_z_feat
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")

    def get_gcn_feat(self, n, input, adj_np, is_cuda, seqL):
        """Pool parts, run the matching GCN and temporally pool.

        input: [n*s, p, c, h, w]; returns [n, p, c] with p in {11, 5}.
        """
        input_ps = self.PPforGCN(input)  # [n*s, p, c]
        n_s, p, c = input_ps.size()
        # Fix: keep the CPU adjacency when inputs are not on GPU — the
        # original only assigned `adj` inside `if is_cuda:`, leaving it
        # undefined (NameError) on a CPU run.
        adj = adj_np.cuda() if is_cuda else adj_np
        adj = adj.repeat(n_s, 1, 1)
        if p == 11:
            output_ps = self.gcn_fine(input_ps, adj)  # [n*s, 11, c]
        elif p == 5:
            output_ps = self.gcn_coarse(input_ps, adj)  # [n*s, 5, c]
        else:
            raise ValueError(f"The parsing parts should be 11 or 5, but got {p}")
        output_ps = output_ps.view(n, n_s // n, p, c)  # [n, s, ps, c]
        output_ps = self.TP(output_ps, seqL, dim=1, options={"dim": 1})[0]  # [n, ps, c]

        return output_ps

    def forward(self, inputs):
        """Standard OpenGait forward: returns training/visual/inference dicts."""
        ipts, labs, _, _, seqL = inputs

        pars = ipts[0]
        if len(pars.size()) == 4:
            # Add the missing channel axis: [n, s, h, w] -> [n, 1, s, h, w].
            pars = pars.unsqueeze(1)

        del ipts
        outs = self.Backbone(pars)  # [n, c, s, h, w]

        outs_n, outs_c, outs_s, outs_h, outs_w = outs.size()

        # split features by parsing classes
        # outs_ps_fine: [n*s, 11, c, h, w]
        # outs_ps_coarse: [n*s, 5, c, h, w]
        outs_ps_fine, outs_ps_coarse = self.ParsPartforGCN(outs, pars)

        is_cuda = pars.is_cuda
        if self.only_fine_graph:
            outs_ps = self.get_gcn_feat(outs_n, outs_ps_fine, self.fine_adj_npy, is_cuda, seqL)  # [n, 11, c]
        elif self.only_coarse_graph:
            outs_ps = self.get_gcn_feat(outs_n, outs_ps_coarse, self.coarse_adj_npy, is_cuda, seqL)  # [n, 5, c]
        elif self.combine_fine_coarse_graph:
            outs_fine = self.get_gcn_feat(outs_n, outs_ps_fine, self.fine_adj_npy, is_cuda, seqL)  # [n, 11, c]
            outs_coarse = self.get_gcn_feat(outs_n, outs_ps_coarse, self.coarse_adj_npy, is_cuda, seqL)  # [n, 5, c]
            outs_ps = torch.cat([outs_fine, outs_coarse], 1)  # [n, 16, c]
        else:
            raise ValueError("You should choose fine/coarse graph, or combine both of them.")
        outs_ps = outs_ps.transpose(1, 2).contiguous()  # [n, c, ps]

        # Temporal Pooling, TP
        outs = self.TP(outs, seqL, options={"dim": 2})[0]  # [n, c, h, w]
        # Horizontal Pooling Matching, HPM
        feat = self.HPP(outs)  # [n, c, p]

        # Append the GCN part features as extra horizontal bins.
        feat = torch.cat([feat, outs_ps], dim=-1)  # [n, c, p+ps]

        embed_1 = self.FCs(feat)  # [n, c, p+ps]
        # embed_2 is unused downstream (kept for parity with other OpenGait
        # models); only the logits feed the softmax loss.
        embed_2, logits = self.BNNecks(embed_1)  # [n, c, p+ps]
        embed = embed_1

        n, _, s, h, w = pars.size()
        retval = {
            'training_feat': {
                'triplet': {'embeddings': embed_1, 'labels': labs},
                'softmax': {'logits': logits, 'labels': labs}
            },
            'visual_summary': {
                'image/pars': pars.view(n * s, 1, h, w)
            },
            'inference_feat': {
                'embeddings': embed
            }
        }
        return retval
|
||||
Reference in New Issue
Block a user