Support skeleton (#155)

* pose * pose * pose * pose * 你的提交消息 * pose * pose * Delete train1.sh * pretreatment * configs * pose * reference * Update gaittr.py * naming * naming * Update transform.py * update for datasets * update README * update name and README * update * Update transform.py
2023-09-27 16:20:00 +08:00
parent 853bb1821d
commit 2c29afadf3
41 changed files with 4251 additions and 12 deletions
@@ -0,0 +1,98 @@
+data_cfg:
+  dataset_name: CASIA-B
+  dataset_root: your_path
+  dataset_partition: ./datasets/CASIA-B/CASIA-B.json
+  test_dataset_name: CASIA-B
+  num_workers: 8
+  remove_no_gallery: false
+  frame_threshold: 60
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 6000
+  save_name: GaitGraph1_phase1
+  sampler:
+    batch_size: 256
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: euc
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitGraph1Input
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Re
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph1
+  joint_format: coco
+  input_num: 1
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 3
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 256
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.01
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.01
+  total_steps: 6000
+  scheduler: OneCycleLR
+  div_factor: 25 
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 1000
+  save_name: GaitGraph1_phase1
+  sync_BN: true
+  total_iter: 6000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 100
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 128
+    type: CommonSampler
+  transform:
+    - type: TwoView
+      trf_cfg:
+        - type: InversePosesPre
+          probability: 0.1
+        - type: FlipSequence
+          probability: 0.5
+        - type: RandomSelectSequence
+          sequence_length: 60
+        - type: PointNoise
+          std: 0.05
+        - type: JointNoise
+          std: 0.2
+        - type: GaitGraph1Input
@@ -0,0 +1,100 @@
+data_cfg:
+  dataset_name: GREW
+  dataset_root: your_path
+  dataset_partition: ./datasets/GREW/GREW.json
+  test_dataset_name: GREW
+  num_workers: 8
+  remove_no_gallery: false
+  frame_threshold: 16
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitGraph1_phase1
+  sampler:
+    batch_size: 256
+    frames_num_fixed: 501
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: euc
+  # eval_func: GREW_submission
+  eval_func: evaluate_real_scene
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: SelectSequenceCenter
+          sequence_length: 16
+        - type: GaitGraph1Input
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Re
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph1
+  joint_format: coco
+  input_num: 1
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 3
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 256
+    - 256
+  num_class: 256
+  tta: true
+
+optimizer_cfg:
+  lr: 0.01
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.01
+  total_steps: 150000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 5000
+  save_name: GaitGraph1_phase1
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 501
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 128
+    type: CommonSampler
+  transform:
+    - type: TwoView
+      trf_cfg:
+        - type: MirrorPoses
+          probability: 0.5
+        - type: FlipSequence
+          probability: 0.5
+        - type: RandomSelectSequence
+          sequence_length: 16
+        - type: PointNoise
+          std: 0.05
+        - type: JointNoise
+          std: 0.1
+        - type: GaitGraph1Input
@@ -0,0 +1,100 @@
+data_cfg:
+  dataset_name: Gait3D
+  dataset_root: your_path
+  dataset_partition: ./datasets/Gait3D/Gait3D.json
+  test_dataset_name: Gait3D
+  num_workers: 8
+  remove_no_gallery: false
+  frame_threshold: 24
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 20000
+  save_name: GaitGraph1_phase1
+  sampler:
+    batch_size: 256
+    frames_num_fixed: 130
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: euc
+  eval_func: evaluate_Gait3D
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: SelectSequenceCenter
+          sequence_length: 24
+        - type: GaitGraph1Input
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Re
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph1
+  joint_format: coco
+  input_num: 1
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 3
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 256
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.01
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.01
+  total_steps: 20000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 500
+  save_name: GaitGraph1_phase1
+  sync_BN: true
+  total_iter: 20000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 130
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 128
+    type: CommonSampler
+  transform:
+    - type: TwoView
+      trf_cfg:
+        - type: MirrorPoses
+          probability: 0.5
+        - type: FlipSequence
+          probability: 0.5
+        - type: RandomSelectSequence
+          sequence_length: 16
+        - type: PointNoise
+          std: 0.05
+        - type: JointNoise
+          std: 0.1
+        - type: GaitGraph1Input
+
@@ -0,0 +1,99 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  test_dataset_name: OUMVLP
+  num_workers: 8
+  remove_no_gallery: false
+  frame_threshold: 17
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitGraph1_phase1
+  sampler:
+    batch_size: 256
+    frames_num_fixed: 35
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: euc
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: SelectSequenceCenter
+          sequence_length: 17
+        - type: GaitGraph1Input
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Re
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph1
+  joint_format: alphapose
+  input_num: 1
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 3
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 256
+    - 256
+  num_class: 256
+  tta: true
+
+optimizer_cfg:
+  lr: 0.01
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.01
+  total_steps: 150000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 10000
+  save_name: GaitGraph1_phase1
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 35
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 128
+    type: CommonSampler
+  transform:
+    - type: TwoView
+      trf_cfg:
+        - type: MirrorPoses
+          probability: 0.5
+        - type: FlipSequence
+          probability: 0.5
+        - type: RandomSelectSequence
+          sequence_length: 16
+        - type: PointNoise
+          std: 0.05
+        - type: JointNoise
+          std: 0.1
+        - type: GaitGraph1Input
@@ -0,0 +1,99 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  test_dataset_name: OUMVLP
+  num_workers: 8
+  remove_no_gallery: false
+  frame_threshold: 17
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitGraph1_phase1
+  sampler:
+    batch_size: 256
+    frames_num_fixed: 25
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: euc
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: SelectSequenceCenter
+          sequence_length: 17
+        - type: SkeletonInput
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Re
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph1
+  joint_format: coco
+  input_num: 1
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 3
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 256
+    - 256
+  num_class: 256
+  tta: true
+
+optimizer_cfg:
+  lr: 0.01
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.01
+  total_steps: 150000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 10000
+  save_name: GaitGraph1_phase1
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 25
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 128
+    type: CommonSampler
+  transform:
+    - type: TwoView
+      trf_cfg:
+        - type: MirrorPoses
+          probability: 0.5
+        - type: FlipSequence
+          probability: 0.5
+        - type: RandomSelectSequence
+          sequence_length: 16
+        - type: PointNoise
+          std: 0.05
+        - type: JointNoise
+          std: 0.1
+        - type: GaitGraph1Input
@@ -0,0 +1,97 @@
+data_cfg:
+  dataset_name: CASIA-B
+  dataset_root: your_path
+  dataset_partition: ./datasets/CASIA-B/CASIA-B.json
+  test_dataset_name: CASIA-B
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 500
+  save_name: GaitGraph2
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: GaitGraphMultiInput
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: coco
+  input_num: 3
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005 #0.005
+  solver: AdamW
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.005
+  total_steps: 500
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 100
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter: 500
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768
+    type: CommonSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+        - type:  JointNoise
+          std: 0.25
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+                    - 4
+                    - 1
+        - type: GaitGraphMultiInput
@@ -0,0 +1,103 @@
+data_cfg:
+  dataset_name: CCPG
+  dataset_root: your_path
+  dataset_partition: ./datasets/CCPG/CCPG.json
+  test_dataset_name: CCPG
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 2000
+  save_name: GaitGraph2_CCPG
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_CCPG
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: GaitGraphMultiInput
+          joint_format: alphapose
+      
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: alphapose
+  input_num: 3
+  reduction: 4
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.005
+  total_steps: 2000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false 
+  log_iter: 20
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 200
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter:  2000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768
+    type: CommonSampler
+
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+          joint_format: alphapose
+        - type:  JointNoise
+          std: 0.1
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+            - 3
+            - 1
+        - type: GaitGraphMultiInput
+          joint_format: alphapose
@@ -0,0 +1,105 @@
+data_cfg:
+  dataset_name: GREW
+  dataset_root: your_path
+  dataset_partition: ./datasets/GREW/GREW.json
+  test_dataset_name: GREW
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 50000
+  save_name: GaitGraph2
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: GREW_submission
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: GaitGraphMultiInput
+      
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: coco
+  input_num: 3
+  reduction: 4
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 128
+    - 256
+    - 256
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.005
+  total_steps: 50000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false 
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 10000
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter:  50000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768
+    type: CommonSampler
+
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+        - type:  JointNoise
+          std: 0.25
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+                    - 4
+                    - 1
+        - type: GaitGraphMultiInput
@@ -0,0 +1,93 @@
+data_cfg:
+  dataset_name: Gait3D
+  dataset_root: your_path
+  dataset_partition: ./datasets/Gait3D/Gait3D.json
+  test_dataset_name: Gait3D
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 2000
+  save_name: GaitGraph2
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_Gait3D
+  transform:
+    - type: GaitGraphMultiInput
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: coco
+  input_num: 3
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005 #0.005
+  solver: AdamW
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.005
+  total_steps: 2000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 20
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 200
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter: 2000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768 
+    type: CommonSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+        - type:  JointNoise
+          std: 0.25
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+                    - 4
+                    - 1
+        - type: GaitGraphMultiInput
@@ -0,0 +1,108 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  test_dataset_name: OUMVLP
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 80000
+  save_name: GaitGraph2_phase2
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: GaitGraphMultiInput
+          joint_format: alphapose
+      
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: alphapose
+  input_num: 3
+  reduction: 4
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 128
+    - 256
+    - 256
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.005
+  total_steps: 80000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false 
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 10000
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter:  80000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768
+    type: CommonSampler
+
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+          joint_format: alphapose
+        - type:  JointNoise
+          std: 0.1
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+            - 3
+            - 1
+        - type: GaitGraphMultiInput
+          joint_format: alphapose
@@ -0,0 +1,108 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  test_dataset_name: OUMVLP
+  num_workers: 8
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 80000
+  save_name: GaitGraph2_phase2
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: GaitGraphMultiInput
+          joint_format: coco
+      
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: coco
+  input_num: 3
+  reduction: 4
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 128
+    - 128
+    - 256
+    - 256
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005
+  solver: Adam
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.005
+  total_steps: 80000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false 
+  log_iter: 1000
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 10000
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter:  80000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768
+    type: CommonSampler
+
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: NormalizeEmpty
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+          joint_format: coco
+        - type:  JointNoise
+          std: 0.1
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+            - 3
+            - 1
+        - type: GaitGraphMultiInput
+          joint_format: coco
@@ -0,0 +1,94 @@
+data_cfg:
+  dataset_name: SUSTech1K
+  dataset_root:  your_path
+  dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json
+  test_dataset_name: SUSTech1K
+  num_workers: 8
+  data_in_use: [false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false]
+  remove_no_gallery: false
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 2000
+  save_name: GaitGraph2-SUSTech1k
+  sampler:
+    batch_size: 256 
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered
+    frames_skip_num: 0
+  metric: cos
+  eval_func: evaluate_indoor_dataset
+  transform:
+    - type: GaitGraphMultiInput
+
+loss_cfg:
+  - loss_term_weight: 1
+    temperature: 0.01
+    type:  SupConLoss_Lp
+    log_prefix: SupConLoss
+
+model_cfg:
+  model: GaitGraph2
+  joint_format: coco
+  input_num: 3
+  reduction: 8
+  block: Bottleneck # Basic, initial
+  input_branch:
+    - 5
+    - 64
+    - 32
+  main_stream:
+    - 32
+    - 128
+    - 256
+  num_class: 128
+  tta: true
+
+optimizer_cfg:
+  lr: 0.005 #0.005
+  solver: AdamW
+  weight_decay: 0.00001
+
+scheduler_cfg:
+  max_lr: 0.005
+  total_steps: 2000
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false
+  log_iter: 20
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 200
+  save_name: GaitGraph2
+  sync_BN: true
+  total_iter: 2000
+  sampler:
+    batch_shuffle: true
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0
+    batch_size: 768  # use 256 only for debugging
+    type: CommonSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: FlipSequence
+          probability: 0.5
+        - type: InversePosesPre
+          probability: 0.1
+        - type:  JointNoise
+          std: 0.25
+        - type: PointNoise
+          std: 0.05
+        - type: RandomMove
+          random_r:
+                    - 4
+                    - 1
+        - type: GaitGraphMultiInput
@@ -0,0 +1,91 @@
+data_cfg:
+  dataset_name: CCPG
+  dataset_root: your_path
+  dataset_partition: ./datasets/CCPG/CCPG.json
+  num_workers: 1
+  remove_no_gallery: false
+  frame_threshold: 0
+  test_dataset_name: CCPG
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 40000
+  save_name: GaitTR-CCPG
+  eval_func: evaluate_CCPG
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: alphapose
+        - type: SkeletonInput
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 128
+  joint_format: alphapose
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 40000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false # not used
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 4000
+  save_name: GaitTR-CCPG
+  sync_BN: true
+  total_iter: 40000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 4
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          joint_format: alphapose
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: alphapose
+        - type: SkeletonInput
@@ -0,0 +1,91 @@
+data_cfg:
+  dataset_name: GREW
+  dataset_root: your_path
+  dataset_partition: ./datasets/GREW/GREW.json
+  num_workers: 1
+  remove_no_gallery: false
+  test_dataset_name: GREW
+  frame_threshold: 0
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitTR
+  eval_func: GREW_submission
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
+
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 256
+  joint_format: coco
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 150000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false # not used
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 5000
+  save_name: GaitTR
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 8
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
@@ -0,0 +1,91 @@
+data_cfg:
+  dataset_name: Gait3D
+  dataset_root: your_path
+  dataset_partition: ./datasets/Gait3D/Gait3D.json
+  test_dataset_name: Gait3D
+  num_workers: 1
+  remove_no_gallery: false
+  frame_threshold: 0
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 40000
+  save_name: GaitTR
+  eval_func: evaluate_Gait3D
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
+
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 128
+  joint_format: coco
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 40000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false # not used
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 1000
+  save_name: GaitTR
+  sync_BN: true
+  total_iter: 40000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 4
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
@@ -0,0 +1,92 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  num_workers: 1
+  remove_no_gallery: false
+  test_dataset_name: OUMVLP
+  frame_threshold: 0
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitTR
+  eval_func: evaluate_indoor_dataset
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: alphapose
+        - type: SkeletonInput
+
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 256
+  joint_format: alphapose
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 150000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false #not use
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 5000
+  save_name: GaitTR
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 16
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          joint_format: alphapose
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: alphapose
+        - type: SkeletonInput
@@ -0,0 +1,91 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  num_workers: 1
+  remove_no_gallery: false
+  test_dataset_name: OUMVLP
+  frame_threshold: 0
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 150000
+  save_name: GaitTR
+  eval_func: evaluate_indoor_dataset
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
+
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 256
+  joint_format: coco
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 150000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false #not use
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 5000
+  save_name: GaitTR
+  sync_BN: true
+  total_iter: 150000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 16
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
@@ -0,0 +1,92 @@
+data_cfg:
+  dataset_name: SUSTech1K
+  dataset_root: your_path
+  dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json
+  num_workers: 1
+  data_in_use: [false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false]
+  remove_no_gallery: false
+  frame_threshold: 0
+  test_dataset_name: SUSTech1K
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 40000
+  save_name: GaitTR-SUSTech1k
+  eval_func: evaluate_indoor_dataset
+  sampler:
+    batch_size: 4 # should be the same as num_gpus
+    sample_type: all_ordered
+    type: InferenceSampler
+  metric: euc # cos
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
+
+loss_cfg:
+  type: TripletLoss
+  margin: 0.3
+  log_prefix: triplet
+
+model_cfg:
+  model: GaitTR
+  in_channels:
+    - 10
+    - 64
+    - 64
+    - 128
+    - 256
+  num_class: 128
+  joint_format: coco
+
+
+optimizer_cfg:
+  lr: 0.001
+  solver: Adam
+  weight_decay: 0.00002
+
+scheduler_cfg:
+  three_phase: True
+  max_lr: 0.001
+  div_factor: 100
+  final_div_factor: 1000.0
+  total_steps: 40000
+  pct_start: 0.475 
+  scheduler: OneCycleLR
+
+trainer_cfg:
+  enable_float16: false #not use
+  log_iter: 100
+  with_test: true
+  restore_ckpt_strict: false
+  restore_hint: 0
+  save_iter: 4000
+  save_name: GaitTR-SUSTech1k
+  sync_BN: true
+  total_iter: 40000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 4
+    frames_num_fixed: 60
+    frames_num_max: 50
+    frames_num_min: 25
+    sample_type: fixed_ordered #Repeat sample
+    frames_skip_num: 0 
+    type: TripletSampler
+  transform:
+    - type: Compose
+      trf_cfg:
+        - type: PointNoise
+          std: 0.3
+        - type: InversePosesPre
+          probability: 0.1
+        - type: JointNoise
+          std: 0.3
+        - type: GaitTRMultiInput
+          joint_format: coco
+        - type: SkeletonInput
@@ -0,0 +1,98 @@
+data_cfg:
+  dataset_name: OUMVLP
+  dataset_root: your_path
+  dataset_partition: ./datasets/OUMVLP/OUMVLP.json
+  num_workers: 1
+  remove_no_gallery: false
+  test_dataset_name: OUMVLP
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 300000
+  save_name: MSGG_OUMVLP
+  eval_func: evaluate_indoor_dataset
+  sampler:
+    batch_size: 8
+    sample_type: all_ordered
+    type: InferenceSampler
+  transform:
+    - type: MSGGTransform
+      prob: alphapose
+      # prob: coco
+  metric: euc # cos
+
+loss_cfg:
+  - loss_term_weight: 0.3
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_joints
+  - loss_term_weight: 0.2
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_limbs
+  - loss_term_weight: 0.1
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_bodyparts
+  - loss_term_weight: 1.0
+    scale: 1
+    type: CrossEntropyLoss
+    log_accuracy: true
+    label_smooth: false
+    log_prefix: softmax
+
+model_cfg:
+  model: MultiScaleGaitGraph
+  in_channels:
+    - 3
+    - 16
+    - 32
+    - 64
+    - 128
+  out_channels: 128
+  graph_cfg:
+    layout: 'body_12'
+    strategy: 'gait_temporal'
+  temporal_kernel_size: 9
+  num_id: 5153
+
+optimizer_cfg:
+  lr: 0.1
+  momentum: 0.9
+  solver: SGD
+  weight_decay: 0.0005
+
+scheduler_cfg:
+  gamma: 0.1
+  milestones:
+    - 75000
+    - 150000
+    - 225000
+  scheduler: MultiStepLR
+
+trainer_cfg:
+  enable_float16: false
+  fix_BN: false
+  with_test: false
+  log_iter: 100
+  restore_ckpt_strict: true
+  restore_hint: 0
+  save_iter: 2000
+  save_name: MSGG_OUMVLP
+  sync_BN: true
+  total_iter: 300000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 8
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    frames_skip_num: 0
+    sample_type: fixed_ordered
+    type: TripletSampler
+  transform:
+    - type: MSGGTransform
+      prob: alphapose
@@ -0,0 +1,98 @@
+data_cfg:
+  dataset_name: SUSTech1K
+  dataset_root: your_path
+  dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json
+  num_workers: 1
+  remove_no_gallery: false
+  test_dataset_name: SUSTech1K
+
+evaluator_cfg:
+  enable_float16: false
+  restore_ckpt_strict: true
+  restore_hint: 300000
+  save_name: MSGG_SUSTech1K
+  eval_func: evaluate_indoor_dataset
+  sampler:
+    batch_size: 8
+    sample_type: all_ordered
+    type: InferenceSampler
+  transform:
+    - type: MSGGTransform
+      # prob: alphapose
+      prob: coco
+  metric: euc # cos
+
+loss_cfg:
+  - loss_term_weight: 0.3
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_joints
+  - loss_term_weight: 0.2
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_limbs
+  - loss_term_weight: 0.1
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet_bodyparts
+  - loss_term_weight: 1.0
+    scale: 1
+    type: CrossEntropyLoss
+    log_accuracy: true
+    label_smooth: false
+    log_prefix: softmax
+
+model_cfg:
+  model: MultiScaleGaitGraph
+  in_channels:
+    - 3
+    - 16
+    - 32
+    - 64
+    - 128
+  out_channels: 128
+  graph_cfg:
+    layout: 'body_12'
+    strategy: 'gait_temporal'
+  temporal_kernel_size: 9
+  num_id: 5153
+
+optimizer_cfg:
+  lr: 0.1
+  momentum: 0.9
+  solver: SGD
+  weight_decay: 0.0005
+
+scheduler_cfg:
+  gamma: 0.1
+  milestones:
+    - 75000
+    - 150000
+    - 225000
+  scheduler: MultiStepLR
+
+trainer_cfg:
+  enable_float16: false
+  fix_BN: false
+  with_test: false
+  log_iter: 100
+  restore_ckpt_strict: true
+  restore_hint: 0
+  save_iter: 2000
+  save_name: MSGG_SUSTech1K
+  sync_BN: true
+  total_iter: 300000
+  sampler:
+    batch_shuffle: false
+    batch_size:
+      - 32
+      - 8
+    frames_num_fixed: 30
+    frames_num_max: 50
+    frames_num_min: 25
+    frames_skip_num: 0
+    sample_type: fixed_ordered
+    type: TripletSampler
+  transform:
+    - type: MSGGTransform
+      prob: coco
@@ -24,15 +24,24 @@ ls *.tgz | xargs -n1 tar xzvf

 After unpacking these compressed files, run this command:

-Step2 : To rearrange directory of GREW dataset, turning to id-type-view structure, Run 
+Step2-1: To rearrange the directory of the GREW dataset (for silhouettes) into the id-type-view structure, run
 ```
 python datasets/GREW/rearrange_GREW.py --input_path Path_of_GREW-raw --output_path Path_of_GREW-rearranged
 ```  
+Step2-2: To rearrange the directory of the GREW dataset (for poses) into the id-type-view structure, run
+```
+python datasets/GREW/rearrange_GREW_pose.py --input_path Path_of_GREW-pose --output_path Path_of_GREW-pose-rearranged
+```  

-Step3: Transforming images to pickle file, run 
+Step3-1: To transform the images into pickle files, run
 ```
 python datasets/pretreatment.py --input_path Path_of_GREW-rearranged --output_path Path_of_GREW-pkl --dataset GREW
 ```
+Step3-2: To transform the pose txt files into pickle files, run
+```
+python datasets/pretreatment.py --input_path Path_of_GREW-pose-rearranged --output_path Path_of_GREW-pose-pkl --pose --dataset GREW
+```
+
 Then you will see the structure like:

 - Processed
@@ -0,0 +1,92 @@
+import argparse
+import os
+import shutil
+from pathlib import Path
+
+from tqdm import tqdm
+
+# Totals handed to the tqdm progress bars: TOTAL_Test is used by
+# rearrange_test and TOTAL_Train by rearrange_train.
+TOTAL_Test = 24000
+TOTAL_Train = 20000
+
+def rearrange_train(train_path: Path, output_path: Path) -> None:
+    """Symlink the 2D pose files of the GREW train split into the output tree.
+
+    Every ``<train_path>/<sid>/<seq>/*_2d_pose.txt`` file is linked as
+    ``<output_path>/<sid>train/00/<seq>/pose_<file name>``. Entries that are
+    not directories are skipped at both the subject and the sequence level.
+
+    Args:
+        train_path (Path): Directory holding one sub-directory per subject.
+        output_path (Path): Root directory of the rearranged dataset.
+    """
+    progress = tqdm(total=TOTAL_Train)
+    for sid in train_path.iterdir():
+        if not sid.is_dir():
+            continue
+        for sub_seq in sid.iterdir():
+            if not sub_seq.is_dir():
+                continue
+            subfiles = os.listdir(sub_seq)
+            if not subfiles:
+                # An empty sequence folder produces no destination folder.
+                continue
+            src = os.path.join(train_path, sid.name, sub_seq.name)
+            dst = os.path.join(output_path, sid.name+'train', '00', sub_seq.name)
+            os.makedirs(dst, exist_ok=True)
+            for subfile in subfiles:
+                if not subfile.endswith('_2d_pose.txt'):
+                    continue
+                link_path = os.path.join(dst, 'pose_'+subfile)
+                # Check the prefixed link name so that re-running the script
+                # is a no-op instead of raising FileExistsError.
+                if not os.path.lexists(link_path):
+                    os.symlink(os.path.join(src, subfile), link_path)
+        progress.update(1)
+
+def rearrange_test(test_path: Path, output_path: Path) -> None:
+    """Symlink the 2D pose files of the GREW test split into the output tree.
+
+    Gallery files ``<test_path>/gallery/<sid>/<seq>/*_2d_pose.txt`` are linked
+    under ``<output_path>/<sid>/<NN>/<seq>/``, where ``NN`` is a two-digit
+    counter starting at 01 that increases with every sequence of the subject.
+    Probe files ``<test_path>/probe/<seq>/*_2d_pose.txt`` are linked under
+    ``<output_path>/probe/03/<seq>/``. Every link name is the source file name
+    prefixed with ``pose_``.
+
+    Args:
+        test_path (Path): Directory containing ``gallery`` and ``probe``.
+        output_path (Path): Root directory of the rearranged dataset.
+    """
+    def link_pose_files(src: str, dst: str, subfiles) -> None:
+        # Link every '*_2d_pose.txt' entry of `subfiles` from src into dst.
+        if not subfiles:
+            # An empty sequence folder produces no destination folder.
+            return
+        os.makedirs(dst, exist_ok=True)
+        for subfile in subfiles:
+            if not subfile.endswith('_2d_pose.txt'):
+                continue
+            link_path = os.path.join(dst, 'pose_'+subfile)
+            # Check the prefixed link name so that re-running the script
+            # is a no-op instead of raising FileExistsError.
+            if not os.path.lexists(link_path):
+                os.symlink(os.path.join(src, subfile), link_path)
+
+    gallery = Path(os.path.join(test_path, 'gallery'))
+    probe = Path(os.path.join(test_path, 'probe'))
+    progress = tqdm(total=TOTAL_Test)
+    # for gallery
+    for sid in gallery.iterdir():
+        if not sid.is_dir():
+            continue
+        cnt = 1
+        for sub_seq in sid.iterdir():
+            if not sub_seq.is_dir():
+                continue
+            link_pose_files(
+                os.path.join(gallery, sid.name, sub_seq.name),
+                os.path.join(output_path, sid.name, '%02d' % cnt, sub_seq.name),
+                sorted(os.listdir(sub_seq)))
+            cnt += 1
+            progress.update(1)
+    # for probe
+    for sub_seq in probe.iterdir():
+        if not sub_seq.is_dir():
+            continue
+        link_pose_files(
+            os.path.join(probe, sub_seq.name),
+            os.path.join(output_path, 'probe', '03', sub_seq.name),
+            os.listdir(sub_seq))
+        # One tick per sequence, matching the gallery loop above.
+        progress.update(1)
+
+def rearrange_GREW(input_path: Path, output_path: Path) -> None:
+    """Rearrange the GREW pose folders found directly under ``input_path``.
+
+    The output root is created first. Each sub-directory of ``input_path`` is
+    announced on stdout; ``train`` is handed to ``rearrange_train`` and
+    ``test`` to ``rearrange_test``. Any other folder, ``distractor`` included,
+    is left untouched.
+
+    Args:
+        input_path (Path): Root of the raw GREW pose data.
+        output_path (Path): Root directory of the rearranged dataset.
+    """
+    os.makedirs(output_path, exist_ok=True)
+
+    subfolders = (entry for entry in input_path.iterdir() if entry.is_dir())
+    for folder in subfolders:
+        print(f'Rearranging {folder}')
+        split = folder.name
+        if split == 'train':
+            rearrange_train(folder, output_path)
+        elif split == 'test':
+            rearrange_test(folder, output_path)
+        # 'distractor' (and anything else) is intentionally not processed.
+
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the two paths, make them absolute and
+    # hand them to rearrange_GREW.
+    cli = argparse.ArgumentParser(description='GREW rearrange tool')
+    cli.add_argument('-i', '--input_path', type=str, required=True,
+                     help='Root path of raw dataset.')
+    cli.add_argument('-o', '--output_path', type=str, default='GREW_rearranged',
+                     help='Root path for output.')
+    options = cli.parse_args()
+
+    rearrange_GREW(Path(options.input_path).resolve(),
+                   Path(options.output_path).resolve())
@@ -0,0 +1,19 @@
+# Datasets for MSGG
+MSGG requires the pose keypoints of other datasets (such as CASIA-B, GREW and Gait3D) to be converted from the COCO 17-keypoint format to the pyramid keypoint format it takes as input.
+
+## Data Pretreatment
+```python
+python datasets/MSGG/pyramid_keypoints_msgg.py --input_path Path_of_pose_pkl --output_path Path_of_pose_pyramid_pkl
+```
+
+## Citation
+```
+@article{peng2023learning,
+  title={Learning rich features for gait recognition by integrating skeletons and silhouettes},
+  author={Peng, Yunjie and Ma, Kang and Zhang, Yang and He, Zhiqiang},
+  journal={Multimedia Tools and Applications},
+  pages={1--22},
+  year={2023},
+  publisher={Springer}
+}
+```
@@ -0,0 +1,94 @@
+import os
+import os.path as osp
+import numpy as np
+import pdb
+import argparse
+import pickle
+
+# Joint name -> index in the 17-keypoint source layout that this script reads.
+ORG_KEYPOINTS = {
+    'nose'          :0,
+    'left_eye'      :1,
+    'right_eye'     :2,
+    'left_ear'      :3,
+    'right_ear'     :4,
+    'left_shoulder' :5,
+    'right_shoulder':6,
+    'left_elbow'    :7,
+    'right_elbow'   :8,
+    'left_wrist'    :9,
+    'right_wrist'   :10,
+    'left_hip'      :11,
+    'right_hip'     :12,
+    'left_knee'     :13,
+    'right_knee'    :14,
+    'left_ankle'    :15,
+    'right_ankle'   :16,
+}
+
+# Index in the 12-keypoint output layout -> joint name. The five head joints
+# of ORG_KEYPOINTS (nose, eyes, ears) do not appear here.
+NEW_KEYPOINTS = {
+    0: 'right_shoulder',
+    1: 'right_elbow',
+    2: 'right_knee',
+    3: 'right_hip',
+    4: 'left_elbow',
+    5: 'left_knee',
+    6: 'left_shoulder',
+    7: 'right_wrist',
+    8: 'right_ankle',
+    9: 'left_hip',
+    10: 'left_wrist',
+    11: 'left_ankle',
+}
+
+def get_index_mapping():
+    """Build the lookup from output keypoint index to source keypoint index.
+
+    Returns:
+        dict: For every index in NEW_KEYPOINTS, the ORG_KEYPOINTS index of the
+        joint carrying the same name.
+    """
+    return {
+        new_index: ORG_KEYPOINTS[joint_name]
+        for new_index, joint_name in NEW_KEYPOINTS.items()
+    }
+
+if __name__ == '__main__':
+    # Command-line entry point: walk <input_path>/<id>/<type>/<view>/<view>.pkl,
+    # reorder the keypoints of every frame into the 12-keypoint layout and
+    # write the result to the same relative location under <output_path>.
+    parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
+    parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
+    parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
+    args = parser.parse_args()
+
+    index_mapping = get_index_mapping()
+    data_path = args.input_path
+    des_path = args.output_path
+
+    id_list = sorted(os.listdir(data_path))
+    for _id in id_list:
+        type_list = sorted(os.listdir(osp.join(data_path, _id)))
+        for _type in type_list:
+            view_list = sorted(os.listdir(osp.join(data_path, _id, _type)))
+            for _view in view_list:
+                seq_info = [_id, _type, _view]
+                seq_info_str = '-'.join(seq_info)
+                seq_dir = osp.join(data_path, *seq_info)
+                des_dir = osp.join(des_path, *seq_info)
+                # The destination folder is created even when the input
+                # pickle turns out to be missing.
+                os.makedirs(des_dir, exist_ok=True)
+
+                pkl_name = "{}.pkl".format(_view)
+                seq_path = osp.join(seq_dir, pkl_name)
+                save_path = osp.join(des_dir, pkl_name)
+                seq_path_exists = osp.exists(seq_path)
+
+                if seq_path_exists is False:
+                    print("seq:{} input:{}. ".format(seq_info_str, seq_path_exists))
+                    continue
+                with open(seq_path, 'rb') as f:
+                    keypoints_data = pickle.load(f)
+                to_pickle = []
+                for keypoint in keypoints_data:
+                    # One (12, 3) array per frame, filled joint by joint from
+                    # the source frame through index_mapping.
+                    mapped_keypoints = np.zeros((12, 3))
+                    for i in range(mapped_keypoints.shape[0]):
+                        mapped_keypoints[i] = keypoint[index_mapping[i]]
+                    to_pickle.append(mapped_keypoints)
+                keypoints = np.stack(to_pickle)
+                # Close the output file deterministically.
+                with open(save_path, 'wb') as f:
+                    pickle.dump(keypoints, f)
+
+            if view_list:
+                # Report the last view handled for this type; a type folder
+                # without views has nothing to report.
+                print("FINISHED: " + "-".join([_id, _type, view_list[-1]]))
+                
@@ -35,15 +35,27 @@ python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_pa
            ......
        ......
    ```
-Step3 : To rearrange directory of OUMVLP dataset, turning to id-type-view structure, Run 
+Step3-1: To rearrange the directory of the OUMVLP dataset (for silhouettes) into the id-type-view structure, run
 ```
 python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-rearranged
 ```  
+Step3-2: To rearrange the directory of the OUMVLP dataset (for poses) into the id-type-view structure, run
+```
+python datasets/OUMVLP/rearrange_OUMVLP_pose.py --input_path Path_of_OUMVLP-pose --output_path Path_of_OUMVLP-pose-rearranged
+```  

-Step4: Transforming images to pickle file, run 
+Step4-1: To transform the images into pickle files, run
 ```
 python datasets/pretreatment.py --input_path Path_of_OUMVLP-rearranged --output_path Path_of_OUMVLP-pkl
 ```
+Step4-2: To transform the pose json files into pickle files, run
+```
+python datasets/pretreatment.py --input_path Path_of_OUMVLP-pose-rearranged --output_path Path_of_OUMVLP-pose-pkl --pose --dataset OUMVLP
+```
+To generate the 17-keypoint pose format from the 18-keypoint pose format, run
+```
+python datasets/OUMVLP/rearrange_OUMVLP_pose.py --input_path Path_of_OUMVLP-pose18 --output_path Path_of_OUMVLP-pose17
+```

 - Processed
    ```
@@ -0,0 +1,85 @@
+import pickle
+from tqdm import tqdm
+from pathlib import Path
+import os
+import os.path as osp
+import argparse
+import logging
+
+'''
+    Generate the 17-keypoint pose format from the 18-keypoint pose format.
+    OUMVLP 17
+               # keypoints = {
+            #     0: "nose",
+            #     1: "left_eye",
+            #     2: "right_eye",
+            #     3: "left_ear",
+            #     4: "right_ear",
+            #     5: "left_shoulder",
+            #     6: "right_shoulder",
+            #     7: "left_elbow",
+            #     8: "right_elbow",
+            #     9: "left_wrist",
+            #     10: "right_wrist",
+            #     11: "left_hip",
+            #     12: "right_hip",
+            #     13: "left_knee",
+            #     14: "right_knee",
+            #     15: "left_ankle",
+            #     16: "right_ankle"
+            # }
+    OUMVLP 18
+    mask=[0,15,14,17,16,5,2,6,3,7,4,11,8,12,9,13,10]
+           # keypoints = {
+            #     0: "nose",  
+            #     1: "neck",
+            #     2: "Rshoulder",
+            #     3: "Relbow",
+            #     4: "Rwrist",
+            #     5: "Lshoudler",
+            #     6: "Lelbow",
+            #     7: "Lwrist",
+            #     8: "Rhip",
+            #     9: "Rknee",
+            #     10: "Rankle",
+            #     11: "Lhip",
+            #     12: "Lknee",
+            #     13: "Lankle",
+            #     14: "Reye",
+            #     15: "Leye",
+            #     16: "Rear",
+            #     17: "Lear"
+            # }
+'''
+
+def ToOUMVLP17(input_path: Path, output_path: Path):
+    """Convert OUMVLP pose pickles from the 18-keypoint to the 17-keypoint layout.
+
+    Walks ``<input_path>/<subject>/<seq>/<view>/<view>.pkl``, reorders the
+    second-to-last axis of the loaded array with ``mask`` and writes the result
+    to the same relative location under ``output_path``. Entries that are not
+    directories are skipped at every level; a view folder without its pickle
+    still raises.
+
+    Args:
+        input_path (Path): Root of the 18-keypoint pose pickles.
+        output_path (Path): Root directory for the 17-keypoint pose pickles.
+    """
+    # mask[i] is the 18-keypoint index of the i-th keypoint of the 17-keypoint
+    # layout; index 1 (the neck) has no counterpart and is dropped.
+    mask=[0,15,14,17,16,5,2,6,3,7,4,11,8,12,9,13,10]
+    TOTAL_SUBJECTS = 10307
+    progress = tqdm(total=TOTAL_SUBJECTS)
+
+    for subject in input_path.iterdir():
+        if not subject.is_dir():
+            continue
+        output_subject = subject.name
+        for seq in subject.iterdir():
+            if not seq.is_dir():
+                continue
+            output_seq = seq.name
+            for view in seq.iterdir():
+                if not view.is_dir():
+                    continue
+                src = os.path.join(view, f"{view.name}.pkl")
+                dst = os.path.join(output_path, output_subject, output_seq, view.name)
+                os.makedirs(dst, exist_ok=True)
+                with open(src,'rb') as f:
+                    srcdata = pickle.load(f)
+                    #[T,18,3]
+                data = srcdata[...,mask,:].copy()
+                # #[T,17,3]
+                pkl_path = os.path.join(dst,f'{view.name}.pkl')
+                # Close the output file deterministically.
+                with open(pkl_path,'wb') as f:
+                    pickle.dump(data, f)
+        progress.update(1)
+
+if __name__ == '__main__':
+    # Command-line entry point for the 18 -> 17 keypoint conversion.
+    parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
+    parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
+    parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
+    parser.add_argument('-l', '--log_to_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
+    args = parser.parse_args()
+    # Route the log records to the file requested with --log_to_file; without
+    # this call the INFO messages below are discarded by the default logger.
+    logging.basicConfig(level=logging.INFO, filename=args.log_to_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
+    logging.info('Begin')
+    ToOUMVLP17(input_path=Path(args.input_path), output_path=Path(args.output_path))
+    logging.info('Done')
@@ -0,0 +1,44 @@
+import argparse
+import os
+import shutil
+from pathlib import Path
+from typing import Tuple
+from tqdm import tqdm
+
+
+# Total handed to the tqdm progress bar in rearrange, which ticks once per
+# top-level folder of the input path.
+TOTAL_SUBJECTS = 10307
+
+
+def sanitize(name: str) -> Tuple[str, str]:
+    """Split a folder name of the form ``<a>_<b>`` into its two parts.
+
+    Args:
+        name (str): Folder name containing exactly one underscore.
+
+    Returns:
+        Tuple[str, str]: The text before and after the underscore.
+
+    Raises:
+        ValueError: If ``name`` does not split into exactly two parts.
+    """
+    # Unpacking enforces the two-part shape here instead of at the caller,
+    # and the result is a real tuple as the annotation promises.
+    first, second = name.split('_')
+    return first, second
+
+
+def rearrange(input_path: Path, output_path: Path) -> None:
+    """Symlink OUMVLP pose json files into a subject/seq/view folder tree.
+
+    Each ``<input_path>/<subject>/<view>_<seq>/*.json`` file is linked as
+    ``<output_path>/<subject>/<seq>/<view>/<file name>``. Names already present
+    in the destination folder are left alone, so the script can be re-run.
+    Entries that are not directories are skipped at both levels.
+
+    Args:
+        input_path (Path): Root of the raw pose data.
+        output_path (Path): Root directory of the rearranged dataset.
+    """
+    os.makedirs(output_path, exist_ok=True)
+    progress = tqdm(total=TOTAL_SUBJECTS)
+    for folder in input_path.iterdir():
+        if not folder.is_dir():
+            continue
+        subject = folder.name
+        for sid in folder.iterdir():
+            if not sid.is_dir():
+                continue
+            view, seq = sanitize(sid.name)
+            src = os.path.join(input_path, subject,sid.name)
+            dst = os.path.join(output_path, subject, seq, view)
+            os.makedirs(dst, exist_ok=True)
+            # List the destination once per folder instead of once per file.
+            existing = set(os.listdir(dst))
+            for subfile in os.listdir(src):
+                if subfile.endswith('.json') and subfile not in existing:
+                    os.symlink(os.path.join(src, subfile),
+                               os.path.join(dst, subfile))
+        progress.update(1)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: parse the two paths, make them absolute and
+    # hand them to rearrange.
+    cli = argparse.ArgumentParser(description='OUMVLP rearrange tool')
+    cli.add_argument('-i', '--input_path', type=str, required=True,
+                     help='Root path of raw dataset.')
+    cli.add_argument('-o', '--output_path', type=str, default='OUMVLP_rearranged',
+                     help='Root path for output.')
+    options = cli.parse_args()
+
+    rearrange(Path(options.input_path).resolve(),
+              Path(options.output_path).resolve())
@@ -12,7 +12,7 @@ from typing import Tuple
 import cv2
 import numpy as np
 from tqdm import tqdm
-
+import json

 def imgs2pickle(img_groups: Tuple, output_path: Path, img_size: int = 64, verbose: bool = False, dataset='CASIAB') -> None:
    """Reads a group of images and saves the data in pickle format.
@@ -127,6 +127,92 @@ def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: i
            progress.update(1)
    logging.info('Done')

+def txts2pickle(txt_groups: Tuple, output_path: Path, verbose: bool = False, dataset='CASIAB') -> None:
+    """Reads a group of pose files and saves the keypoints in pickle format.
+
+    For ``dataset == 'OUMVLP'`` every file is parsed as json and the
+    ``pose_keypoints_2d`` list of the first entry of ``people`` is used; frames
+    without people are skipped and unreadable files are logged and skipped.
+    For any other dataset every file is parsed as comma-separated values and
+    the first two values are dropped. In both cases the values of one file are
+    reshaped to rows of three, the frames are stacked in sorted path order and
+    written to ``<output_path>/<sid>/<seq>/<view>/<view>.pkl``. Nothing is
+    written when no frame is valid.
+
+    Args:
+        txt_groups (Tuple): Tuple of (sid, seq, view) and list of pose file paths.
+        output_path (Path): Output path.
+        verbose (bool, optional): Display debug info. Defaults to False.
+        dataset (str, optional): Dataset name, selects the parser. Defaults to 'CASIAB'.
+    """
+
+    sinfo = txt_groups[0]
+    txt_paths = txt_groups[1]
+    to_pickle = []
+    if dataset == 'OUMVLP':
+        for txt_file in sorted(txt_paths):
+            try:
+                with open(txt_file) as f:
+                    jsondata = json.load(f)
+                if len(jsondata['people'])==0:
+                    continue
+                data = np.array(jsondata["people"][0]["pose_keypoints_2d"]).reshape(-1,3)
+                to_pickle.append(data)
+            except (OSError, ValueError, KeyError, IndexError, TypeError) as err:
+                # Best effort: an unreadable or malformed frame is reported
+                # and skipped instead of aborting the whole sequence.
+                logging.warning(f'Skipping {txt_file}: {err!r}')
+    else:
+        for txt_file in sorted(txt_paths):
+            if verbose:
+                logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {txt_file}')
+            # The first two comma-separated values are not keypoint data.
+            data = np.genfromtxt(txt_file, delimiter=',')[2:].reshape(-1,3)
+            to_pickle.append(data)
+
+    if to_pickle:
+        dst_path = os.path.join(output_path, *sinfo)
+        keypoints = np.stack(to_pickle)
+        os.makedirs(dst_path, exist_ok=True)
+        pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
+        if verbose:
+            logging.debug(f'Saving {pkl_path}...')
+        # Close the output file deterministically.
+        with open(pkl_path, 'wb') as f:
+            pickle.dump(keypoints, f)
+        logging.info(f'Saved {len(to_pickle)} valid frames\' keypoints to {pkl_path}.')
+
+    if len(to_pickle) < 5:
+        logging.warning(f'{sinfo} has less than 5 valid data.')
+
+
+
+def pretreat_pose(input_path: Path, output_path: Path, workers: int = 4, verbose: bool = False, dataset='CASIAB') -> None:
+    """Reads a pose dataset and saves the data in pickle format.
+
+    Pose files are collected recursively (``*.json`` for ``dataset ==
+    'OUMVLP'``, ``*.txt`` otherwise), grouped by the three folder names above
+    each file (sid, seq, view) and converted group by group with
+    ``txts2pickle`` in a process pool.
+
+    Args:
+        input_path (Path): Dataset root path.
+        output_path (Path): Output path.
+        workers (int, optional): Number of worker processes. Defaults to 4.
+        verbose (bool, optional): Display debug info. Defaults to False.
+        dataset (str, optional): Dataset name, selects the file type and parser. Defaults to 'CASIAB'.
+    """
+    txt_groups = defaultdict(list)
+    logging.info(f'Listing {input_path}')
+    total_files = 0
+    pattern = '*.json' if dataset == 'OUMVLP' else '*.txt'
+    for pose_path in input_path.rglob(pattern):
+        if verbose:
+            logging.debug(f'Adding {pose_path}')
+        # The last four path components are sid/seq/view/file name.
+        *_, sid, seq, view, _ = pose_path.as_posix().split('/')
+        txt_groups[(sid, seq, view)].append(pose_path)
+        total_files += 1
+
+    logging.info(f'Total files listed: {total_files}')
+
+    progress = tqdm(total=len(txt_groups), desc='Pretreating', unit='folder')
+
+    with mp.Pool(workers) as pool:
+        logging.info(f'Start pretreating {input_path}')
+        # Forward the `dataset` argument itself rather than the script-level
+        # `args`, so the function also works when imported.
+        for _ in pool.imap_unordered(partial(txts2pickle, output_path=output_path, verbose=verbose, dataset=dataset), txt_groups.items()):
+            progress.update(1)
+    logging.info('Done')
+
+

 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
@@ -137,6 +223,7 @@ if __name__ == '__main__':
    parser.add_argument('-r', '--img_size', default=64, type=int, help='Image resizing size. Default 64')
    parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
    parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
+    parser.add_argument('-p', '--pose', default=False, action='store_true', help='Processing pose.')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
@@ -146,5 +233,7 @@ if __name__ == '__main__':
        logging.info('Verbose mode is on.')
        for k, v in args.__dict__.items():
            logging.debug(f'{k}: {v}')
-
-    pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)
+    if args.pose:
+        pretreat_pose(input_path=Path(args.input_path), output_path=Path(args.output_path), workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)
+    else:
+        pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)
@@ -49,7 +49,10 @@ class TripletSampler(tordata.sampler.Sampler):
        return len(self.dataset)


-def sync_random_sample_list(obj_list, k):
+def sync_random_sample_list(obj_list, k, common_choice=False):
+    if common_choice:
+        idx = random.choices(range(len(obj_list)), k=k) 
+        idx = torch.tensor(idx)
    if len(obj_list) < k:
        idx = random.choices(range(len(obj_list)), k=k)
        idx = torch.tensor(idx)
@@ -97,3 +100,37 @@ class InferenceSampler(tordata.sampler.Sampler):

    def __len__(self):
        return len(self.dataset)
+
+
+class CommonSampler(tordata.sampler.Sampler):
+    """Endless batch sampler that draws indices from the whole dataset.
+
+    Every iteration draws ``batch_size`` indices through
+    ``sync_random_sample_list(..., common_choice=True)`` and yields the share
+    of the current process: every ``world_size``-th index starting at ``rank``.
+
+    Args:
+        dataset: Dataset to sample from; only its length is used here.
+        batch_size (int): Number of indices drawn per iteration across all
+            processes; must be divisible by the world size.
+        batch_shuffle: Stored on the instance; not read by this class.
+
+    Raises:
+        ValueError: If ``batch_size`` is not an int or is not divisible by the
+            world size.
+    """
+    def __init__(self,dataset,batch_size,batch_shuffle):
+
+        self.dataset = dataset
+        self.size = len(dataset)
+        self.batch_size = batch_size
+        if not isinstance(self.batch_size, int):
+            raise ValueError(
+                "batch_size should be an int (B), not {}".format(batch_size))
+        self.batch_shuffle = batch_shuffle
+
+        self.world_size = dist.get_world_size()
+        if self.batch_size % self.world_size !=0:
+            raise ValueError("batch_size ({}) is not divisible by world size ({})".format(
+                batch_size, self.world_size))
+        self.rank = dist.get_rank()
+
+    def __iter__(self):
+        while True:
+            indices_list = list(range(self.size))
+            sample_indices = sync_random_sample_list(
+                    indices_list, self.batch_size, common_choice=True)
+            total_batch_size =  self.batch_size
+            # Round the batch up to a multiple of the world size.
+            total_size = int(math.ceil(total_batch_size /
+                                       self.world_size)) * self.world_size
+            # NOTE(review): presumably sample_indices is a list of batch_size
+            # indices, in which case this slice is empty - confirm against
+            # sync_random_sample_list.
+            sample_indices += sample_indices[:(
+                total_batch_size - len(sample_indices))]
+            # Keep only the indices assigned to this rank.
+            sample_indices = sample_indices[self.rank:total_size:self.world_size]
+            yield sample_indices
+
+    def __len__(self):
+        return len(self.dataset)
@@ -196,3 +196,257 @@ def get_transform(trf_cfg=None):
        transform = [get_transform(cfg) for cfg in trf_cfg]
        return transform
    raise "Error type for -Transform-Cfg-"
+
+
+# **************** For pose ****************
+class RandomSelectSequence(object):
+    """
+    Randomly select a contiguous subsequence of fixed length along axis 0.
+
+    Args:
+        sequence_length: Number of consecutive frames to keep.
+
+    Raises:
+        ValueError: If the input has fewer than ``sequence_length`` frames.
+    """
+    def __init__(self, sequence_length=10):
+        self.sequence_length = sequence_length
+
+    def __call__(self, data):
+        num_frames = data.shape[0]
+        if num_frames < self.sequence_length:
+            raise ValueError("The sequence length of data is too short, which does not meet the requirements.")
+        # np.random.randint excludes its upper bound, so +1 is needed to make
+        # the last valid start reachable (and to accept an input that is
+        # exactly sequence_length frames long).
+        start = np.random.randint(0, num_frames - self.sequence_length + 1)
+        end = start + self.sequence_length
+        return data[start:end]
+
+
+class SelectSequenceCenter(object):
+    """
+    Select the centered subsequence of fixed length along axis 0.
+
+    Args:
+        sequence_length: Number of consecutive frames to keep.
+
+    Raises:
+        ValueError: If the input has fewer than ``sequence_length`` frames.
+    """
+    def __init__(self, sequence_length=10):
+        self.sequence_length = sequence_length
+
+    def __call__(self, data):
+        # The start arithmetic below cannot raise by itself; without this
+        # check a short input gives a negative start and a wrong slice.
+        if data.shape[0] < self.sequence_length:
+            raise ValueError("The sequence length of data is too short, which does not meet the requirements.")
+        start = int((data.shape[0] / 2) - (self.sequence_length / 2))
+        end = start + self.sequence_length
+        return data[start:end]
+
+
+class MirrorPoses(object):
+    """
+    Mirror channel 0 of every frame around that frame's mean, with a given probability.
+
+    Args:
+        prob: Probability of applying the mirror to a call.
+
+    Note:
+        The input array is modified in place and also returned.
+    """
+    def __init__(self, prob=0.5):
+        # The parameter is named ``prob``; it is stored as ``probability``.
+        self.probability = prob
+
+    def __call__(self, data):
+        if np.random.random() <= self.probability:
+            # Per-frame mean of channel 0 over axis 1, kept broadcastable.
+            center = np.mean(data[:, :, 0], axis=1, keepdims=True)
+            # Reflection about the center: x -> 2 * center - x.
+            data[:, :, 0] = center - data[:, :, 0] + center
+
+        return data
+
+
+class NormalizeEmpty(object):
+    """
+    Replace joints whose channel 0 is exactly zero.
+
+    For every (frame, joint) with ``data[frame, joint, 0] == 0`` channels 0
+    and 1 are set to the mean of that frame over its joints and channel 2 is
+    set to 0. The input is modified in place and returned.
+    """
+    def __call__(self, data):
+        empty_frames, empty_joints = np.nonzero(data[:, :, 0] == 0)
+        for t, j in zip(empty_frames, empty_joints):
+            # Recomputed for every entry: earlier replacements in the same
+            # frame feed into later means.
+            frame_mean = data[t].mean(axis=0)
+            mean_c0, mean_c1 = frame_mean[0], frame_mean[1]
+            data[t, j, 0] = mean_c0
+            data[t, j, 1] = mean_c1
+            data[t, j, 2] = 0
+        return data
+
+
+class RandomMove(object):
+    """
+    Move: add one random offset to channels 0 and 1 of the whole sequence.
+
+    Args:
+        random_r: Pair ``(r0, r1)``; the offset for channel 0 is drawn
+            uniformly from ``[-r0, r0]`` and for channel 1 from ``[-r1, r1]``.
+            Channel 2 is left unchanged.
+
+    Note:
+        The input array is modified in place and also returned.
+    """
+    def __init__(self, random_r=(4, 1)):
+        # Tuple default avoids sharing one mutable list between instances.
+        self.random_r = random_r
+
+    def __call__(self, data):
+        noise = np.zeros(3)
+        noise[0] = np.random.uniform(-self.random_r[0], self.random_r[0])
+        noise[1] = np.random.uniform(-self.random_r[1], self.random_r[1])
+        # Broadcasting applies the same 3-vector to every frame and joint.
+        data += noise
+        return data
+
+
+class PointNoise(object):
+    """
+    Perturb every element of the input with independent Gaussian noise.
+
+    std: standard deviation of the zero-mean noise
+    """
+    def __init__(self, std=0.01):
+        self.std = std
+
+    def __call__(self, data):
+        perturbation = np.random.normal(0, self.std, data.shape)
+        # Noise is cast to float32 before the addition; a new array is returned.
+        return data + perturbation.astype(np.float32)
+
+
+class FlipSequence(object):
+    """
+    Reverse the sequence along axis 0 with a given probability.
+    """
+    def __init__(self, probability=0.5):
+        self.probability = probability
+
+    def __call__(self, data):
+        if np.random.random() > self.probability:
+            # No flip: the input object itself is returned.
+            return data
+        reversed_view = np.flip(data, axis=0)
+        # Copy so the result does not carry negative strides.
+        return reversed_view.copy()
+
+
+class InversePosesPre(object):
+    '''
+    Swap left/right joints of individual frames.
+
+    Each frame is permuted independently with the given probability, using a
+    fixed joint permutation chosen by ``joint_format`` ('coco', or
+    'alphapose' / 'openpose'). The input is modified in place and returned.
+    '''
+    def __init__(self, probability=0.5, joint_format='coco'):
+        self.probability = probability
+        if joint_format == 'coco':
+            permutation = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+        elif joint_format in ['alphapose', 'openpose']:
+            permutation = [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]
+        else:
+            raise ValueError("Invalid joint_format.")
+        self.invers_arr = permutation
+
+    def __call__(self, data):
+        for frame in data:
+            # One random draw per frame.
+            if np.random.random() <= self.probability:
+                # Fancy indexing copies first, so writing into the view is safe.
+                frame[:] = frame[self.invers_arr, :]
+        return data
+
+
+class JointNoise(object):
+    """
+    Add one Gaussian offset per joint, shared by all frames.
+
+    Channels 0 and 1 receive zero-mean noise, channel 2 receives none.
+    std: standard deviation
+    """
+
+    def __init__(self, std=0.25):
+        self.std = std
+
+    def __call__(self, data):
+        # data is indexed as (T, V, C)
+        num_joints = data.shape[1]
+        xy_offsets = np.random.normal(0, self.std, (num_joints, 2))
+        no_offset = np.zeros((num_joints, 1))
+        per_joint = np.concatenate((xy_offsets, no_offset), axis=1).astype(np.float32)
+        # Leading axis of size 1 broadcasts the same offsets over every frame.
+        return data + per_joint[np.newaxis, ...]
+
+
+class GaitTRMultiInput(object):
+    """
+    Build five 2-channel feature streams from the first two channels of a pose sequence.
+
+    Input is indexed as (T, V, C); output is (T, V, 5 * 2) with the streams
+    laid out as: raw values, offset to joint 0, 1-frame difference, 2-frame
+    difference, and offset to the connected joint given by ``connect_joint``.
+    """
+    def __init__(self, joint_format='coco',):
+        if joint_format == 'coco':
+            self.connect_joint = np.array([5,0,0,1,2,0,0,5,6,7,8,5,6,11,12,13,14])
+        elif joint_format in ['alphapose', 'openpose']:
+            self.connect_joint = np.array([1,1,1,2,3,1,5,6,2,8,9,5,11,12,0,0,14,15])
+        else:
+            raise ValueError("Invalid joint_format.")
+
+    def __call__(self, data):
+        # (T, V, C) -> (C, T, V), then keep only the first two channels
+        xy = np.transpose(data, (2, 0, 1))[:2, :, :]
+
+        C, T, V = xy.shape
+        streams = np.zeros((5, C, T, V))
+        # Joints
+        streams[0] = xy
+        streams[1] = xy - xy[:, :, :1]
+        # Velocity: only the first T - 2 frames are filled, the rest stay zero
+        n_vel = max(T - 2, 0)
+        streams[2, :, :n_vel, :] = xy[:, 1:1 + n_vel, :] - xy[:, :n_vel, :]
+        streams[3, :, :n_vel, :] = xy[:, 2:2 + n_vel, :] - xy[:, :n_vel, :]
+        # Bones
+        for joint, neighbour in enumerate(self.connect_joint):
+            streams[4, :, :, joint] = xy[:, :, joint] - xy[:, :, neighbour]
+
+        # (I, C, T, V) -> (I * C, T, V) -> (T, V, I * C)
+        merged = streams.reshape(streams.shape[0] * C, T, V)
+        return np.transpose(merged, (1, 2, 0))
+
+
+class GaitGraphMultiInput(object):
+    """
+    Expand a (T, V, C) pose sequence into three feature branches of width C + 2.
+
+    Output is (T, V, 3, C + 2): branch 0 holds the raw input plus the offset
+    to the ``center`` joint, branch 1 holds 1- and 2-frame differences,
+    branch 2 holds bone vectors and their arccos-normalised components.
+    Index 3 of branches 1 and 2 is overwritten with input channel 2.
+    NOTE(review): the hard-coded index 3 suggests C == 3 is expected - confirm.
+    """
+    def __init__(self, center=0, joint_format='coco'):
+        self.center = center
+        if joint_format == 'coco':
+            self.connect_joint = np.array([5,0,0,1,2,0,0,5,6,7,8,5,6,11,12,13,14])
+        elif joint_format in ['alphapose', 'openpose']:
+            self.connect_joint = np.array([1,1,1,2,3,1,5,6,2,8,9,5,11,12,0,0,14,15])
+        else:
+            raise ValueError("Invalid joint_format.")
+
+    def __call__(self, data):
+        T, V, C = data.shape
+        feats = np.zeros((T, V, 3, C + 2))
+        # Joints
+        feats[:, :, 0, :C] = data
+        for v in range(V):
+            feats[:, v, 0, C:] = data[:, v, :2] - data[:, self.center, :2]
+        # Velocity: only the first T - 2 frames are filled
+        n_vel = max(T - 2, 0)
+        feats[:n_vel, :, 1, :2] = data[1:1 + n_vel, :, :2] - data[:n_vel, :, :2]
+        feats[:n_vel, :, 1, 3:] = data[2:2 + n_vel, :, :2] - data[:n_vel, :, :2]
+        feats[:, :, 1, 3] = data[:, :, 2]
+        # Bones
+        for v in range(V):
+            feats[:, v, 2, :2] = data[:, v, :2] - data[:, self.connect_joint[v], :2]
+        # Angles
+        squared = sum(np.square(feats[:, :, 2, c]) for c in range(C - 1))
+        # Small epsilon keeps the division defined for zero-length bones
+        bone_length = np.sqrt(squared) + 0.0001
+        for c in range(C - 1):
+            feats[:, :, 2, C + c] = np.arccos(feats[:, :, 2, c] / bone_length)
+        feats[:, :, 2, 3] = data[:, :, 2]
+        return feats
+
+class GaitGraph1Input(object):
+    '''
+    Reorder the axes of the input and append a trailing singleton axis.
+    '''
+    def __call__(self, data):
+        # (T V C) -> (C T V) -> (C T V 1)
+        channel_first = np.transpose(data, axes=(2, 0, 1))
+        return np.expand_dims(channel_first, axis=-1)
+
+class SkeletonInput(object):
+    '''
+    Swap the last two axes of the input and append a trailing singleton axis.
+    '''
+    def __call__(self, data):
+        # (T V C) -> (T C V) -> (T C V 1)
+        swapped = np.transpose(data, axes=(0, 2, 1))
+        return np.expand_dims(swapped, axis=-1)
+
+class TwoView(object):
+    """
+    Apply the same composed transform twice and concatenate both results along axis 1.
+
+    trf_cfg: list of transform configs, each resolved through get_transform.
+    """
+    def __init__(self,trf_cfg):
+        assert is_list(trf_cfg)
+        steps = [get_transform(cfg) for cfg in trf_cfg]
+        self.transform = T.Compose(steps)
+
+    def __call__(self, data):
+        # Two separate calls, so random transforms give two different views.
+        first_view = self.transform(data)
+        second_view = self.transform(data)
+        return np.concatenate((first_view, second_view), axis=1)
+
+
+class MSGGTransform():
+    """
+    Select a fixed subset of 12 joints along the second-to-last axis.
+
+    The joint indices depend on ``joint_format`` ('coco', or 'alphapose' /
+    'openpose'); any other value raises ValueError.
+    """
+    def __init__(self, joint_format="coco"):
+        if joint_format == "coco": #17
+            selected = [6,8,14,12,7,13,5,10,16,11,9,15]
+        elif joint_format in ['alphapose', 'openpose']: #18
+            selected = [2,3,9,8,6,12,5,4,10,11,7,13]
+        else:
+            raise ValueError("Invalid joint_format.")
+        self.mask = selected
+
+    def __call__(self, x):
+        subset = x[..., self.mask, :]
+        return subset.copy()
@@ -0,0 +1,135 @@
+import torch
+import torch.nn as nn
+from ..modules import TemporalBasicBlock, TemporalBottleneckBlock, SpatialBasicBlock, SpatialBottleneckBlock
+
+class ResGCNModule(nn.Module):
+    """
+        ResGCNModule
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+                https://github.com/BNU-IVC/FastPoseGait
+
+        One spatial block followed by one temporal block, with an optional
+        module-level residual path and a learnable edge weighting of ``A``.
+
+        Args:
+            in_channels, out_channels: channel widths of the module.
+            block: one of 'initial', 'Basic' or 'Bottleneck'.
+            A: adjacency tensor; only its shape is used at construction.
+            stride: temporal stride passed to the temporal block.
+            kernel_size: (temporal_window_size, max_graph_distance); the
+                temporal window must be odd.
+            reduction, get_res, is_main: forwarded to the spatial/temporal
+                blocks imported from ``..modules``.
+
+        Raises:
+            ValueError: on a malformed ``kernel_size`` or unknown ``block``.
+    """
+    def __init__(self, in_channels, out_channels, block, A, stride=1, kernel_size=(9, 2), reduction=4, get_res=False, is_main=False):
+        super(ResGCNModule, self).__init__()
+
+        # Raise directly: the logging module is not imported in this file.
+        if len(kernel_size) != 2:
+            raise ValueError('Error: Please check whether len(kernel_size) == 2')
+        if kernel_size[0] % 2 != 1:
+            raise ValueError('Error: Please check whether kernel_size[0] % 2 == 1')
+        if block not in ('initial', 'Basic', 'Bottleneck'):
+            # Previously this fell through to an unbound spatial_block below.
+            raise ValueError('Error: Unknown block type {}'.format(block))
+        temporal_window_size, max_graph_distance = kernel_size
+
+        if block == 'initial':
+            module_res, block_res = False, False
+        elif block == 'Basic':
+            module_res, block_res = True, False
+        else:
+            module_res, block_res = False, True
+
+        if not module_res:
+            self.residual = lambda x: 0
+        elif stride == 1 and in_channels == out_channels:
+            self.residual = lambda x: x
+        else:
+            # Shape changes: project the residual with a strided 1x1 conv.
+            self.residual = nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, 1, (stride, 1)),
+                nn.BatchNorm2d(out_channels),
+            )
+
+        if block == 'Bottleneck':
+            spatial_block = SpatialBottleneckBlock
+            temporal_block = TemporalBottleneckBlock
+        else:
+            spatial_block = SpatialBasicBlock
+            temporal_block = TemporalBasicBlock
+        self.scn = spatial_block(in_channels, out_channels, max_graph_distance, block_res, reduction)
+        tcn_stride = bool(in_channels == out_channels and is_main)
+        self.tcn = temporal_block(out_channels, temporal_window_size, stride, block_res, reduction, get_res=get_res, tcn_stride=tcn_stride)
+        # Learnable per-edge weights, initialised to one.
+        self.edge = nn.Parameter(torch.ones_like(A))
+
+    def forward(self, x, A):
+        # Follow the input's device so the module also runs on CPU.
+        A = A.to(x.device)
+        return self.tcn(self.scn(x, A * self.edge), self.residual(x))
+
+class ResGCNInputBranch(nn.Module):
+    """
+        ResGCNInputBranch_Module
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+
+        Batch-normalises the input, then runs a chain of ResGCNModule layers
+        whose widths are consecutive pairs of ``input_branch``. The first
+        layer always uses the 'initial' block type, the others use ``block``.
+    """
+    def __init__(self, input_branch, block, A, input_num , reduction = 4):
+        super(ResGCNInputBranch, self).__init__()
+
+        self.register_buffer('A', A)
+
+        stages = [
+            ResGCNModule(
+                input_branch[idx], input_branch[idx + 1],
+                'initial' if idx == 0 else block,
+                A, reduction=reduction)
+            for idx in range(len(input_branch) - 1)
+        ]
+
+        self.bn = nn.BatchNorm2d(input_branch[0])
+        self.layers = nn.ModuleList(stages)
+
+    def forward(self, x):
+        out = self.bn(x)
+        for stage in self.layers:
+            out = stage(out, self.A)
+        return out
+    
+    
+class ResGCN(nn.Module):
+    """
+        ResGCN
+        Arxiv: https://arxiv.org/abs/2010.09978
+
+        ``input_num`` parallel input branches are concatenated on the channel
+        axis, passed through a main stream of ResGCNModule layers, globally
+        average-pooled and projected to ``num_class`` outputs.
+
+        Args:
+            input_num: number of input branches (size of dim 1 of the input).
+            input_branch: channel widths of each input branch.
+            main_stream: channel widths of the main stream.
+            num_class: output feature size.
+            reduction, block: forwarded to every ResGCNModule.
+            graph: adjacency tensor shared by all modules.
+    """
+    def __init__(self, input_num, input_branch, main_stream,num_class, reduction, block, graph):
+        super(ResGCN, self).__init__()
+        self.graph = graph
+        self.head= nn.ModuleList(
+            ResGCNInputBranch(input_branch, block, graph, input_num ,reduction)
+            for _ in range(input_num)
+        )
+
+        main_stream_list = []
+        for i in range(len(main_stream)-1):
+            # Stride 2 whenever the channel width changes.
+            if main_stream[i]==main_stream[i+1]:
+                stride = 1
+            else:
+                stride = 2
+            if i ==0:
+                # First layer takes the concatenated branches and keeps stride 1.
+                main_stream_list.append(ResGCNModule(main_stream[i]*input_num,main_stream[i+1],block,graph,stride=1,reduction = reduction,get_res=True,is_main=True))
+            else:
+                main_stream_list.append(ResGCNModule(main_stream[i],main_stream[i+1],block,graph,stride = stride, reduction = reduction,is_main=True))
+        self.backbone = nn.ModuleList(main_stream_list)
+        self.global_pooling = nn.AdaptiveAvgPool2d(1)
+        # Use the last main-stream width rather than a hard-coded 256.
+        self.fcn = nn.Linear(main_stream[-1], num_class)
+
+    def forward(self, x):
+        # input branch: x[:, i] is the input of branch i
+        x_cat = []
+        for i, branch in enumerate(self.head):
+            x_cat.append(branch(x[:, i]))
+        x = torch.cat(x_cat, dim=1)
+
+        # main stream
+        for layer in self.backbone:
+            x = layer(x, self.graph)
+
+        # output: (N, C, 1, 1) -> (N, C) -> (N, num_class)
+        x = self.global_pooling(x)
+        x = x.squeeze(-1)
+        x = self.fcn(x.squeeze((-1)))
+
+        return x
@@ -144,6 +144,7 @@ class BaseModel(MetaModel, nn.Module):

        self.build_network(cfgs['model_cfg'])
        self.init_parameters()
+        self.seq_trfs = get_transform(self.engine_cfg['transform'])

        self.msg_mgr.log_info(cfgs['data_cfg'])
        if training:
@@ -299,8 +300,7 @@ class BaseModel(MetaModel, nn.Module):
            tuple: training data including inputs, labels, and some meta data.
        """
        seqs_batch, labs_batch, typs_batch, vies_batch, seqL_batch = inputs
-        trf_cfgs = self.engine_cfg['transform']
-        seq_trfs = get_transform(trf_cfgs)
+        seq_trfs = self.seq_trfs
        if len(seqs_batch) != len(seq_trfs):
            raise ValueError(
                "The number of types of input data and transform should be same. But got {} and {}".format(len(seqs_batch), len(seq_trfs)))
@@ -0,0 +1,107 @@
+'''
+Modified from https://github.com/BNU-IVC/FastPoseGait/blob/main/fastposegait/modeling/losses/supconloss.py
+'''
+
+import torch.nn as nn
+import torch
+from .base import BaseLoss, gather_and_scale_wrapper
+
+class SupConLoss_Re(BaseLoss):
+    """Loss wrapper that evaluates SupConLoss and records the value in ``self.info``.
+
+    ``mask`` is accepted by ``forward`` but not passed on to the inner loss.
+    """
+    def __init__(self, temperature=0.01):
+        super().__init__()
+        self.train_loss = SupConLoss(temperature=temperature)
+
+    @gather_and_scale_wrapper
+    def forward(self, features, labels=None, mask=None):
+        loss = self.train_loss(features, labels)
+        logged = {'loss': loss.detach().clone()}
+        self.info.update(logged)
+        return loss, self.info
+
+
+class SupConLoss(nn.Module):
+    """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
+    It also supports the unsupervised contrastive loss in SimCLR"""
+    def __init__(self, temperature=0.01, contrast_mode='all',
+                 base_temperature=0.07):
+        super(SupConLoss, self).__init__()
+        self.temperature = temperature
+        self.contrast_mode = contrast_mode
+        self.base_temperature = base_temperature
+
+    def forward(self, features, labels=None, mask=None):
+        """Compute loss for model. If both `labels` and `mask` are None,
+        it degenerates to SimCLR unsupervised loss:
+        https://arxiv.org/pdf/2002.05709.pdf
+        Args:
+            features: hidden vector of shape [bsz, n_views, ...].
+            labels: ground truth of shape [bsz].
+            mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
+                has the same class as sample i. Can be asymmetric.
+        Returns:
+            A loss scalar.
+        Raises:
+            ValueError: if `features` has fewer than 3 dims, if both `labels`
+                and `mask` are given, if the label count does not match the
+                batch size, or if `contrast_mode` is unknown.
+        """
+        # Stay on the exact device of the input (keeps the GPU index).
+        device = features.device
+
+        if len(features.shape) < 3:
+            raise ValueError('`features` needs to be [bsz, n_views, ...],'
+                             'at least 3 dimensions are required')
+        if len(features.shape) > 3:
+            features = features.view(features.shape[0], features.shape[1], -1)
+
+        batch_size = features.shape[0]
+        if labels is not None and mask is not None:
+            raise ValueError('Cannot define both `labels` and `mask`')
+        elif labels is None and mask is None:
+            mask = torch.eye(batch_size, dtype=torch.float32).to(device)
+        elif labels is not None:
+            labels = labels.contiguous().view(-1, 1)
+            if labels.shape[0] != batch_size:
+                raise ValueError('Num of labels does not match num of features')
+            mask = torch.eq(labels, labels.T).float().to(device)
+        else:
+            mask = mask.float().to(device)
+
+        contrast_count = features.shape[1]
+        # Stack the views along the batch axis: [bsz * n_views, dim]
+        contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
+        if self.contrast_mode == 'one':
+            anchor_feature = features[:, 0]
+            anchor_count = 1
+        elif self.contrast_mode == 'all':
+            anchor_feature = contrast_feature
+            anchor_count = contrast_count
+        else:
+            raise ValueError('Unknown mode: {}'.format(self.contrast_mode))
+
+        # compute logits
+        anchor_dot_contrast = torch.div(
+            torch.matmul(anchor_feature, contrast_feature.T),
+            self.temperature)
+        # for numerical stability
+        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
+        logits = anchor_dot_contrast - logits_max.detach()
+
+        # tile mask
+        mask = mask.repeat(anchor_count, contrast_count)
+        # mask-out self-contrast cases
+        logits_mask = torch.scatter(
+            torch.ones_like(mask),
+            1,
+            torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
+            0
+        )
+        mask = mask * logits_mask
+
+        # compute log_prob
+        exp_logits = torch.exp(logits) * logits_mask
+        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))
+
+        # compute mean of log-likelihood over positive
+        # An anchor with no positive pair would give 0 / 0 = NaN; use a
+        # denominator of 1 there so it contributes zero instead.
+        pos_per_anchor = mask.sum(1)
+        pos_per_anchor = torch.where(
+            pos_per_anchor < 1e-6, torch.ones_like(pos_per_anchor), pos_per_anchor)
+        mean_log_prob_pos = (mask * log_prob).sum(1) / pos_per_anchor
+
+        # loss
+        loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
+        loss = loss.view(anchor_count, batch_size).mean()
+
+        return loss
@@ -0,0 +1,19 @@
+'''
+Modified from https://github.com/BNU-IVC/FastPoseGait/blob/main/fastposegait/modeling/losses/supconloss_Lp.py
+'''
+
+from .base import BaseLoss, gather_and_scale_wrapper
+from pytorch_metric_learning import losses, distances
+
+class SupConLoss_Lp(BaseLoss):
+    """Loss wrapper around pytorch_metric_learning's SupConLoss with an Lp distance.
+
+    ``mask`` is accepted by ``forward`` but not passed on to the inner loss.
+    """
+    def __init__(self, temperature=0.01):
+        super().__init__()
+        self.distance = distances.LpDistance()
+        self.train_loss = losses.SupConLoss(
+            temperature=temperature, distance=self.distance)
+
+    @gather_and_scale_wrapper
+    def forward(self, features, labels=None, mask=None):
+        loss = self.train_loss(features, labels)
+        logged = {'loss': loss.detach().clone()}
+        self.info.update(logged)
+        return loss, self.info
+
@@ -0,0 +1,75 @@
+import torch
+from ..base_model import BaseModel
+from ..backbones.resgcn import ResGCN
+from ..modules import Graph
+import torch.nn.functional as F
+
+class GaitGraph1(BaseModel):
+    """
+        GaitGraph1: Gaitgraph: Graph Convolutional Network for Skeleton-Based Gait Recognition
+        Paper:    https://ieeexplore.ieee.org/document/9506717
+        Github:   https://github.com/tteepe/GaitGraph
+    """
+    def build_network(self, model_cfg):
+        """Read the model hyper-parameters, build the skeleton graph and the ResGCN backbone."""
+        self.joint_format = model_cfg['joint_format']
+        self.input_num = model_cfg['input_num']
+        self.block = model_cfg['block']
+        self.input_branch = model_cfg['input_branch']
+        self.main_stream = model_cfg['main_stream']
+        self.num_class = model_cfg['num_class']
+        self.reduction = model_cfg['reduction']
+        self.tta = model_cfg['tta']
+
+        ## Graph Init ##
+        self.graph = Graph(joint_format=self.joint_format,max_hop=3)
+        self.A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False)
+        ## Network ##
+        self.ResGCN = ResGCN(input_num=self.input_num, input_branch=self.input_branch,
+                             main_stream=self.main_stream, num_class=self.num_class,
+                             reduction=self.reduction, block=self.block,graph=self.A)
+
+    def forward(self, inputs):
+        """Embed a batch of pose sequences.
+
+        Training: the sequence is split into two temporal halves that act as
+        two views for the contrastive loss. Evaluation with ``tta``: the
+        embeddings of the sequence and its time-reversed copy are averaged.
+        Evaluation without ``tta``: the plain embedding is used.
+        """
+        ipts, labs, type_, view_, seqL = inputs
+        # NOTE(review): presumably (N, C, T, V, I) as produced by the
+        # GaitGraph1Input transform - confirm against the data pipeline.
+        x_input = ipts[0]
+        # -> N, T, V, I, C
+        x_input = x_input.permute(0, 2, 3, 4, 1).contiguous()
+        N, T, V, I, C = x_input.size()
+
+        pose  = x_input
+        if self.training:
+            # Two temporal halves stacked on the batch axis (batch becomes 2N).
+            x_input = torch.cat([x_input[:,:int(T/2),...],x_input[:,int(T/2):,...]],dim=0)
+        elif self.tta:
+            # Test-time augmentation: append the time-reversed sequences.
+            data_flipped = torch.flip(x_input,dims=[1])
+            x_input = torch.cat([x_input,data_flipped], dim=0)
+
+        # -> batch, I, C, T, V
+        x = x_input.permute(0, 3, 4, 1, 2).contiguous()
+
+        # resgcn
+        x = self.ResGCN(x)
+        x = F.normalize(x, dim=1, p=2) # norm #only for GaitGraph1 # Remove from GaitGraph2
+
+        if self.training:
+            f1, f2 = torch.split(x, [N, N], dim=0)
+            # [N, 2, num_class]: one row per view
+            embed = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1)
+        elif self.tta:
+            f1, f2 = torch.split(x, [N, N], dim=0)
+            embed = torch.mean(torch.stack([f1, f2]), dim=0)
+            embed = embed.unsqueeze(-1)
+        else:
+            # Plain evaluation: embed is built from x (it was previously read
+            # before assignment on this path).
+            embed = x.unsqueeze(-1)
+
+        retval = {
+            'training_feat': {
+                'SupConLoss': {'features': embed , 'labels': labs}, # loss
+            },
+            'visual_summary': {
+                'image/pose': pose.view(N*T, 1, I*V, C).contiguous() # visualization
+            },
+            'inference_feat': {
+                'embeddings':   embed # for metric
+            }
+        }
+        return retval
@@ -0,0 +1,110 @@
+import torch
+import torch.nn as nn
+from ..base_model import BaseModel
+from ..backbones.resgcn import ResGCN
+from ..modules import Graph
+import numpy as np
+
+
+class GaitGraph2(BaseModel):
+    """
+        GaitGraph2: Towards a Deeper Understanding of Skeleton-based Gait Recognition
+        Paper:    https://openaccess.thecvf.com/content/CVPR2022W/Biometrics/papers/Teepe_Towards_a_Deeper_Understanding_of_Skeleton-Based_Gait_Recognition_CVPRW_2022_paper
+        Github:   https://github.com/tteepe/GaitGraph2
+    """
+    def build_network(self, model_cfg):
+        """Read the model hyper-parameters, build the skeleton graph and the ResGCN backbone."""
+         
+        self.joint_format = model_cfg['joint_format']
+        self.input_num = model_cfg['input_num']
+        self.block = model_cfg['block']
+        self.input_branch = model_cfg['input_branch']
+        self.main_stream = model_cfg['main_stream']
+        self.num_class = model_cfg['num_class']
+        self.reduction = model_cfg['reduction']
+        self.tta = model_cfg['tta']
+        ## Graph Init ##
+        self.graph = Graph(joint_format=self.joint_format,max_hop=3)
+        self.A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False)
+        ## Network ##
+        self.ResGCN = ResGCN(input_num=self.input_num, input_branch=self.input_branch, 
+                             main_stream=self.main_stream, num_class=self.num_class,
+                             reduction=self.reduction, block=self.block,graph=self.A)
+
+    def forward(self, inputs):
+        """Embed a batch of pose sequences.
+
+        In evaluation with ``tta`` enabled, two augmented copies of every
+        sequence (time-reversed and joint-permuted via ``graph.flip_idx``)
+        are rebuilt with MultiInput, appended to the batch, and the three
+        resulting embeddings are concatenated along the feature axis.
+        """
+
+        ipts, labs, type_, view_, seqL = inputs
+        x_input = ipts[0] 
+        # Unpacked as batch, frames, joints, branches, channels.
+        N, T, V, I, C = x_input.size()
+        pose  = x_input
+        flip_idx = self.graph.flip_idx
+
+        if not self.training and self.tta:
+            multi_input = MultiInput(self.graph.connect_joint, self.graph.center)
+            x1 = []
+            x2 = []
+            for i in range(N):
+                # Branch 0, first three channels, reversed along the frame axis.
+                x1.append(multi_input(x_input[i,:,:,0,:3].flip(0)))
+                # Branch 0, first three channels, joints reordered by flip_idx.
+                # NOTE(review): mixes int, slice and sequence indexing; the
+                # resulting axis order depends on the indexing rules of the
+                # tensor library - verify it is still (T, V, 3).
+                x2.append(multi_input(x_input[i,:,flip_idx,0,:3]))
+            # NOTE(review): the concatenation requires MultiInput's output
+            # (T, V, 3, 5) to match x_input's trailing dims, i.e. I == 3 and
+            # C == 5 - confirm.
+            x_input = torch.cat([x_input, torch.stack(x1,0), torch.stack(x2,0)], dim=0)
+        
+        # batch, T, V, I, C -> batch, I, C, T, V
+        x = x_input.permute(0, 3, 4, 1, 2).contiguous()
+
+        # resgcn
+        x = self.ResGCN(x)
+
+        if not self.training and self.tta:
+            # Undo the batch stacking: one feature block per augmentation,
+            # joined on the feature axis.
+            f1, f2, f3 = torch.split(x, [N, N, N], dim=0)
+            x = torch.cat((f1, f2, f3), dim=1)
+             
+        embed = torch.unsqueeze(x,-1)
+        
+        retval = {
+            'training_feat': {
+                'SupConLoss': {'features': x , 'labels': labs}, # loss
+            },
+            'visual_summary': {
+                'image/pose': pose.view(N*T, 1, I*V, C).contiguous() # visualization
+            },
+            'inference_feat': {
+                'embeddings': embed # for metric
+            }
+        }
+        return retval
+    
+class MultiInput:
+    """
+    Torch counterpart of the multi-branch pose expansion.
+
+    Maps a (T, V, C) tensor to (T, V, 3, C + 2): branch 0 holds the raw input
+    plus the offset to the ``center`` joint, branch 1 holds 1- and 2-frame
+    differences, branch 2 holds bone vectors and their arccos-normalised
+    components. Index 3 of branches 1 and 2 is overwritten with channel 2.
+    """
+    def __init__(self, connect_joint, center):
+        self.connect_joint = connect_joint
+        self.center = center
+
+    def __call__(self, data):
+        # T, V, C -> T, V, I=3, C + 2
+        T, V, C = data.shape
+        feats = torch.zeros((T, V, 3, C + 2), device=data.device)
+
+        # Joints
+        feats[:, :, 0, :C] = data
+        for v in range(V):
+            feats[:, v, 0, C:] = data[:, v, :2] - data[:, self.center, :2]
+
+        # Velocity: only the first T - 2 frames are filled
+        n_vel = max(T - 2, 0)
+        feats[:n_vel, :, 1, :2] = data[1:1 + n_vel, :, :2] - data[:n_vel, :, :2]
+        feats[:n_vel, :, 1, 3:] = data[2:2 + n_vel, :, :2] - data[:n_vel, :, :2]
+        feats[:, :, 1, 3] = data[:, :, 2]
+
+        # Bones
+        for v in range(V):
+            feats[:, v, 2, :2] = data[:, v, :2] - data[:, self.connect_joint[v], :2]
+        squared = sum(torch.pow(feats[:, :, 2, c], 2) for c in range(C - 1))
+        # Small epsilon keeps the division defined for zero-length bones
+        bone_length = torch.sqrt(squared) + 0.0001
+        for c in range(C - 1):
+            feats[:, :, 2, C + c] = torch.acos(feats[:, :, 2, c] / bone_length)
+        feats[:, :, 2, 3] = data[:, :, 2]
+
+        return feats
+
+
@@ -0,0 +1,186 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from ..base_model import BaseModel
+from ..modules import Graph, SpatialAttention
+import numpy as np
+import math
+
+
+class Mish(nn.Module):
+    """Mish activation: x * tanh(softplus(x))."""
+    def __init__(self):
+        super().__init__()
+
+    def forward(self,x):
+        gate = torch.tanh(F.softplus(x))
+        return x * gate
+
+class STModule(nn.Module):
+    """
+    This class implements augmented graph spatial convolution in case of Spatial Transformer
+    Function adapted from: https://github.com/Chiaraplizz/ST-TR/blob/master/code/st_gcn/net/gcn_attention.py
+
+    Args:
+        in_channels, out_channels: channel widths of the module.
+        incidence: adjacency tensor handed to SpatialAttention.
+        num_point: number of joints (V).
+    """
+    def __init__(self,in_channels, out_channels, incidence, num_point):
+        super(STModule, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.incidence = incidence
+        self.num_point = num_point
+        self.relu = Mish()
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.data_bn = nn.BatchNorm1d(self.in_channels * self.num_point)
+        self.attention_conv = SpatialAttention(in_channels=in_channels,out_channel=out_channels,A=self.incidence,num_point=self.num_point)
+
+    def forward(self,x):
+        N, C, T, V = x.size()
+        # data normalization over the flattened (C * V) axis
+        x = x.permute(0, 1, 3, 2).reshape(N, C * V, T)
+        x = self.data_bn(x)
+        x = x.reshape(N, C, V, T).permute(0, 1, 3, 2)
+        # adjacency matrix: follow the input's device so CPU inputs work too
+        self.incidence = self.incidence.to(x.device)
+        # N, C, T, V -> N, T, C, V -> NT, C, 1, V
+        xa = x.permute(0, 2, 1, 3).reshape(-1, C, 1, V)
+        # spatial attention
+        attn_out = self.attention_conv(xa)
+        # NT, C', 1, V -> N, T, C', V -> N, C', T, V
+        attn_out = attn_out.reshape(N, T, -1, V).permute(0, 2, 1, 3)
+        y = attn_out
+        # Activation is applied before batch norm here.
+        y = self.bn(self.relu(y))
+        return y
+
+class UnitConv2D(nn.Module):
+    '''
+    Temporal convolution unit used by the GaitTR [TCN_ST] block:
+    dropout -> (kernel_size x 1) conv -> Mish -> batch norm.
+    '''
+
+    def __init__(self, D_in, D_out, kernel_size=9, stride=1, dropout=0.1, bias=True):
+        super(UnitConv2D,self).__init__()
+        # "Same" padding along the temporal axis for odd kernel sizes
+        temporal_pad = int((kernel_size - 1) / 2)
+        self.conv = nn.Conv2d(
+            D_in, D_out,
+            kernel_size=(kernel_size, 1),
+            padding=(temporal_pad, 0),
+            stride=(stride, 1),
+            bias=bias)
+        self.bn = nn.BatchNorm2d(D_out)
+        self.relu = Mish()
+        self.dropout = nn.Dropout(dropout, inplace=False)
+        # initialize the conv weights
+        self.conv_init(self.conv)
+
+    def forward(self,x):
+        dropped = self.dropout(x)
+        activated = self.relu(self.conv(dropped))
+        return self.bn(activated)
+
+    def conv_init(self,module):
+        # Normal init with std sqrt(2 / (out_channels * prod(kernel_size)))
+        fan = module.out_channels
+        for extent in module.kernel_size:
+            fan = fan * extent
+        module.weight.data.normal_(0, math.sqrt(2. / fan))
+
+class TCN_ST(nn.Module):
+    """
+    Block of GaitTR: https://arxiv.org/pdf/2204.03873.pdf
+    TCN: Temporal Convolution Network
+    ST: Spatial Temporal Graph Convolution Network
+
+    Computes tcn(x) + x, then st(.) plus a (projected) residual, then adds a
+    skip connection from the block input.
+    """
+    def __init__(self,in_channel,out_channel,A,num_point):
+        super(TCN_ST, self).__init__()
+        # params
+        self.in_channel = in_channel
+        self.out_channel = out_channel
+        self.A = A
+        self.num_point = num_point
+        # network
+        self.tcn = UnitConv2D(D_in=self.in_channel,D_out=self.in_channel,kernel_size=9)
+        self.st = STModule(in_channels=self.in_channel,out_channels=self.out_channel,incidence=self.A,num_point=self.num_point)
+        self.residual = lambda x: x
+        if in_channel == out_channel:
+            # Widths match: both shortcuts are identities
+            self.residual_s = lambda x: x
+            self.down = None
+        else:
+            # Widths differ: project both shortcuts to out_channel
+            self.residual_s = nn.Sequential(
+                nn.Conv2d(in_channel, out_channel, 1),
+                nn.BatchNorm2d(out_channel),
+            )
+            self.down = UnitConv2D(D_in=self.in_channel,D_out=out_channel,kernel_size=1,dropout=0)
+
+    def forward(self,x):
+        temporal = self.tcn(x) + self.residual(x)
+        spatial = self.st(temporal) + self.residual_s(temporal)
+        # skip residual from the block input
+        if self.down is None:
+            skip = x
+        else:
+            skip = self.down(x)
+        return spatial + skip
+
+
+
+class GaitTR(BaseModel):
+    """
+        GaitTR: Spatial Transformer Network on Skeleton-based Gait Recognition
+        Arxiv : https://arxiv.org/abs/2204.03873.pdf
+    """
+    def build_network(self, model_cfg):
+        """Build the skeleton graph, the input batch norm, the TCN_ST backbone and the output conv."""
+
+        in_c = model_cfg['in_channels']
+        self.num_class = model_cfg['num_class']
+        self.joint_format = model_cfg['joint_format']
+        self.graph = Graph(joint_format=self.joint_format,max_hop=3)
+
+        #### Network Define ####
+
+        # adjacency matrix
+        self.A = torch.from_numpy(self.graph.A.astype(np.float32))
+
+        # data normalization
+        num_point = self.A.shape[-1]
+        self.data_bn = nn.BatchNorm1d(in_c[0] * num_point)
+
+        # backbone: one TCN_ST block per consecutive pair of widths in in_c
+        backbone = []
+        for i in range(len(in_c)-1):
+            backbone.append(TCN_ST(in_channel= in_c[i],out_channel= in_c[i+1],A=self.A,num_point=num_point))
+        self.backbone = nn.ModuleList(backbone)
+
+        self.fcn = nn.Conv1d(in_c[-1], self.num_class, kernel_size=1)
+
+    def forward(self, inputs):
+        """Embed a batch of pose sequences into (N, num_class, 1) features."""
+        ipts, labs, _, _, seqL = inputs
+
+        x= ipts[0]
+        pose = x
+        # x = N, T, C, V, M -> N, C, T, V, M
+        # (a 5-D input is required: both this permute and the unpacking
+        # below fail otherwise)
+        x = x.permute(0, 2, 1, 3, 4)
+        N, C, T, V, M = x.size()
+        del ipts
+
+        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
+
+        x = self.data_bn(x)
+        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(
+                N * M, C, T, V)
+        # backbone
+        for layer in self.backbone:
+            x = layer(x)
+        # V pooling
+        x = F.avg_pool2d(x, kernel_size=(1,V))
+        # M pooling
+        c = x.size(1)
+        t = x.size(2)
+        x = x.view(N, M, c, t).mean(dim=1).view(N, c, t)#[n,c,t]
+        # T pooling
+        x = F.avg_pool1d(x, kernel_size=x.size()[2]) #[n,c,1]
+        # C fcn
+        x = self.fcn(x) #[n,c',1]
+        x = F.avg_pool1d(x, x.size()[2:]) # [n,c',1]
+        x = x.view(N, self.num_class) # n,c
+        embed = x.unsqueeze(-1) # n,c,1
+
+        retval = {
+            'training_feat': {
+                'triplet': {'embeddings': embed, 'labels': labs}
+            },
+            'visual_summary': {
+                # NOTE(review): pose is still laid out N, T, C, V, M here, so
+                # this view relabels axes without permuting them - confirm
+                # this is what the visualisation expects.
+                'image/pose': pose.view(N*T, M, V, C)
+            },
+            'inference_feat': {
+                'embeddings': embed
+            }
+        }
+        return retval
@@ -0,0 +1,484 @@
+import torch
+import copy
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+
+from ..base_model import BaseModel
+
+class MultiScaleGaitGraph(BaseModel):
+    """
+        Learning Rich Features for Gait Recognition by Integrating Skeletons and Silhouettes
+        Github: https://github.com/YunjiePeng/BimodalFusion
+
+        Three parallel ST-GCN streams run on the adjacency matrices of
+        semantic levels 0, 1 and 2 (registered as A_lowSemantic,
+        A_mediumSemantic and A_highSemantic). After every layer the finer
+        stream is pooled and added into the next coarser one.
+    """
+
+    def build_network(self, model_cfg):
+        """Build graph buffers, the three ST-GCN stacks and the heads.
+
+        Keys read from ``model_cfg``: ``in_channels`` (list of channel
+        widths), ``out_channels``, ``num_id``, ``temporal_kernel_size`` and
+        ``graph_cfg`` (keyword arguments for ``SpatialGraph``).
+        """
+        channels = model_cfg['in_channels']
+        embed_dim = model_cfg['out_channels']
+        num_classes = model_cfg['num_id']
+        t_kernel = model_cfg['temporal_kernel_size']
+
+        # load spatial graph: one fixed adjacency buffer per semantic level
+        self.graph = SpatialGraph(**model_cfg['graph_cfg'])
+        for level, tag in enumerate(('low', 'medium', 'high')):
+            adjacency = torch.tensor(
+                self.graph.get_adjacency(semantic_level=level),
+                dtype=torch.float32, requires_grad=False)
+            self.register_buffer('A_%sSemantic' % tag, adjacency)
+
+        # build networks
+        kernel_size = (t_kernel, self.graph.num_A)
+
+        self.st_gcn_networks_lowSemantic = nn.ModuleList()
+        self.st_gcn_networks_mediumSemantic = nn.ModuleList()
+        self.st_gcn_networks_highSemantic = nn.ModuleList()
+        streams = (self.st_gcn_networks_lowSemantic,
+                   self.st_gcn_networks_mediumSemantic,
+                   self.st_gcn_networks_highSemantic)
+        # Blocks are created low -> medium -> high for each layer so the
+        # order of parameter initialisation stays the same.
+        for idx, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:])):
+            # Only the very first block of each stream has no residual path.
+            extra = {'residual': False} if idx == 0 else {}
+            for stream in streams:
+                stream.append(st_gcn_block(c_in, c_out, kernel_size, 1, **extra))
+            for stream in streams:
+                stream.append(st_gcn_block(c_out, c_out, kernel_size, 1))
+
+        # One learnable edge-importance mask per block, initialised to ones.
+        self.edge_importance_lowSemantic = nn.ParameterList([
+            nn.Parameter(torch.ones(self.A_lowSemantic.size()))
+            for _ in range(len(self.st_gcn_networks_lowSemantic))])
+
+        self.edge_importance_mediumSemantic = nn.ParameterList([
+            nn.Parameter(torch.ones(self.A_mediumSemantic.size()))
+            for _ in range(len(self.st_gcn_networks_mediumSemantic))])
+
+        self.edge_importance_highSemantic = nn.ParameterList([
+            nn.Parameter(torch.ones(self.A_highSemantic.size()))
+            for _ in range(len(self.st_gcn_networks_highSemantic))])
+
+        self.fc = nn.Linear(channels[-1], embed_dim)
+        self.bn_neck = nn.BatchNorm1d(embed_dim)
+        self.encoder_cls = nn.Linear(embed_dim, num_classes, bias=False)
+
+    def semantic_pooling(self, x):
+        """Halve the node axis by averaging its first and second halves."""
+        half = int(x.size()[-1] / 2)
+        first_half, second_half = torch.split(x, half, dim=-1)
+        return (first_half + second_half) / 2
+
+    def forward(self, inputs):
+        """Run the three streams and return training/inference features.
+
+        ``inputs`` is ``(ipts, labs, _, _, seqL)``; ``ipts[0]`` is unpacked
+        as [N, T, V, C]:
+           N - the number of videos.
+           T - the number of frames in one video.
+           V - the number of keypoints.
+           C - the number of features for one keypoint.
+        """
+        ipts, labs, _, _, seqL = inputs
+
+        joints = ipts[0]  # [N, T, V, C]
+        del ipts
+        N, T, V, C = joints.size()
+        joints = joints.permute(0, 3, 1, 2).contiguous().view(N, C, T, V)
+
+        limbs = self.semantic_pooling(joints)
+        parts = self.semantic_pooling(limbs)
+
+        layers = zip(self.st_gcn_networks_lowSemantic,
+                     self.edge_importance_lowSemantic,
+                     self.st_gcn_networks_mediumSemantic,
+                     self.edge_importance_mediumSemantic,
+                     self.st_gcn_networks_highSemantic,
+                     self.edge_importance_highSemantic)
+        for gcn_low, w_low, gcn_mid, w_mid, gcn_high, w_high in layers:
+            joints, _ = gcn_low(joints, self.A_lowSemantic * w_low)
+            limbs, _ = gcn_mid(limbs, self.A_mediumSemantic * w_mid)
+            parts, _ = gcn_high(parts, self.A_highSemantic * w_high)
+
+            # Cross-scale Message Passing
+            limbs = limbs + self.semantic_pooling(joints)
+            parts = parts + self.semantic_pooling(limbs)
+
+        # global pooling for each layer
+        joint_feat = F.avg_pool2d(joints, joints.size()[2:])
+        n, c, t, v = joint_feat.size()
+        joint_feat = joint_feat.view(n, c, t * v).contiguous()
+
+        limb_feat = F.avg_pool2d(limbs, limbs.size()[2:])
+        n, c, t, v = limb_feat.size()
+        limb_feat = limb_feat.view(n, c, t * v).contiguous()
+
+        parts = F.avg_pool2d(parts, parts.size()[2:])
+        n, c, t, v = parts.size()
+        parts = parts.permute(0, 2, 3, 1).contiguous().view(n, t * v, c)
+
+        # Embedding head: fc -> BN neck -> bias-free classifier
+        z_fc = self.fc(parts.view(n, -1))
+        z_cls_score = self.encoder_cls(self.bn_neck(z_fc))
+
+        z_fc = z_fc.unsqueeze(-1).contiguous()  # [n, c, p]
+        z_cls_score = z_cls_score.unsqueeze(-1).contiguous()  # [n, c, p]
+
+        retval = {
+            'training_feat': {
+                'triplet_joints': {'embeddings': joint_feat, 'labels': labs},
+                'triplet_limbs': {'embeddings': limb_feat, 'labels': labs},
+                'triplet_bodyparts': {'embeddings': z_fc, 'labels': labs},
+                'softmax': {'logits': z_cls_score, 'labels': labs}
+            },
+            'visual_summary': {},
+            'inference_feat': {
+                'embeddings': z_fc
+            }
+        }
+        return retval
+
+class st_gcn_block(nn.Module):
+    r"""One spatial-temporal graph convolution block: SCN -> temporal conv (+ residual) -> ReLU.
+    Args:
+        in_channels (int): Number of channels in the input sequence data
+        out_channels (int): Number of channels produced by the convolution
+        kernel_size (tuple): (temporal kernel size, graph kernel size); the temporal size must be odd
+        stride (int, optional): Stride of the temporal convolution. Default: 1
+        dropout (int, optional): Dropout rate of the final output. Default: 0
+        residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``
+    Shape:
+        - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
+        - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+        - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
+        - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
+        where
+            :math:`N` is a batch size, i.e. the number of videos.
+            :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`.
+            :math:`T_{in}/T_{out}` is a length of input/output sequence, i.e. the number of frames in a video.
+            :math:`V` is the number of graph nodes.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 dropout=0,
+                 residual=True):
+        super().__init__()
+
+        assert len(kernel_size) == 2
+        assert kernel_size[0] % 2 == 1
+        t_size, s_size = kernel_size[0], kernel_size[1]
+        # "same" padding along time, none along the node axis
+        t_padding = ((t_size - 1) // 2, 0)
+
+        self.gcn = SCN(in_channels, out_channels, s_size)
+
+        temporal_layers = [
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(out_channels, out_channels, (t_size, 1), (stride, 1), t_padding),
+            nn.BatchNorm2d(out_channels),
+            nn.Dropout(dropout, inplace=True),
+        ]
+        self.tcn = nn.Sequential(*temporal_layers)
+
+        shape_changes = (in_channels != out_channels) or (stride != 1)
+        if residual and shape_changes:
+            # Project the shortcut so it matches the main branch's shape.
+            self.residual = nn.Sequential(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels,
+                    kernel_size=1,
+                    stride=(stride, 1)),
+                nn.BatchNorm2d(out_channels),
+            )
+        elif residual:
+            self.residual = lambda x: x
+        else:
+            self.residual = lambda x: 0
+
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x, A):
+        """Return ``(activated features, A)``; A is passed through from the graph conv."""
+        shortcut = self.residual(x)
+        x, A = self.gcn(x, A)
+        out = self.tcn(x) + shortcut
+
+        return self.relu(out), A
+
+class SCN(nn.Module):
+    r"""Graph convolution over the nodes of a single frame.
+    Args:
+        in_channels (int): Number of channels in the input sequence data
+        out_channels (int): Number of channels produced by the convolution
+        kernel_size (int): Size of the graph convolving kernel
+        t_kernel_size (int): Size of the temporal convolving kernel
+        t_stride (int, optional): Stride of the temporal convolution. Default: 1
+        t_padding (int, optional): Temporal zero-padding added to both sides of
+            the input. Default: 0
+        t_dilation (int, optional): Spacing between temporal kernel elements.
+            Default: 1
+        bias (bool, optional): If ``True``, adds a learnable bias to the output.
+            Default: ``True``
+    Shape:
+        - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
+        - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+        - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
+        - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
+        where
+            :math:`N` is a batch size,
+            :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
+            :math:`T_{in}/T_{out}` is a length of input/output sequence,
+            :math:`V` is the number of graph nodes.
+    """
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 t_kernel_size=1,
+                 t_stride=1,
+                 t_padding=0,
+                 t_dilation=1,
+                 bias=True):
+        super().__init__()
+        # SCN only handles the spatial graph (the graph inside one frame),
+        # so t_kernel_size is left at 1 by the callers in this file.
+        self.kernel_size = kernel_size
+
+        # The 1x1 conv plays the role of the weight matrix W. It emits
+        # `kernel_size` groups of `out_channels` features, one group per
+        # adjacency matrix of the partitioning strategy: neighbours that fall
+        # in the same subset share weights, so a 1x1 conv is sufficient.
+        conv_options = dict(
+            kernel_size=(t_kernel_size, 1),
+            padding=(t_padding, 0),
+            stride=(t_stride, 1),
+            dilation=(t_dilation, 1),
+            bias=bias,
+        )
+        self.conv = nn.Conv2d(in_channels, out_channels * kernel_size, **conv_options)
+
+    def forward(self, x, A):
+        """Apply W, then aggregate over neighbours with each of the K adjacency slices."""
+        assert A.size(0) == self.kernel_size
+
+        feats = self.conv(x)
+
+        batch, stacked, frames, nodes = feats.size()
+        per_subset = stacked // self.kernel_size
+        feats = feats.view(batch, self.kernel_size, per_subset, frames, nodes)
+        # Sum over subsets k and source nodes v.
+        aggregated = torch.einsum('nkctv,kvw->nctw', feats, A)
+
+        return aggregated.contiguous(), A
+
+class SpatialGraph():
+    """ Use skeleton sequences extracted by Openpose/HRNet to construct Spatial-Temporal Graph
+
+    Args:
+        strategy (string): must be one of the follow candidates
+        - uniform: Uniform Labeling
+        - distance: Distance Partitioning
+        - spatial: Spatial Configuration Partitioning
+        - gait_temporal: Gait Temporal Configuration Partitioning
+            For more information, please refer to the section 'Partition Strategies' in PGG.
+        layout (string): must be one of the follow candidates
+        - body_12: It consists of 12 joints.
+            (right shoulder, right elbow, right knee, right hip, left elbow, left knee,
+             left shoulder, right wrist, right ankle, left hip, left wrist, left ankle).
+            For more information, please refer to the section 'Data Processing' in PGG.
+        semantic_level (int): accepted for compatibility; not used by the constructor
+            (the level is passed to ``get_adjacency`` instead)
+        max_hop (int): the maximal distance between two connected nodes # 1-neighbor
+        dilation (int): controls the spacing between the kernel points
+    """
+    def __init__(self,
+                 layout='body_12', # Openpose here represents for body_12
+                 strategy='spatial',
+                 semantic_level=0,
+                 max_hop=1,
+                 dilation=1):
+        self.layout = layout
+        self.strategy = strategy
+        self.max_hop = max_hop
+        self.dilation = dilation
+        self.num_node, self.neighbor_link_dic = self.get_layout_info(layout)
+        self.num_A = self.get_A_num(strategy)
+        # Most recent matrix built by get_adjacency(); None until first call.
+        self.A = None
+
+    def __str__(self):
+        # __str__ must return a str; returning the ndarray itself raised
+        # TypeError (or AttributeError before any adjacency was built).
+        return str(self.A)
+
+    def get_A_num(self, strategy):
+        """Return the number of adjacency slices for the configured strategy."""
+        # NOTE(review): 'distance' actually produces len(valid_hop) slices in
+        # get_adjacency, which equals 2 only for the default max_hop/dilation.
+        if self.strategy == 'uniform':
+            return 1
+        elif self.strategy == 'distance':
+            return 2
+        elif self.strategy in ('spatial', 'gait_temporal'):
+            return 3
+        else:
+            raise ValueError("Do Not Exist This Strategy")
+
+    def get_layout_info(self, layout):
+        """Return ``(num_node, {semantic_level: neighbor links})`` for ``layout``."""
+        if layout == 'body_12':
+            num_node = 12
+            neighbor_link_dic = {
+                0: [(7, 1), (1, 0), (10, 4), (4, 6),
+                     (8, 2), (2, 3), (11, 5), (5, 9),
+                     (9, 3), (3, 0), (9, 6), (6, 0)],
+                1: [(1, 0), (4, 0), (0, 3), (2, 3), (5, 3)],
+                2: [(1, 0), (2, 0)]
+            }
+            return num_node, neighbor_link_dic
+        else:
+            raise ValueError("Do Not Exist This Layout.")
+
+    def get_edge(self, semantic_level):
+        """Return ``(edges, center nodes)`` at ``semantic_level``.
+
+        Each level halves the node count; edges are self links followed by
+        the level's neighbor links.
+        """
+        # edge is a list of [child, parent] pairs, regarding the center node as root node
+        level_nodes = self.num_node // (2 ** semantic_level)
+        self_link = [(i, i) for i in range(level_nodes)]
+        neighbor_link = self.neighbor_link_dic[semantic_level]
+        edge = self_link + neighbor_link
+        center = []
+        if self.layout == 'body_12':
+            if semantic_level == 0:
+                center = [0, 3, 6, 9]
+            elif semantic_level == 1:
+                center = [0, 3]
+            elif semantic_level == 2:
+                center = [0]
+        return edge, center
+
+    def get_gait_temporal_partitioning(self, semantic_level):
+        """Return ``(positive_node, negative_node)`` sets for ``semantic_level``.
+
+        Raises:
+            ValueError: for an unknown layout or level (previously this
+                surfaced as an UnboundLocalError).
+        """
+        partitions = {
+            0: ({1, 2, 4, 5, 7, 8, 10, 11}, {0, 3, 6, 9}),
+            1: ({1, 2, 4, 5}, {0, 3}),
+            2: ({1, 2}, {0}),
+        }
+        if self.layout != 'body_12' or semantic_level not in partitions:
+            raise ValueError(
+                "No gait_temporal partition for layout {!r} at semantic level {!r}".format(
+                    self.layout, semantic_level))
+        positive_node, negative_node = partitions[semantic_level]
+        return positive_node, negative_node
+
+    def _spatial_partition(self, hop_dis, normalize_adjacency, valid_hop, center):
+        """Split the normalized adjacency by each node's distance to the nearest center."""
+        num_node = hop_dis.shape[0]
+        # Distance of every node to its nearest center, computed once rather
+        # than for every (i, j) pair.
+        center_dis = [min(hop_dis[k, c] for c in center) for k in range(num_node)]
+        A = []
+        for hop in valid_hop:
+            a_root = np.zeros((num_node, num_node))
+            a_close = np.zeros((num_node, num_node))
+            a_further = np.zeros((num_node, num_node))
+            for i in range(num_node):
+                for j in range(num_node):
+                    if hop_dis[j, i] != hop:
+                        continue
+                    if center_dis[j] == center_dis[i]:
+                        a_root[j, i] = normalize_adjacency[j, i]
+                    elif center_dis[j] > center_dis[i]:
+                        a_close[j, i] = normalize_adjacency[j, i]
+                    else:
+                        a_further[j, i] = normalize_adjacency[j, i]
+            if hop == 0:
+                A.append(a_root)
+            else:
+                A.append(a_root + a_close)
+                A.append(a_further)
+        return np.stack(A)
+
+    def _gait_temporal_partition(self, hop_dis, normalize_adjacency, valid_hop, positive_node):
+        """Split the normalized adjacency into self / negative / positive subsets."""
+        num_node = hop_dis.shape[0]
+        A = []
+        for hop in valid_hop:
+            a_root = np.zeros((num_node, num_node))
+            a_positive = np.zeros((num_node, num_node))
+            a_negative = np.zeros((num_node, num_node))
+            for i in range(num_node):
+                for j in range(num_node):
+                    if hop_dis[j, i] != hop:
+                        continue
+                    if i == j:
+                        a_root[j, i] = normalize_adjacency[j, i]
+                    elif j in positive_node:
+                        a_positive[j, i] = normalize_adjacency[j, i]
+                    else:
+                        a_negative[j, i] = normalize_adjacency[j, i]
+            if hop == 0:
+                A.append(a_root)
+            else:
+                A.append(a_negative)
+                A.append(a_positive)
+        return np.stack(A)
+
+    def get_adjacency(self, semantic_level):
+        """Build the (K, V, V) partitioned adjacency for ``semantic_level``.
+
+        The result is also stored on ``self.A`` for every strategy.
+
+        Raises:
+            ValueError: if ``self.strategy`` is not a known strategy.
+        """
+        edge, center = self.get_edge(semantic_level)
+        num_node = self.num_node // (2 ** semantic_level)
+        hop_dis = get_hop_distance(num_node, edge, max_hop=self.max_hop)
+
+        valid_hop = range(0, self.max_hop + 1, self.dilation)
+        adjacency = np.zeros((num_node, num_node))
+        for hop in valid_hop:
+            adjacency[hop_dis == hop] = 1
+
+        normalize_adjacency = normalize_digraph(adjacency)
+
+        # normalize_adjacency[a][b] = x
+        # when x = 0, node b has no connection with node a within valid hop.
+        # when x != 0, the normalized adjacency from node b to node a is x.
+        # the value of x is normalized by the number of adjacent neighbor nodes around the node b.
+
+        if self.strategy == 'uniform':
+            A = np.zeros((1, num_node, num_node))
+            A[0] = normalize_adjacency
+        elif self.strategy == 'distance':
+            A = np.zeros((len(valid_hop), num_node, num_node))
+            for i, hop in enumerate(valid_hop):
+                A[i][hop_dis == hop] = normalize_adjacency[hop_dis == hop]
+        elif self.strategy == 'spatial':
+            A = self._spatial_partition(hop_dis, normalize_adjacency, valid_hop, center)
+        elif self.strategy == 'gait_temporal':
+            positive_node, _ = self.get_gait_temporal_partitioning(semantic_level)
+            A = self._gait_temporal_partition(hop_dis, normalize_adjacency, valid_hop, positive_node)
+        else:
+            raise ValueError("Do Not Exist This Strategy")
+
+        self.A = A
+        return A
+
+
+def get_hop_distance(num_node, edge, max_hop=1):
+    """Return a (num_node, num_node) matrix of shortest hop counts.
+
+    Entry [i, j] is the smallest d in 0..max_hop for which a walk of length d
+    joins i and j in the undirected graph given by ``edge``; pairs that are
+    not reachable within ``max_hop`` steps stay at ``np.inf``.
+    """
+    adjacency = np.zeros((num_node, num_node))  # symmetric adjacency matrix
+    for src, dst in edge:
+        adjacency[dst, src] = 1
+        adjacency[src, dst] = 1
+
+    # walks[d][i, j] is True when some walk of exactly d steps joins i and j.
+    walks = np.stack(
+        [np.linalg.matrix_power(adjacency, d) for d in range(max_hop + 1)]) > 0
+
+    hop_dis = np.full((num_node, num_node), np.inf)
+    for d, reachable in enumerate(walks):
+        # Going upwards in d, only fill pairs not already reached sooner.
+        hop_dis[reachable & np.isinf(hop_dis)] = d
+    return hop_dis
+
+
+def normalize_digraph(A):
+    """Return ``A @ D^-1`` with D the diagonal matrix of column sums of A.
+
+    Columns whose sum is not positive get a zero scale factor. A is expected
+    to be square.
+    """
+    col_sum = np.sum(A, 0)
+    size = A.shape[0]
+    inv_degree = np.zeros((size, size))
+    for idx in np.flatnonzero(col_sum > 0):
+        inv_degree[idx, idx] = col_sum[idx]**(-1)
+    return np.dot(A, inv_degree)
+
+
+def normalize_undigraph(A):
+    """Return ``D^-1/2 @ A @ D^-1/2`` with D the diagonal matrix of column sums of A.
+
+    Columns whose sum is not positive get a zero scale factor. A is expected
+    to be square.
+    """
+    col_sum = np.sum(A, 0)
+    size = A.shape[0]
+    inv_sqrt_degree = np.zeros((size, size))
+    for idx in np.flatnonzero(col_sum > 0):
+        inv_sqrt_degree[idx, idx] = col_sum[idx]**(-0.5)
+    return np.dot(np.dot(inv_sqrt_degree, A), inv_sqrt_degree)
@@ -253,3 +253,443 @@ def RmBN2dAffine(model):
        if isinstance(m, nn.BatchNorm2d):
            m.weight.requires_grad = False
            m.bias.requires_grad = False
+
+
+'''
+Modified from https://github.com/BNU-IVC/FastPoseGait/blob/main/fastposegait/modeling/components/units
+'''
+
+class Graph():
+    """
+    Skeleton graph with a hop-distance partitioned, column-normalized adjacency.
+
+    # Thanks to YAN Sijie for the released code on Github (https://github.com/yysijie/st-gcn)
+
+    Args:
+        joint_format (str): one of 'coco', 'coco-no-head', 'alphapose', 'openpose'
+        max_hop (int): largest hop distance that still counts as connected
+        dilation (int): step between the hop distances that are kept
+
+    Attributes set here: ``num_node``, ``edge``, ``connect_joint``, ``parts``,
+    ``center``, ``A`` of shape (len(range(0, max_hop + 1, dilation)), V, V),
+    and ``flip_idx`` for every format except 'coco-no-head'.
+    """
+    def __init__(self, joint_format='coco', max_hop=2, dilation=1):
+        self.joint_format = joint_format
+        self.max_hop = max_hop
+        self.dilation = dilation
+
+        # get edges
+        self.num_node, self.edge, self.connect_joint, self.parts = self._get_edge()
+
+        # get adjacency matrix
+        self.A = self._get_adjacency()
+
+    def __str__(self):
+        # __str__ must return a str; returning the ndarray raised TypeError.
+        return str(self.A)
+
+    def _get_edge(self):
+        """Return ``(num_node, edge, connect_joint, parts)`` for ``self.joint_format``.
+
+        Raises:
+            ValueError: if the joint format is not supported.
+        """
+        if self.joint_format == 'coco':
+            # keypoints = {
+            #     0: "nose",
+            #     1: "left_eye",
+            #     2: "right_eye",
+            #     3: "left_ear",
+            #     4: "right_ear",
+            #     5: "left_shoulder",
+            #     6: "right_shoulder",
+            #     7: "left_elbow",
+            #     8: "right_elbow",
+            #     9: "left_wrist",
+            #     10: "right_wrist",
+            #     11: "left_hip",
+            #     12: "right_hip",
+            #     13: "left_knee",
+            #     14: "right_knee",
+            #     15: "left_ankle",
+            #     16: "right_ankle"
+            # }
+            num_node = 17
+            neighbor_link = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 6),
+                             (5, 7), (7, 9), (6, 8), (8, 10), (5, 11), (6, 12), (11, 12),
+                             (11, 13), (13, 15), (12, 14), (14, 16)]
+            self.center = 0
+            self.flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
+            connect_joint = np.array([5,0,0,1,2,0,0,5,6,7,8,5,6,11,12,13,14])
+            parts = [
+                np.array([5, 7, 9]),                      # left_arm
+                np.array([6, 8, 10]),                     # right_arm
+                np.array([11, 13, 15]),                   # left_leg
+                np.array([12, 14, 16]),                   # right_leg
+                np.array([0, 1, 2, 3, 4]),                # head
+            ]
+
+        elif self.joint_format == 'coco-no-head':
+            num_node = 12
+            neighbor_link = [(0, 1),
+                             (0, 2), (2, 4), (1, 3), (3, 5), (0, 6), (1, 7), (6, 7),
+                             (6, 8), (8, 10), (7, 9), (9, 11)]
+            self.center = 0
+            # NOTE(review): unlike the other formats, no flip_idx is defined
+            # here -- confirm whether left/right flipping is needed for it.
+            connect_joint = np.array([3,1,0,2,4,0,6,8,10,7,9,11])
+            parts =[
+                np.array([0, 2, 4]),       # left_arm
+                np.array([1, 3, 5]),       # right_arm
+                np.array([6, 8, 10]),      # left_leg
+                np.array([7, 9, 11])       # right_leg
+            ]
+
+        elif self.joint_format =='alphapose' or self.joint_format =='openpose':
+            num_node = 18
+            neighbor_link = [(0, 1), (0, 14), (0, 15), (14, 16), (15, 17),
+                             (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7),
+                             (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)]
+            self.center = 1
+            self.flip_idx = [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16]
+            connect_joint = np.array([1,1,1,2,3,1,5,6,2,8,9,5,11,12,0,0,14,15])
+            parts = [
+                np.array([5, 6, 7]),               # left_arm
+                np.array([2, 3, 4]),               # right_arm
+                np.array([11, 12, 13]),            # left_leg
+                np.array([8, 9, 10]),              # right_leg
+                np.array([0, 1, 14, 15, 16, 17]),  # head
+            ]
+
+        else:
+            # The old message read the non-existent `self.dataset`, so this
+            # path died with AttributeError instead of the intended error.
+            import logging
+            message = 'Error: Do NOT exist this dataset: {}!'.format(self.joint_format)
+            logging.error(message)
+            raise ValueError(message)
+
+        # Self links first, then the skeleton's bone links (built once).
+        self_link = [(i, i) for i in range(num_node)]
+        edge = self_link + neighbor_link
+        self.edge = edge
+        return num_node, edge, connect_joint, parts
+
+    def _get_hop_distance(self):
+        """Return the (V, V) matrix of shortest hop counts, ``inf`` beyond ``max_hop``."""
+        A = np.zeros((self.num_node, self.num_node))
+        for i, j in self.edge:
+            A[j, i] = 1
+            A[i, j] = 1
+        hop_dis = np.zeros((self.num_node, self.num_node)) + np.inf
+        transfer_mat = [np.linalg.matrix_power(A, d) for d in range(self.max_hop + 1)]
+        arrive_mat = (np.stack(transfer_mat) > 0)
+        # Walk from the longest hop down so shorter distances overwrite longer ones.
+        for d in range(self.max_hop, -1, -1):
+            hop_dis[arrive_mat[d]] = d
+        return hop_dis
+
+    def _get_adjacency(self):
+        """Return the normalized adjacency split into one slice per valid hop distance."""
+        hop_dis = self._get_hop_distance()
+        valid_hop = range(0, self.max_hop + 1, self.dilation)
+        adjacency = np.zeros((self.num_node, self.num_node))
+        for hop in valid_hop:
+            adjacency[hop_dis == hop] = 1
+        normalize_adjacency = self._normalize_digraph(adjacency)
+        A = np.zeros((len(valid_hop), self.num_node, self.num_node))
+        for i, hop in enumerate(valid_hop):
+            A[i][hop_dis == hop] = normalize_adjacency[hop_dis == hop]
+        return A
+
+    def _normalize_digraph(self, A):
+        """Return ``A @ D^-1`` where D holds the column sums of A on its diagonal."""
+        Dl = np.sum(A, 0)
+        num_node = A.shape[0]
+        Dn = np.zeros((num_node, num_node))
+        for i in range(num_node):
+            if Dl[i] > 0:
+                Dn[i, i] = Dl[i]**(-1)
+        AD = np.dot(A, Dn)
+        return AD
+
+
+class TemporalBasicBlock(nn.Module):
+    """
+        TemporalConv_Res_Block
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+
+        A single temporal conv + BN with an optional block-level shortcut.
+        ``reduction``, ``get_res`` and ``tcn_stride`` are accepted but not
+        used by this block.
+    """
+    def __init__(self, channels, temporal_window_size, stride=1, residual=False, reduction=0, get_res=False, tcn_stride=False):
+        super(TemporalBasicBlock, self).__init__()
+
+        if residual and stride != 1:
+            # Strided block: the shortcut must be down-sampled as well.
+            self.residual = nn.Sequential(
+                nn.Conv2d(channels, channels, 1, (stride, 1)),
+                nn.BatchNorm2d(channels),
+            )
+        elif residual:
+            self.residual = lambda x: x
+        else:
+            self.residual = lambda x: 0
+
+        # "same" padding along time, none along the joint axis
+        time_pad = ((temporal_window_size - 1) // 2, 0)
+        self.conv = nn.Conv2d(channels, channels, (temporal_window_size, 1), (stride, 1), time_pad)
+        self.bn = nn.BatchNorm2d(channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x, res_module):
+        """Return ``relu(bn(conv(x)) + block shortcut + res_module)``."""
+        shortcut = self.residual(x)
+        out = self.bn(self.conv(x))
+        return self.relu(out + shortcut + res_module)
+
+
+class TemporalBottleneckBlock(nn.Module):
+    """
+        TemporalConv_Res_Bottleneck
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+
+        1x1 reduce -> temporal conv -> 1x1 expand, with an optional shortcut.
+
+        Args:
+            channels: input and output channel count
+            temporal_window_size: temporal kernel size
+            stride: temporal stride; forced to 2 whenever the shortcut is a
+                strided projection (see below)
+            residual: enable the block-level shortcut
+            reduction: channel reduction factor of the bottleneck
+            get_res: always use a stride-2 projection shortcut
+            tcn_stride: accepted for signature compatibility only; it was
+                reset to False before use, so it never had any effect
+    """
+    def __init__(self, channels, temporal_window_size, stride=1, residual=False, reduction=4,get_res=False, tcn_stride=False):
+        super(TemporalBottleneckBlock, self).__init__()
+        padding = ((temporal_window_size - 1) // 2, 0)
+        inter_channels = channels // reduction
+
+        # The projection shortcut is hard-wired to temporal stride 2, so the
+        # main branch must use stride 2 whenever that shortcut is in place.
+        # (The old `if tcn_stride: stride = 2` inside the get_res branch was
+        # unreachable because tcn_stride had just been set to False.)
+        use_projection = get_res or (residual and stride != 1)
+        if use_projection:
+            self.residual = nn.Sequential(
+                nn.Conv2d(channels, channels, 1, (2,1)),
+                nn.BatchNorm2d(channels),
+            )
+        elif residual:
+            self.residual = lambda x: x
+        else:
+            self.residual = lambda x: 0
+
+        self.conv_down = nn.Conv2d(channels, inter_channels, 1)
+        self.bn_down = nn.BatchNorm2d(inter_channels)
+        if use_projection:
+            stride = 2
+        self.conv = nn.Conv2d(inter_channels, inter_channels, (temporal_window_size,1), (stride,1), padding)
+        self.bn = nn.BatchNorm2d(inter_channels)
+        self.conv_up = nn.Conv2d(inter_channels, channels, 1)
+        self.bn_up = nn.BatchNorm2d(channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x, res_module):
+        """Return ``relu(bottleneck(x) + block shortcut + res_module)``."""
+        res_block = self.residual(x)
+
+        # reduce channels
+        x = self.conv_down(x)
+        x = self.bn_down(x)
+        x = self.relu(x)
+
+        # temporal convolution
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+
+        # restore channels and merge both shortcuts
+        x = self.conv_up(x)
+        x = self.bn_up(x)
+        x = self.relu(x + res_block + res_module)
+        return x
+
+
+
+class SpatialGraphConv(nn.Module):
+    """
+        SpatialGraphConv_Basic_Block
+        Arxiv: https://arxiv.org/abs/1801.07455
+        Github: https://github.com/yysijie/st-gcn
+
+        Graph convolution with one weight set per hop-distance class.
+    """
+    def __init__(self, in_channels, out_channels, max_graph_distance):
+        super(SpatialGraphConv, self).__init__()
+
+        # one spatial class per distance: 0, 1, ..., max_graph_distance
+        self.s_kernel_size = max_graph_distance + 1
+
+        # a single 1x1 conv carries the weights of all spatial classes
+        self.gcn = nn.Conv2d(in_channels, out_channels*self.s_kernel_size, 1)
+
+    def forward(self, x, A):
+        """Aggregate features over neighbours using the first ``s_kernel_size`` slices of A."""
+        num_classes = self.s_kernel_size
+
+        # nodes in the same class share the same weight
+        feats = self.gcn(x)
+
+        # split the stacked channels into (class, channel)
+        batch, stacked, frames, nodes = feats.size()
+        feats = feats.view(batch, num_classes, stacked//num_classes, frames, nodes).contiguous()
+
+        # spatial graph convolution: sum over classes k and source nodes v
+        return torch.einsum('nkctv,kvw->nctw', feats, A[:num_classes]).contiguous()
+
+class SpatialBasicBlock(nn.Module):
+    """
+        SpatialGraphConv_Res_Block
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+
+        Graph conv + BN with an optional shortcut; ``reduction`` is accepted
+        but not used by this block.
+    """
+    def __init__(self, in_channels, out_channels, max_graph_distance, residual=False, reduction=0):
+        super(SpatialBasicBlock, self).__init__()
+
+        if residual and in_channels != out_channels:
+            # Channel counts differ: project the shortcut with a 1x1 conv.
+            self.residual = nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, 1),
+                nn.BatchNorm2d(out_channels),
+            )
+        elif residual:
+            self.residual = lambda x: x
+        else:
+            self.residual = lambda x: 0
+
+        self.conv = SpatialGraphConv(in_channels, out_channels, max_graph_distance)
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x, A):
+        """Return ``relu(bn(graph_conv(x, A)) + shortcut(x))``."""
+        shortcut = self.residual(x)
+        out = self.bn(self.conv(x, A))
+        return self.relu(out + shortcut)
+
+class SpatialBottleneckBlock(nn.Module):
+    """
+        SpatialGraphConv_Res_Bottleneck
+        Arxiv: https://arxiv.org/abs/2010.09978
+        Github: https://github.com/Thomas-yx/ResGCNv1
+
+        1x1 conv down -> graph convolution -> 1x1 conv up, each followed by
+        batch norm and ReLU; the shortcut is added before the final ReLU.
+
+        Args:
+            in_channels: number of channels of the input feature map.
+            out_channels: number of channels of the output feature map.
+            max_graph_distance: forwarded to ``SpatialGraphConv``.
+            residual: when false the shortcut contributes 0; when true it is
+                the identity if the channel counts match, otherwise a
+                1x1 convolution followed by batch norm.
+            reduction: the bottleneck width is ``out_channels // reduction``.
+    """
+
+    def __init__(self, in_channels, out_channels, max_graph_distance, residual=False, reduction=4):
+        super().__init__()
+
+        # Width of the narrow middle section of the bottleneck.
+        inter_channels = out_channels // reduction
+
+        # Pick the shortcut branch; only the projection variant owns parameters.
+        if residual and in_channels != out_channels:
+            self.residual = nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, 1),
+                nn.BatchNorm2d(out_channels),
+            )
+        elif residual:
+            self.residual = lambda x: x
+        else:
+            self.residual = lambda x: 0
+
+        # Squeeze.
+        self.conv_down = nn.Conv2d(in_channels, inter_channels, 1)
+        self.bn_down = nn.BatchNorm2d(inter_channels)
+        # Graph convolution at the reduced width.
+        self.conv = SpatialGraphConv(inter_channels, inter_channels, max_graph_distance)
+        self.bn = nn.BatchNorm2d(inter_channels)
+        # Expand.
+        self.conv_up = nn.Conv2d(inter_channels, out_channels, 1)
+        self.bn_up = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x, A):
+        """Apply the block to ``x`` using ``A`` for the graph convolution."""
+        # The shortcut is taken from the untouched input.
+        shortcut = self.residual(x)
+
+        out = self.relu(self.bn_down(self.conv_down(x)))
+        out = self.relu(self.bn(self.conv(out, A)))
+        out = self.bn_up(self.conv_up(out))
+
+        return self.relu(out + shortcut)
+
+class SpatialAttention(nn.Module):
+    """
+    This class implements Spatial Transformer.
+    Function adapted from: https://github.com/leaderj1001/Attention-Augmented-Conv2d
+
+    Multi-head self-attention over the flattened (T * V) positions of a
+    4-D feature map. Queries, keys and values come from one shared
+    convolution; the attended values are merged across heads and passed
+    through a final 1x1 convolution.
+
+    Args:
+        in_channels: number of channels of the input feature map.
+        out_channel: number of output channels; also the total value depth dv.
+        A: indexable with 0, 1 and 2; the sum of those three entries is stored
+            on ``self.A`` (not read by ``forward``).
+        num_point: stored on the module; not read by ``forward``.
+        dk_factor: total query/key depth is ``int(dk_factor * out_channel)``.
+        kernel_size: kernel size of the q/k/v convolution.
+        Nh: number of attention heads; must divide both dk and dv.
+        num: stored on the module; not read by ``forward``.
+        stride: stride of the q/k/v convolution, 1 or 2.
+    """
+    def __init__(self, in_channels, out_channel, A, num_point, dk_factor=0.25, kernel_size=1, Nh=8, num=4, stride=1):
+        super(SpatialAttention, self).__init__()
+        self.in_channels = in_channels
+        self.kernel_size = kernel_size
+        self.dk = int(dk_factor * out_channel)
+        self.dv = int(out_channel)
+        self.num = num
+        self.Nh = Nh
+        self.num_point=num_point
+        self.A = A[0] + A[1] + A[2]
+        self.stride = stride
+        self.padding = (self.kernel_size - 1) // 2
+
+        assert self.Nh != 0, "integer division or modulo by zero, Nh >= 1"
+        assert self.dk % self.Nh == 0, "dk should be divided by Nh. (example: out_channels: 20, dk: 40, Nh: 4)"
+        assert self.dv % self.Nh == 0, "dv should be divided by Nh. (example: out_channels: 20, dv: 4, Nh: 4)"
+        assert stride in [1, 2], str(stride) + " Up to 2 strides are allowed."
+
+        # A single convolution emits q, k and v stacked along the channel axis.
+        self.qkv_conv = nn.Conv2d(self.in_channels, 2 * self.dk + self.dv, kernel_size=self.kernel_size,
+                                    stride=stride,
+                                    padding=self.padding)
+
+        # Output projection (W0) applied after the heads are merged.
+        self.attn_out = nn.Conv2d(self.dv, self.dv, kernel_size=1, stride=1)
+
+    def forward(self, x):
+        """
+        Args:
+            x: 4-D input unpacked as (batch_size, channels, T, V).
+
+        Returns:
+            Tensor of shape (batch_size, dv, T', V'), where T' and V' are the
+            spatial sizes produced by the q/k/v convolution (equal to T and V
+            for stride 1 with an odd kernel size).
+        """
+        B = x.size(0)
+
+        # flat_q, flat_k, flat_v
+        # (batch_size, Nh, dkh or dvh, T' * V')
+        # dvh = dv / Nh, dkh = dk / Nh
+        # q, k, v obtained by doing 2D convolution on the input (q=XWq, k=XWk, v=XWv)
+        flat_q, flat_k, flat_v, q, k, v = self.compute_flat_qkv(x, self.dk, self.dv, self.Nh)
+
+        # Use the spatial size of the q/k/v maps, not of the input: the
+        # convolution may shrink them (stride 2 or an even kernel size), and
+        # reshaping with the input size would then fail.
+        T, V = q.shape[-2:]
+
+        # Calculate the scores, obtained by doing q*k
+        # (batch_size, Nh, T'V', dkh)*(batch_size, Nh, dkh, T'V') = (batch_size, Nh, T'V', T'V')
+        logits = torch.matmul(flat_q.transpose(2, 3), flat_k)
+
+        weights = F.softmax(logits, dim=-1)
+
+        # attn_out = weights*V
+        # (batch, Nh, T'V', T'V')*(batch, Nh, T'V', dvh) = (batch, Nh, T'V', dvh)
+        attn_out = torch.matmul(weights, flat_v.transpose(2, 3))
+
+        attn_out = torch.reshape(attn_out, (B, self.Nh, T, V, self.dv // self.Nh))
+
+        # (batch, Nh, dvh, T', V')
+        attn_out = attn_out.permute(0, 1, 4, 2, 3)
+
+        # combine_heads_2d, combine heads only after having calculated each Z separately
+        # (batch, Nh*dvh = dv, T', V')
+        attn_out = self.combine_heads_2d(attn_out)
+
+        # Multiply for W0 (batch, out_channels, T', V') with out_channels=dv
+        attn_out = self.attn_out(attn_out)
+        return attn_out
+
+    def compute_flat_qkv(self, x, dk, dv, Nh):
+        """
+        Project ``x`` to queries, keys and values and split them into heads.
+
+        Returns:
+            (flat_q, flat_k, flat_v, q, k, v): the ``flat_*`` tensors have
+            shape (N, Nh, depth_per_head, T * V) and q, k, v have shape
+            (N, Nh, depth_per_head, T, V). q is already scaled by
+            ``(dk // Nh) ** -0.5``.
+        """
+        qkv = self.qkv_conv(x)
+        # NOTE: the original author states T=1 here because each frame is
+        # considered separately; the code itself works for any T.
+        N, _, T, V = qkv.size()
+
+        q, k, v = torch.split(qkv, [dk, dk, dv], dim=1)
+        q = self.split_heads_2d(q, Nh)
+        k = self.split_heads_2d(k, Nh)
+        v = self.split_heads_2d(v, Nh)
+
+        dkh = dk // Nh
+        dvh = dv // Nh
+        # Scaled dot-product attention: fold the 1/sqrt(dkh) factor into q.
+        q = q*(dkh ** -0.5)
+        flat_q = torch.reshape(q, (N, Nh, dkh, T * V))
+        flat_k = torch.reshape(k, (N, Nh, dkh, T * V))
+        flat_v = torch.reshape(v, (N, Nh, dvh, T * V))
+        return flat_q, flat_k, flat_v, q, k, v
+
+    def split_heads_2d(self, x, Nh):
+        """Reshape (B, channels, T, V) into (B, Nh, channels // Nh, T, V)."""
+        B, channels, T, V = x.size()
+        ret_shape = (B, Nh, channels // Nh, T, V)
+        split = torch.reshape(x, ret_shape)
+        return split
+
+    def combine_heads_2d(self, x):
+        """Reshape (batch, Nh, dv, T, V) into (batch, Nh * dv, T, V)."""
+        batch, Nh, dv, T, V = x.size()
+        ret_shape = (batch, Nh * dv, T, V)
+        return torch.reshape(x, ret_shape)
+