diff --git a/configs/gaitgraph1/gaitgraph1_phase1.yaml b/configs/gaitgraph1/gaitgraph1_phase1.yaml new file mode 100644 index 0000000..7e74045 --- /dev/null +++ b/configs/gaitgraph1/gaitgraph1_phase1.yaml @@ -0,0 +1,98 @@ +data_cfg: + dataset_name: CASIA-B + dataset_root: your_path + dataset_partition: ./datasets/CASIA-B/CASIA-B.json + test_dataset_name: CASIA-B + num_workers: 8 + remove_no_gallery: false + frame_threshold: 60 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 6000 + save_name: GaitGraph1_phase1 + sampler: + batch_size: 256 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: euc + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: GaitGraph1Input + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Re + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph1 + joint_format: coco + input_num: 1 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 3 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 256 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.01 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.01 + total_steps: 6000 + scheduler: OneCycleLR + div_factor: 25 + +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 1000 + save_name: GaitGraph1_phase1 + sync_BN: true + total_iter: 6000 + sampler: + batch_shuffle: true + frames_num_fixed: 100 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 128 + type: CommonSampler + transform: + - type: TwoView + trf_cfg: + - type: InversePosesPre + probability: 0.1 + - type: FlipSequence + probability: 0.5 + - type: RandomSelectSequence + sequence_length: 60 + - type: PointNoise + std: 0.05 + - type: JointNoise + std: 0.2 + - type: 
GaitGraph1Input diff --git a/configs/gaitgraph1/gaitgraph1_phase1_GREW.yaml b/configs/gaitgraph1/gaitgraph1_phase1_GREW.yaml new file mode 100644 index 0000000..2f56705 --- /dev/null +++ b/configs/gaitgraph1/gaitgraph1_phase1_GREW.yaml @@ -0,0 +1,100 @@ +data_cfg: + dataset_name: GREW + dataset_root: your_path + dataset_partition: ./datasets/GREW/GREW.json + test_dataset_name: GREW + num_workers: 8 + remove_no_gallery: false + frame_threshold: 16 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitGraph1_phase1 + sampler: + batch_size: 256 + frames_num_fixed: 501 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: euc + # eval_func: GREW_submission + eval_func: evaluate_real_scene + transform: + - type: Compose + trf_cfg: + - type: SelectSequenceCenter + sequence_length: 16 + - type: GaitGraph1Input + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Re + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph1 + joint_format: coco + input_num: 1 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 3 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 256 + - 256 + num_class: 256 + tta: true + +optimizer_cfg: + lr: 0.01 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.01 + total_steps: 150000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 5000 + save_name: GaitGraph1_phase1 + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: true + frames_num_fixed: 501 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 128 + type: CommonSampler + transform: + - type: TwoView + trf_cfg: + - type: MirrorPoses + probability: 0.5 + - type: FlipSequence + probability: 0.5 + - type: RandomSelectSequence + 
sequence_length: 16 + - type: PointNoise + std: 0.05 + - type: JointNoise + std: 0.1 + - type: GaitGraph1Input \ No newline at end of file diff --git a/configs/gaitgraph1/gaitgraph1_phase1_Gait3D.yaml b/configs/gaitgraph1/gaitgraph1_phase1_Gait3D.yaml new file mode 100644 index 0000000..ad5f5d8 --- /dev/null +++ b/configs/gaitgraph1/gaitgraph1_phase1_Gait3D.yaml @@ -0,0 +1,100 @@ +data_cfg: + dataset_name: Gait3D + dataset_root: your_path + dataset_partition: ./datasets/Gait3D/Gait3D.json + test_dataset_name: Gait3D + num_workers: 8 + remove_no_gallery: false + frame_threshold: 24 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 20000 + save_name: GaitGraph1_phase1 + sampler: + batch_size: 256 + frames_num_fixed: 130 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: euc + eval_func: evaluate_Gait3D + transform: + - type: Compose + trf_cfg: + - type: SelectSequenceCenter + sequence_length: 24 + - type: GaitGraph1Input + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Re + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph1 + joint_format: coco + input_num: 1 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 3 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 256 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.01 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.01 + total_steps: 20000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 500 + save_name: GaitGraph1_phase1 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + frames_num_fixed: 130 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 128 + type: CommonSampler + transform: + - type: TwoView + trf_cfg: + - type: MirrorPoses + 
probability: 0.5 + - type: FlipSequence + probability: 0.5 + - type: RandomSelectSequence + sequence_length: 16 + - type: PointNoise + std: 0.05 + - type: JointNoise + std: 0.1 + - type: GaitGraph1Input + diff --git a/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP.yaml b/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP.yaml new file mode 100644 index 0000000..40f1329 --- /dev/null +++ b/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP.yaml @@ -0,0 +1,99 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + test_dataset_name: OUMVLP + num_workers: 8 + remove_no_gallery: false + frame_threshold: 17 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitGraph1_phase1 + sampler: + batch_size: 256 + frames_num_fixed: 35 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: euc + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: SelectSequenceCenter + sequence_length: 17 + - type: GaitGraph1Input + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Re + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph1 + joint_format: alphapose + input_num: 1 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 3 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 256 + - 256 + num_class: 256 + tta: true + +optimizer_cfg: + lr: 0.01 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.01 + total_steps: 150000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 10000 + save_name: GaitGraph1_phase1 + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: true + frames_num_fixed: 35 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 128 + type: 
CommonSampler + transform: + - type: TwoView + trf_cfg: + - type: MirrorPoses + probability: 0.5 + - type: FlipSequence + probability: 0.5 + - type: RandomSelectSequence + sequence_length: 16 + - type: PointNoise + std: 0.05 + - type: JointNoise + std: 0.1 + - type: GaitGraph1Input \ No newline at end of file diff --git a/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP17.yaml b/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP17.yaml new file mode 100644 index 0000000..65d5852 --- /dev/null +++ b/configs/gaitgraph1/gaitgraph1_phase1_OUMVLP17.yaml @@ -0,0 +1,99 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + test_dataset_name: OUMVLP + num_workers: 8 + remove_no_gallery: false + frame_threshold: 17 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitGraph1_phase1 + sampler: + batch_size: 256 + frames_num_fixed: 25 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: euc + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: SelectSequenceCenter + sequence_length: 17 + - type: GaitGraph1Input + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Re + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph1 + joint_format: coco + input_num: 1 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 3 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 256 + - 256 + num_class: 256 + tta: true + +optimizer_cfg: + lr: 0.01 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.01 + total_steps: 150000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 10000 + save_name: GaitGraph1_phase1 + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: true + frames_num_fixed: 25 + frames_num_max: 50 + 
frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 128 + type: CommonSampler + transform: + - type: TwoView + trf_cfg: + - type: MirrorPoses + probability: 0.5 + - type: FlipSequence + probability: 0.5 + - type: RandomSelectSequence + sequence_length: 16 + - type: PointNoise + std: 0.05 + - type: JointNoise + std: 0.1 + - type: GaitGraph1Input diff --git a/configs/gaitgraph2/gaitgraph2.yaml b/configs/gaitgraph2/gaitgraph2.yaml new file mode 100644 index 0000000..50042cb --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2.yaml @@ -0,0 +1,97 @@ +data_cfg: + dataset_name: CASIA-B + dataset_root: your_path + dataset_partition: ./datasets/CASIA-B/CASIA-B.json + test_dataset_name: CASIA-B + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 500 + save_name: GaitGraph2 + sampler: + batch_size: 256 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: GaitGraphMultiInput + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: coco + input_num: 3 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 #0.005 + solver: AdamW + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.005 + total_steps: 500 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 100 + save_name: GaitGraph2 + sync_BN: true + total_iter: 500 + sampler: + batch_shuffle: true + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered 
#Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.25 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 4 + - 1 + - type: GaitGraphMultiInput diff --git a/configs/gaitgraph2/gaitgraph2_CCPG.yaml b/configs/gaitgraph2/gaitgraph2_CCPG.yaml new file mode 100644 index 0000000..c8cdc0a --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_CCPG.yaml @@ -0,0 +1,103 @@ +data_cfg: + dataset_name: CCPG + dataset_root: your_path + dataset_partition: ./datasets/CCPG/CCPG.json + test_dataset_name: CCPG + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 2000 + save_name: GaitGraph2_CCPG + sampler: + batch_size: 256 + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_CCPG + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: GaitGraphMultiInput + joint_format: alphapose + + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: alphapose + input_num: 3 + reduction: 4 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + three_phase: True + max_lr: 0.005 + total_steps: 2000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 20 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 200 + save_name: GaitGraph2 + sync_BN: true + total_iter: 2000 + sampler: + batch_shuffle: true + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + 
sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + joint_format: alphapose + - type: JointNoise + std: 0.1 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 3 + - 1 + - type: GaitGraphMultiInput + joint_format: alphapose \ No newline at end of file diff --git a/configs/gaitgraph2/gaitgraph2_GREW.yaml b/configs/gaitgraph2/gaitgraph2_GREW.yaml new file mode 100644 index 0000000..b5b08ec --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_GREW.yaml @@ -0,0 +1,105 @@ +data_cfg: + dataset_name: GREW + dataset_root: your_path + dataset_partition: ./datasets/GREW/GREW.json + test_dataset_name: GREW + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 50000 + save_name: GaitGraph2 + sampler: + batch_size: 256 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: GREW_submission + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: GaitGraphMultiInput + + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: coco + input_num: 3 + reduction: 4 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 128 + - 256 + - 256 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + three_phase: True + max_lr: 0.005 + total_steps: 50000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 10000 + save_name: GaitGraph2 + sync_BN: true + 
total_iter: 50000 + sampler: + batch_shuffle: true + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.25 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 4 + - 1 + - type: GaitGraphMultiInput diff --git a/configs/gaitgraph2/gaitgraph2_Gait3D.yaml b/configs/gaitgraph2/gaitgraph2_Gait3D.yaml new file mode 100644 index 0000000..2fb1d4d --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_Gait3D.yaml @@ -0,0 +1,93 @@ +data_cfg: + dataset_name: Gait3D + dataset_root: your_path + dataset_partition: ./datasets/Gait3D/Gait3D.json + test_dataset_name: Gait3D + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 2000 + save_name: GaitGraph2 + sampler: + batch_size: 256 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_Gait3D + transform: + - type: GaitGraphMultiInput + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: coco + input_num: 3 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 #0.005 + solver: AdamW + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.005 + total_steps: 2000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 20 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 200 + save_name: GaitGraph2 + sync_BN: true + total_iter: 2000 + sampler: + batch_shuffle: true + 
frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + transform: + - type: Compose + trf_cfg: + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.25 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 4 + - 1 + - type: GaitGraphMultiInput diff --git a/configs/gaitgraph2/gaitgraph2_OUMVLP.yaml b/configs/gaitgraph2/gaitgraph2_OUMVLP.yaml new file mode 100644 index 0000000..4a65ffe --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_OUMVLP.yaml @@ -0,0 +1,108 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + test_dataset_name: OUMVLP + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 80000 + save_name: GaitGraph2_phase2 + sampler: + batch_size: 256 + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: GaitGraphMultiInput + joint_format: alphapose + + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: alphapose + input_num: 3 + reduction: 4 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 128 + - 256 + - 256 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + three_phase: True + max_lr: 0.005 + total_steps: 80000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 10000 + save_name: GaitGraph2 + 
sync_BN: true + total_iter: 80000 + sampler: + batch_shuffle: true + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + joint_format: alphapose + - type: JointNoise + std: 0.1 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 3 + - 1 + - type: GaitGraphMultiInput + joint_format: alphapose \ No newline at end of file diff --git a/configs/gaitgraph2/gaitgraph2_OUMVLP17.yaml b/configs/gaitgraph2/gaitgraph2_OUMVLP17.yaml new file mode 100644 index 0000000..17a8724 --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_OUMVLP17.yaml @@ -0,0 +1,108 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + test_dataset_name: OUMVLP + num_workers: 8 + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 80000 + save_name: GaitGraph2_phase2 + sampler: + batch_size: 256 + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_indoor_dataset + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: GaitGraphMultiInput + joint_format: coco + + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: coco + input_num: 3 + reduction: 4 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 128 + - 128 + - 256 + - 256 + - 256 + num_class: 128 + tta: true + +optimizer_cfg: + lr: 0.005 + solver: Adam + weight_decay: 0.00001 + +scheduler_cfg: + three_phase: True + max_lr: 0.005 + total_steps: 80000 + scheduler: OneCycleLR 
+ +trainer_cfg: + enable_float16: false + log_iter: 1000 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 10000 + save_name: GaitGraph2 + sync_BN: true + total_iter: 80000 + sampler: + batch_shuffle: true + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 + type: CommonSampler + + transform: + - type: Compose + trf_cfg: + - type: NormalizeEmpty + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + joint_format: coco + - type: JointNoise + std: 0.1 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 3 + - 1 + - type: GaitGraphMultiInput + joint_format: coco \ No newline at end of file diff --git a/configs/gaitgraph2/gaitgraph2_SUSTech1K.yaml b/configs/gaitgraph2/gaitgraph2_SUSTech1K.yaml new file mode 100644 index 0000000..4948273 --- /dev/null +++ b/configs/gaitgraph2/gaitgraph2_SUSTech1K.yaml @@ -0,0 +1,94 @@ +data_cfg: + dataset_name: SUSTech1K + dataset_root: your_path + dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json + test_dataset_name: SUSTech1K + num_workers: 8 + data_in_use: [false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false] + remove_no_gallery: false + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 2000 + save_name: GaitGraph2-SUSTech1k + sampler: + batch_size: 256 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered + frames_skip_num: 0 + metric: cos + eval_func: evaluate_indoor_dataset + transform: + - type: GaitGraphMultiInput + +loss_cfg: + - loss_term_weight: 1 + temperature: 0.01 + type: SupConLoss_Lp + log_prefix: SupConLoss + +model_cfg: + model: GaitGraph2 + joint_format: coco + input_num: 3 + reduction: 8 + block: Bottleneck # Basic, initial + input_branch: + - 5 + - 64 + - 32 + main_stream: + - 32 + - 128 + - 256 + num_class: 128 + tta: true 
+ +optimizer_cfg: + lr: 0.005 #0.005 + solver: AdamW + weight_decay: 0.00001 + +scheduler_cfg: + max_lr: 0.005 + total_steps: 2000 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false + log_iter: 20 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 200 + save_name: GaitGraph2 + sync_BN: true + total_iter: 2000 + sampler: + batch_shuffle: true + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + batch_size: 768 # 256 only for debug # + type: CommonSampler + transform: + - type: Compose + trf_cfg: + - type: FlipSequence + probability: 0.5 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.25 + - type: PointNoise + std: 0.05 + - type: RandomMove + random_r: + - 4 + - 1 + - type: GaitGraphMultiInput diff --git a/configs/gaittr/gaittr_CCPG.yaml b/configs/gaittr/gaittr_CCPG.yaml new file mode 100644 index 0000000..254d6fa --- /dev/null +++ b/configs/gaittr/gaittr_CCPG.yaml @@ -0,0 +1,91 @@ +data_cfg: + dataset_name: CCPG + dataset_root: your_path + dataset_partition: ./datasets/CCPG/CCPG.json + num_workers: 1 + remove_no_gallery: false + frame_threshold: 0 + test_dataset_name: CCPG + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 40000 + save_name: GaitTR-CCPG + eval_func: evaluate_CCPG + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: alphapose + - type: SkeletonInput +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 128 + joint_format: alphapose + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 
40000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 4000 + save_name: GaitTR-CCPG + sync_BN: true + total_iter: 40000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 4 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + joint_format: alphapose + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: alphapose + - type: SkeletonInput \ No newline at end of file diff --git a/configs/gaittr/gaittr_GREW.yaml b/configs/gaittr/gaittr_GREW.yaml new file mode 100644 index 0000000..0c3f179 --- /dev/null +++ b/configs/gaittr/gaittr_GREW.yaml @@ -0,0 +1,91 @@ +data_cfg: + dataset_name: GREW + dataset_root: your_path + dataset_partition: ./datasets/GREW/GREW.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: GREW + frame_threshold: 0 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitTR + eval_func: GREW_submission + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput + +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 256 + joint_format: coco + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 150000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + 
log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 5000 + save_name: GaitTR + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 8 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput diff --git a/configs/gaittr/gaittr_Gait3D.yaml b/configs/gaittr/gaittr_Gait3D.yaml new file mode 100644 index 0000000..21c5920 --- /dev/null +++ b/configs/gaittr/gaittr_Gait3D.yaml @@ -0,0 +1,91 @@ +data_cfg: + dataset_name: Gait3D + dataset_root: your_path + dataset_partition: ./datasets/Gait3D/Gait3D.json + test_dataset_name: Gait3D + num_workers: 1 + remove_no_gallery: false + frame_threshold: 0 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 40000 + save_name: GaitTR + eval_func: evaluate_Gait3D + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput + +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 128 + joint_format: coco + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 40000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 1000 + save_name: GaitTR + sync_BN: true + total_iter: 40000 
+ sampler: + batch_shuffle: false + batch_size: + - 32 + - 4 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput \ No newline at end of file diff --git a/configs/gaittr/gaittr_OUMVLP.yaml b/configs/gaittr/gaittr_OUMVLP.yaml new file mode 100644 index 0000000..85de884 --- /dev/null +++ b/configs/gaittr/gaittr_OUMVLP.yaml @@ -0,0 +1,92 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: OUMVLP + frame_threshold: 0 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitTR + eval_func: evaluate_indoor_dataset + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: alphapose + - type: SkeletonInput + +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 256 + joint_format: alphapose + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 150000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 5000 + save_name: GaitTR + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 16 + frames_num_fixed: 60 + frames_num_max: 50 
+ frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + joint_format: alphapose + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: alphapose + - type: SkeletonInput \ No newline at end of file diff --git a/configs/gaittr/gaittr_OUMVLP17.yaml b/configs/gaittr/gaittr_OUMVLP17.yaml new file mode 100644 index 0000000..fd03670 --- /dev/null +++ b/configs/gaittr/gaittr_OUMVLP17.yaml @@ -0,0 +1,91 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: OUMVLP + frame_threshold: 0 + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 150000 + save_name: GaitTR + eval_func: evaluate_indoor_dataset + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput + +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 256 + joint_format: coco + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 150000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 5000 + save_name: GaitTR + sync_BN: true + total_iter: 150000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 16 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + sample_type: fixed_ordered #Repeat sample + 
frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput \ No newline at end of file diff --git a/configs/gaittr/gaittr_SUSTech1K.yaml b/configs/gaittr/gaittr_SUSTech1K.yaml new file mode 100644 index 0000000..1b7c888 --- /dev/null +++ b/configs/gaittr/gaittr_SUSTech1K.yaml @@ -0,0 +1,92 @@ +data_cfg: + dataset_name: SUSTech1K + dataset_root: your_path + dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json + num_workers: 1 + data_in_use: [false,false,false,true,false,false,false,false,false,false,false,false,false,false,false,false] + remove_no_gallery: false + frame_threshold: 0 + test_dataset_name: SUSTech1K + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 40000 + save_name: GaitTR-SUSTech1k + eval_func: evaluate_indoor_dataset + sampler: + batch_size: 4 #should same to num_gpus + sample_type: all_ordered + type: InferenceSampler + metric: euc # cos + transform: + - type: Compose + trf_cfg: + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput + +loss_cfg: + type: TripletLoss + margin: 0.3 + log_prefix: triplet + +model_cfg: + model: GaitTR + in_channels: + - 10 + - 64 + - 64 + - 128 + - 256 + num_class: 128 + joint_format: coco + + +optimizer_cfg: + lr: 0.001 + solver: Adam + weight_decay: 0.00002 + +scheduler_cfg: + three_phase: True + max_lr: 0.001 + div_factor: 100 + final_div_factor: 1000.0 + total_steps: 40000 + pct_start: 0.475 + scheduler: OneCycleLR + +trainer_cfg: + enable_float16: false #not use + log_iter: 100 + with_test: true + restore_ckpt_strict: false + restore_hint: 0 + save_iter: 4000 + save_name: GaitTR-SUSTech1k + sync_BN: true + total_iter: 40000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 4 + frames_num_fixed: 60 + frames_num_max: 50 + frames_num_min: 25 + 
sample_type: fixed_ordered #Repeat sample + frames_skip_num: 0 + type: TripletSampler + transform: + - type: Compose + trf_cfg: + - type: PointNoise + std: 0.3 + - type: InversePosesPre + probability: 0.1 + - type: JointNoise + std: 0.3 + - type: GaitTRMultiInput + joint_format: coco + - type: SkeletonInput \ No newline at end of file diff --git a/configs/msgg/msgg_OUMVLP.yaml b/configs/msgg/msgg_OUMVLP.yaml new file mode 100644 index 0000000..b31cb78 --- /dev/null +++ b/configs/msgg/msgg_OUMVLP.yaml @@ -0,0 +1,98 @@ +data_cfg: + dataset_name: OUMVLP + dataset_root: your_path + dataset_partition: ./datasets/OUMVLP/OUMVLP.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: OUMVLP + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 300000 + save_name: MSGG_OUMVLP + eval_func: evaluate_indoor_dataset + sampler: + batch_size: 8 + sample_type: all_ordered + type: InferenceSampler + transform: + - type: MSGGTransform + prob: alphapose + # prob: coco + metric: euc # cos + +loss_cfg: + - loss_term_weight: 0.3 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_joints + - loss_term_weight: 0.2 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_limbs + - loss_term_weight: 0.1 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_bodyparts + - loss_term_weight: 1.0 + scale: 1 + type: CrossEntropyLoss + log_accuracy: true + label_smooth: false + log_prefix: softmax + +model_cfg: + model: MultiScaleGaitGraph + in_channels: + - 3 + - 16 + - 32 + - 64 + - 128 + out_channels: 128 + graph_cfg: + layout: 'body_12' + strategy: 'gait_temporal' + temporal_kernel_size: 9 + num_id: 5153 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 75000 + - 150000 + - 225000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: false + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 2000 + 
save_name: MSGG_OUMVLP + sync_BN: true + total_iter: 300000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 8 + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + frames_skip_num: 0 + sample_type: fixed_ordered + type: TripletSampler + transform: + - type: MSGGTransform + prob: alphapose \ No newline at end of file diff --git a/configs/msgg/msgg_SUSTech1K.yaml b/configs/msgg/msgg_SUSTech1K.yaml new file mode 100644 index 0000000..2fe05af --- /dev/null +++ b/configs/msgg/msgg_SUSTech1K.yaml @@ -0,0 +1,98 @@ +data_cfg: + dataset_name: SUSTech1K + dataset_root: your_path + dataset_partition: ./datasets/SUSTech1K/SUSTech1K.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: SUSTech1K + +evaluator_cfg: + enable_float16: false + restore_ckpt_strict: true + restore_hint: 300000 + save_name: MSGG_SUSTech1K + eval_func: evaluate_indoor_dataset + sampler: + batch_size: 8 + sample_type: all_ordered + type: InferenceSampler + transform: + - type: MSGGTransform + # prob: alphapose + prob: coco + metric: euc # cos + +loss_cfg: + - loss_term_weight: 0.3 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_joints + - loss_term_weight: 0.2 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_limbs + - loss_term_weight: 0.1 + margin: 0.2 + type: TripletLoss + log_prefix: triplet_bodyparts + - loss_term_weight: 1.0 + scale: 1 + type: CrossEntropyLoss + log_accuracy: true + label_smooth: false + log_prefix: softmax + +model_cfg: + model: MultiScaleGaitGraph + in_channels: + - 3 + - 16 + - 32 + - 64 + - 128 + out_channels: 128 + graph_cfg: + layout: 'body_12' + strategy: 'gait_temporal' + temporal_kernel_size: 9 + num_id: 5153 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 75000 + - 150000 + - 225000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: false + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + 
restore_hint: 0 + save_iter: 2000 + save_name: MSGG_SUSTech1K + sync_BN: true + total_iter: 300000 + sampler: + batch_shuffle: false + batch_size: + - 32 + - 8 + frames_num_fixed: 30 + frames_num_max: 50 + frames_num_min: 25 + frames_skip_num: 0 + sample_type: fixed_ordered + type: TripletSampler + transform: + - type: MSGGTransform + prob: coco \ No newline at end of file diff --git a/datasets/GREW/README.md b/datasets/GREW/README.md index af071a5..6ebf4a4 100644 --- a/datasets/GREW/README.md +++ b/datasets/GREW/README.md @@ -24,15 +24,24 @@ ls *.tgz | xargs -n1 tar xzvf After unpacking these compressed files, run this command: -Step2 : To rearrange directory of GREW dataset, turning to id-type-view structure, Run +Step2-1 : To rearrange directory of GREW dataset(for silhouette), turning to id-type-view structure, Run ``` python datasets/GREW/rearrange_GREW.py --input_path Path_of_GREW-raw --output_path Path_of_GREW-rearranged ``` +Step2-2 : To rearrange directory of GREW dataset(for pose), turning to id-type-view structure, Run +``` +python datasets/GREW/rearrange_GREW_pose.py --input_path Path_of_GREW-pose --output_path Path_of_GREW-pose-rearranged +``` -Step3: Transforming images to pickle file, run +Step3-1: Transforming images to pickle file, run ``` python datasets/pretreatment.py --input_path Path_of_GREW-rearranged --output_path Path_of_GREW-pkl --dataset GREW ``` +Step3-2: Transforming pose txts to pickle file, run +``` +python datasets/pretreatment.py --input_path Path_of_GREW-pose-rearranged --output_path Path_of_GREW-pose-pkl --pose --dataset GREW +``` + Then you will see the structure like: - Processed diff --git a/datasets/GREW/rearrange_GREW_pose.py b/datasets/GREW/rearrange_GREW_pose.py new file mode 100644 index 0000000..14f14cb --- /dev/null +++ b/datasets/GREW/rearrange_GREW_pose.py @@ -0,0 +1,92 @@ +import argparse +import os +import shutil +from pathlib import Path + +from tqdm import tqdm + +TOTAL_Test = 24000 +TOTAL_Train = 20000 + +def 
rearrange_train(train_path: Path, output_path: Path) -> None: + progress = tqdm(total=TOTAL_Train) + for sid in train_path.iterdir(): + if not sid.is_dir(): + continue + for sub_seq in sid.iterdir(): + if not sub_seq.is_dir(): + continue + for subfile in os.listdir(sub_seq): + src = os.path.join(train_path, sid.name, sub_seq.name) + dst = os.path.join(output_path, sid.name+'train', '00', sub_seq.name) + os.makedirs(dst,exist_ok=True) + if subfile not in os.listdir(dst) and subfile.endswith('_2d_pose.txt'): + pose_subfile = 'pose_'+subfile + os.symlink(os.path.join(src, subfile), + os.path.join(dst, pose_subfile)) + progress.update(1) + +def rearrange_test(test_path: Path, output_path: Path) -> None: + # for gallery + gallery = Path(os.path.join(test_path, 'gallery')) + probe = Path(os.path.join(test_path, 'probe')) + progress = tqdm(total=TOTAL_Test) + for sid in gallery.iterdir(): + if not sid.is_dir(): + continue + cnt = 1 + for sub_seq in sid.iterdir(): + if not sub_seq.is_dir(): + continue + for subfile in sorted(os.listdir(sub_seq)): + src = os.path.join(gallery, sid.name, sub_seq.name) + dst = os.path.join(output_path, sid.name, '%02d'%cnt, sub_seq.name) + os.makedirs(dst,exist_ok=True) + if subfile not in os.listdir(dst) and subfile.endswith('_2d_pose.txt'): + pose_subfile = 'pose_'+subfile + os.symlink(os.path.join(src, subfile), + os.path.join(dst, pose_subfile)) + cnt += 1 + progress.update(1) + # for probe + for sub_seq in probe.iterdir(): + if not sub_seq.is_dir(): + continue + for subfile in os.listdir(sub_seq): + src = os.path.join(probe, sub_seq.name) + dst = os.path.join(output_path, 'probe', '03', sub_seq.name) + os.makedirs(dst,exist_ok=True) + if subfile not in os.listdir(dst) and subfile.endswith('_2d_pose.txt'): + pose_subfile = 'pose_'+subfile + os.symlink(os.path.join(src, subfile), + os.path.join(dst, pose_subfile)) + progress.update(1) + +def rearrange_GREW(input_path: Path, output_path: Path) -> None: + os.makedirs(output_path, 
exist_ok=True) + + for folder in input_path.iterdir(): + if not folder.is_dir(): + continue + + print(f'Rearranging {folder}') + if folder.name == 'train': + rearrange_train(folder,output_path) + if folder.name == 'test': + rearrange_test(folder, output_path) + if folder.name == 'distractor': + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='GREW rearrange tool') + parser.add_argument('-i', '--input_path', required=True, type=str, + help='Root path of raw dataset.') + parser.add_argument('-o', '--output_path', default='GREW_rearranged', type=str, + help='Root path for output.') + + args = parser.parse_args() + + input_path = Path(args.input_path).resolve() + output_path = Path(args.output_path).resolve() + rearrange_GREW(input_path, output_path) \ No newline at end of file diff --git a/datasets/MSGG/README.md b/datasets/MSGG/README.md new file mode 100644 index 0000000..c8ac618 --- /dev/null +++ b/datasets/MSGG/README.md @@ -0,0 +1,19 @@ +# Datasets for MSGG +MSGG needs to convert the pose key format of other datasets(such as CASIA-B, GREW, Gait3D,) from coco17 to the input format of Pyramid keys. 
ORG_KEYPOINTS = {
    'nose'          : 0,
    'left_eye'      : 1,
    'right_eye'     : 2,
    'left_ear'      : 3,
    'right_ear'     : 4,
    'left_shoulder' : 5,
    'right_shoulder': 6,
    'left_elbow'    : 7,
    'right_elbow'   : 8,
    'left_wrist'    : 9,
    'right_wrist'   : 10,
    'left_hip'      : 11,
    'right_hip'     : 12,
    'left_knee'     : 13,
    'right_knee'    : 14,
    'left_ankle'    : 15,
    'right_ankle'   : 16,
}

NEW_KEYPOINTS = {
    0: 'right_shoulder',
    1: 'right_elbow',
    2: 'right_knee',
    3: 'right_hip',
    4: 'left_elbow',
    5: 'left_knee',
    6: 'left_shoulder',
    7: 'right_wrist',
    8: 'right_ankle',
    9: 'left_hip',
    10: 'left_wrist',
    11: 'left_ankle',
}

def get_index_mapping():
    """Return {pyramid_index: coco17_index} for the 12-joint MSGG layout.

    Each entry maps an index of the reordered 12-joint skeleton to the
    index of the same joint in the original COCO-17 keypoint ordering.
    """
    return {new_idx: ORG_KEYPOINTS[joint_name]
            for new_idx, joint_name in NEW_KEYPOINTS.items()}
get_index_mapping() + data_path = args.input_path + des_path = args.output_path + + id_list = sorted(os.listdir(data_path)) + for _id in id_list: + type_list = sorted(os.listdir(osp.join(data_path, _id))) + for _type in type_list: + view_list = sorted(os.listdir(osp.join(data_path, _id, _type))) + for _view in view_list: + seq_info = [_id, _type, _view] + seq_info_str = '-'.join(seq_info) + seq_dir = osp.join(data_path, *seq_info) + des_dir = osp.join(des_path, *seq_info) + if osp.exists(des_dir) is False: + os.makedirs(des_dir) + + keypoints_list = os.listdir(seq_dir) + pkl_name = "{}.pkl".format(_view) + seq_path = osp.join(seq_dir, pkl_name) + save_path = osp.join(des_dir, pkl_name) + seq_path_exists = osp.exists(seq_path) + + if seq_path_exists is False: + print("seq:{} input:{}. ".format(seq_info_str, seq_path_exists)) + continue + with open(seq_path, 'rb') as f: + keypoints_data = pickle.load(f) + to_pickle = [] + for keypoint in keypoints_data: + mapped_keypoints = np.zeros((12, 3)) + for i in range(mapped_keypoints.shape[0]): + mapped_keypoints[i] = keypoint[index_mapping[i]] + to_pickle.append(mapped_keypoints) + keypoints = np.stack(to_pickle) + pickle.dump(keypoints, open(save_path, 'wb')) + + print("FINISHED: " + "-".join(seq_info)) + diff --git a/datasets/OUMVLP/README.md b/datasets/OUMVLP/README.md index 932fa04..00575db 100644 --- a/datasets/OUMVLP/README.md +++ b/datasets/OUMVLP/README.md @@ -35,15 +35,27 @@ python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_pa ...... ...... 
+Step4-2: Transforming pose jsons to pickle file, run
+```
+python datasets/pretreatment.py --input_path Path_of_OUMVLP-pose-rearranged --output_path Path_of_OUMVLP-pose-pkl --pose --dataset OUMVLP
+```
+Step4-3: Generate the 17-keypoint pose format from the 18-keypoint format, run
+```
+python datasets/OUMVLP/oumvlp17.py --input_path Path_of_OUMVLP-pose18 --output_path Path_of_OUMVLP-pose17
+```
def ToOUMVLP17(input_path: Path, output_path: Path):
    """Convert OUMVLP 18-keypoint (OpenPose) pkls to the 17-keypoint (COCO) layout.

    Walks input_path/<subject>/<seq>/<view>/<view>.pkl, reorders the joint
    axis with `mask`, and writes the result to the same relative path under
    output_path.

    Args:
        input_path (Path): Root of the 18-keypoint pickled dataset.
        output_path (Path): Root for the converted 17-keypoint dataset.
    """
    # mask[i] = index in the 18-point layout of the i-th COCO-17 joint.
    mask = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10]
    TOTAL_SUBJECTS = 10307
    progress = tqdm(total=TOTAL_SUBJECTS)

    for subject in input_path.iterdir():
        if not subject.is_dir():
            continue  # ignore stray files in the dataset root
        output_subject = subject.name
        for seq in subject.iterdir():
            if not seq.is_dir():
                continue
            output_seq = seq.name
            for view in seq.iterdir():
                src = os.path.join(view, f"{view.name}.pkl")
                dst = os.path.join(output_path, output_subject, output_seq, view.name)
                os.makedirs(dst, exist_ok=True)
                with open(src, 'rb') as f:
                    srcdata = pickle.load(f)  # [T, 18, 3]
                data = srcdata[..., mask, :].copy()  # [T, 17, 3]
                pkl_path = os.path.join(dst, f'{view.name}.pkl')
                # `with` guarantees the handle is flushed and closed.
                with open(pkl_path, 'wb') as f:
                    pickle.dump(data, f)
        progress.update(1)
def rearrange(input_path: Path, output_path: Path) -> None:
    """Symlink OUMVLP pose jsons from <subject>/<view>_<seq> into
    <subject>/<seq>/<view> (id-type-view) layout under output_path.

    Args:
        input_path (Path): Root of the raw pose dataset.
        output_path (Path): Root for the rearranged dataset.
    """
    os.makedirs(output_path, exist_ok=True)
    progress = tqdm(total=TOTAL_SUBJECTS)
    for folder in input_path.iterdir():
        if not folder.is_dir():
            continue  # skip stray files in the dataset root
        subject = folder.name
        for sid in folder.iterdir():
            view, seq = sanitize(sid.name)
            src = os.path.join(input_path, subject, sid.name)
            dst = os.path.join(output_path, subject, seq, view)
            os.makedirs(dst, exist_ok=True)
            # Scan the destination once per sequence instead of once per file.
            existing = set(os.listdir(dst))
            for subfile in os.listdir(src):
                if subfile.endswith('.json') and subfile not in existing:
                    os.symlink(os.path.join(src, subfile),
                               os.path.join(dst, subfile))
        progress.update(1)
def txts2pickle(txt_groups: Tuple, output_path: Path, verbose: bool = False, dataset='CASIAB') -> None:
    """
    Reads a group of pose files (txt, or OpenPose json for OUMVLP) and saves
    the keypoints in pickle format.

    Args:
        txt_groups (Tuple): Tuple of (sid, seq, view) and list of pose file paths.
        output_path (Path): Output path.
        verbose (bool, optional): Display debug info. Defaults to False.
        dataset (str, optional): Dataset name; OUMVLP stores OpenPose json,
            other datasets store comma-separated txt. Defaults to 'CASIAB'.
    """

    sinfo = txt_groups[0]
    txt_paths = txt_groups[1]
    to_pickle = []
    if dataset == 'OUMVLP':
        for txt_file in sorted(txt_paths):
            try:
                with open(txt_file) as f:
                    jsondata = json.load(f)
                if len(jsondata['people']) == 0:
                    # No person detected in this frame; drop it.
                    continue
                data = np.array(jsondata["people"][0]["pose_keypoints_2d"]).reshape(-1, 3)
                to_pickle.append(data)
            except (OSError, ValueError, KeyError, IndexError):
                # json.JSONDecodeError is a ValueError subclass. Report the
                # malformed frame and keep going (was a bare except + print).
                logging.warning(f'Skipping malformed pose file: {txt_file}')
    else:
        for txt_file in sorted(txt_paths):
            if verbose:
                logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {txt_file}')
            # The first two values are frame meta; keypoints follow as
            # (x, y, score) triples.
            data = np.genfromtxt(txt_file, delimiter=',')[2:].reshape(-1, 3)
            to_pickle.append(data)

    if to_pickle:
        dst_path = os.path.join(output_path, *sinfo)
        keypoints = np.stack(to_pickle)
        os.makedirs(dst_path, exist_ok=True)
        pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
        if verbose:
            logging.debug(f'Saving {pkl_path}...')
        with open(pkl_path, 'wb') as f:
            pickle.dump(keypoints, f)
        logging.info(f'Saved {len(to_pickle)} valid frames\' keypoints to {pkl_path}.')

    if len(to_pickle) < 5:
        logging.warning(f'{sinfo} has less than 5 valid data.')
def pretreat_pose(input_path: Path, output_path: Path, workers: int = 4, verbose: bool = False, dataset='CASIAB') -> None:
    """Reads a pose dataset and saves the data in pickle format.

    Args:
        input_path (Path): Dataset root path.
        output_path (Path): Output path.
        workers (int, optional): Number of pool workers. Defaults to 4.
        verbose (bool, optional): Display debug info. Defaults to False.
        dataset (str, optional): Dataset name. Defaults to 'CASIAB'.
    """
    txt_groups = defaultdict(list)
    logging.info(f'Listing {input_path}')
    total_files = 0
    # OUMVLP poses are OpenPose json files; other datasets use txt files.
    pattern = '*.json' if dataset == 'OUMVLP' else '*.txt'
    for pose_path in input_path.rglob(pattern):
        if verbose:
            logging.debug(f'Adding {pose_path}')
        *_, sid, seq, view, _ = pose_path.as_posix().split('/')
        txt_groups[(sid, seq, view)].append(pose_path)
        total_files += 1

    logging.info(f'Total files listed: {total_files}')

    progress = tqdm(total=len(txt_groups), desc='Pretreating', unit='folder')

    with mp.Pool(workers) as pool:
        logging.info(f'Start pretreating {input_path}')
        # BUG FIX: pass the `dataset` parameter, not the global `args`,
        # so the function also works when called programmatically.
        for _ in pool.imap_unordered(partial(txts2pickle, output_path=output_path, verbose=verbose, dataset=dataset), txt_groups.items()):
            progress.update(1)
    logging.info('Done')
Default 64') parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.') parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.') + parser.add_argument('-p', '--pose', default=False, action='store_true', help='Processing pose.') args = parser.parse_args() logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s') @@ -146,5 +233,7 @@ if __name__ == '__main__': logging.info('Verbose mode is on.') for k, v in args.__dict__.items(): logging.debug(f'{k}: {v}') - - pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset) + if args.pose: + pretreat_pose(input_path=Path(args.input_path), output_path=Path(args.output_path), workers=args.n_workers, verbose=args.verbose, dataset=args.dataset) + else: + pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset) diff --git a/opengait/data/sampler.py b/opengait/data/sampler.py index 11a4a87..605e0f9 100644 --- a/opengait/data/sampler.py +++ b/opengait/data/sampler.py @@ -49,7 +49,10 @@ class TripletSampler(tordata.sampler.Sampler): return len(self.dataset) -def sync_random_sample_list(obj_list, k): +def sync_random_sample_list(obj_list, k, common_choice=False): + if common_choice: + idx = random.choices(range(len(obj_list)), k=k) + idx = torch.tensor(idx) if len(obj_list) < k: idx = random.choices(range(len(obj_list)), k=k) idx = torch.tensor(idx) @@ -97,3 +100,37 @@ class InferenceSampler(tordata.sampler.Sampler): def __len__(self): return len(self.dataset) + + +class CommonSampler(tordata.sampler.Sampler): + def __init__(self,dataset,batch_size,batch_shuffle): + + self.dataset = dataset + self.size = len(dataset) + self.batch_size = batch_size + if 
class RandomSelectSequence(object):
    """
    Randomly select a contiguous subsequence of fixed length.
    """
    def __init__(self, sequence_length=10):
        self.sequence_length = sequence_length

    def __call__(self, data):
        # Valid start indices are 0 .. T - L inclusive. The previous bound
        # randint(0, T - L) excluded the last valid start and raised
        # ValueError when T == L, rejecting exact-length sequences.
        max_start = data.shape[0] - self.sequence_length
        if max_start < 0:
            raise ValueError("The sequence length of data is too short, which does not meet the requirements.")
        start = np.random.randint(0, max_start + 1)
        end = start + self.sequence_length
        return data[start:end]
class MirrorPoses(object):
    """
    Horizontally mirror the pose sequence (reflect x about each frame's
    mean x) with probability `prob`.
    """
    def __init__(self, prob=0.5):
        # BUG FIX: was `self.probability = probability`, a NameError —
        # the constructor parameter is named `prob`.
        self.probability = prob

    def __call__(self, data):
        if np.random.random() <= self.probability:
            # Reflect each joint's x about the frame mean: x' = 2*c - x.
            center = np.mean(data[:, :, 0], axis=1, keepdims=True)
            data[:, :, 0] = center - data[:, :, 0] + center

        return data
class JointNoise(object):
    """
    Add per-joint Gaussian noise to the (x, y) coordinates.

    One offset is drawn per joint and applied identically to every frame;
    the confidence channel is left untouched.
    std: standard deviation of the noise.
    """

    def __init__(self, std=0.25):
        self.std = std

    def __call__(self, data):
        # data: (T, V, C). Draw an (x, y) offset per joint, pad a zero for
        # the confidence channel, then broadcast over the time axis.
        num_joints = data.shape[1]
        xy_noise = np.random.normal(0, self.std, (num_joints, 2))
        pad = np.zeros((num_joints, 1))
        offset = np.hstack((xy_noise, pad)).astype(np.float32)
        return data + offset
class GaitGraph1Input(object):
    '''
    Rearrange a pose sequence from (T, V, C) to (C, T, V, 1).
    '''
    def __call__(self, data):
        # Move the channel axis to the front, then append the singleton
        # trailing dimension expected by the GaitGraph1 backbone.
        channels_first = np.moveaxis(data, -1, 0)  # (C, T, V)
        return np.expand_dims(channels_first, axis=-1)
['alphapose', 'openpose']: #18 + self.mask=[2,3,9,8,6,12,5,4,10,11,7,13] + else: + raise ValueError("Invalid joint_format.") + + def __call__(self, x): + result=x[...,self.mask,:].copy() + return result \ No newline at end of file diff --git a/opengait/modeling/backbones/resgcn.py b/opengait/modeling/backbones/resgcn.py new file mode 100644 index 0000000..66616f0 --- /dev/null +++ b/opengait/modeling/backbones/resgcn.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +from ..modules import TemporalBasicBlock, TemporalBottleneckBlock, SpatialBasicBlock, SpatialBottleneckBlock + +class ResGCNModule(nn.Module): + """ + ResGCNModule + Arxiv: https://arxiv.org/abs/2010.09978 + Github: https://github.com/Thomas-yx/ResGCNv1 + https://github.com/BNU-IVC/FastPoseGait + """ + def __init__(self, in_channels, out_channels, block, A, stride=1, kernel_size=[9,2],reduction=4, get_res=False,is_main=False): + super(ResGCNModule, self).__init__() + + if not len(kernel_size) == 2: + logging.info('') + logging.error('Error: Please check whether len(kernel_size) == 2') + raise ValueError() + if not kernel_size[0] % 2 == 1: + logging.info('') + logging.error('Error: Please check whether kernel_size[0] % 2 == 1') + raise ValueError() + temporal_window_size, max_graph_distance = kernel_size + + if block == 'initial': + module_res, block_res = False, False + elif block == 'Basic': + module_res, block_res = True, False + else: + module_res, block_res = False, True + + if not module_res: + self.residual = lambda x: 0 + elif stride == 1 and in_channels == out_channels: + self.residual = lambda x: x + else: + # stride =2 + self.residual = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 1, (stride,1)), + nn.BatchNorm2d(out_channels), + ) + + if block in ['Basic','initial']: + spatial_block = SpatialBasicBlock + temporal_block = TemporalBasicBlock + if block == 'Bottleneck': + spatial_block = SpatialBottleneckBlock + temporal_block = TemporalBottleneckBlock + self.scn = 
class ResGCNModule(nn.Module):
    """
    ResGCNModule
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
            https://github.com/BNU-IVC/FastPoseGait
    One spatial GCN block followed by one temporal block, with an optional
    module-level residual branch and learnable edge weighting.
    """
    def __init__(self, in_channels, out_channels, block, A, stride=1, kernel_size=[9, 2], reduction=4, get_res=False, is_main=False):
        super(ResGCNModule, self).__init__()

        # kernel_size = [temporal_window_size, max_graph_distance].
        # BUGFIX: the original called `logging.*` here but this file never
        # imports logging, so the error path itself raised NameError.
        if len(kernel_size) != 2:
            raise ValueError('Please check whether len(kernel_size) == 2')
        if kernel_size[0] % 2 != 1:
            raise ValueError('Please check whether kernel_size[0] % 2 == 1')
        temporal_window_size, max_graph_distance = kernel_size

        # 'initial': no residual; 'Basic': module-level residual;
        # otherwise (Bottleneck): residual inside the blocks.
        if block == 'initial':
            module_res, block_res = False, False
        elif block == 'Basic':
            module_res, block_res = True, False
        else:
            module_res, block_res = False, True

        if not module_res:
            self.residual = lambda x: 0
        elif stride == 1 and in_channels == out_channels:
            self.residual = lambda x: x
        else:
            # stride == 2 or channel change: 1x1 conv projection.
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, (stride, 1)),
                nn.BatchNorm2d(out_channels),
            )

        if block in ['Basic', 'initial']:
            spatial_block = SpatialBasicBlock
            temporal_block = TemporalBasicBlock
        if block == 'Bottleneck':
            spatial_block = SpatialBottleneckBlock
            temporal_block = TemporalBottleneckBlock
        self.scn = spatial_block(in_channels, out_channels, max_graph_distance, block_res, reduction)
        # tcn_stride is only set on main-stream modules that keep the
        # channel count.
        tcn_stride = (in_channels == out_channels) and is_main
        self.tcn = temporal_block(out_channels, temporal_window_size, stride, block_res, reduction, get_res=get_res, tcn_stride=tcn_stride)
        # Learnable edge-importance weighting of the adjacency matrix.
        self.edge = nn.Parameter(torch.ones_like(A))

    def forward(self, x, A):
        # BUGFIX(portability): move A to x's device instead of forcing CUDA
        # via .cuda(x.get_device()); equivalent on GPU, also works on CPU.
        A = A.to(x.device)
        return self.tcn(self.scn(x, A * self.edge), self.residual(x))


class ResGCNInputBranch(nn.Module):
    """
    ResGCNInputBranch_Module
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
    One input branch: BatchNorm on the raw input followed by a stack of
    ResGCN modules; the first module is an 'initial' one (no residual).
    """
    def __init__(self, input_branch, block, A, input_num, reduction=4):
        super(ResGCNInputBranch, self).__init__()

        self.register_buffer('A', A)

        modules = []
        for i in range(len(input_branch) - 1):
            stage_block = 'initial' if i == 0 else block
            modules.append(ResGCNModule(input_branch[i], input_branch[i + 1], stage_block, A, reduction=reduction))

        self.bn = nn.BatchNorm2d(input_branch[0])
        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        # x: (N, C, T, V) for this branch.
        x = self.bn(x)
        for layer in self.layers:
            x = layer(x, self.A)
        return x


class ResGCN(nn.Module):
    """
    ResGCN
    Arxiv: https://arxiv.org/abs/2010.09978
    Multi-branch input network followed by a shared main stream, global
    average pooling and a linear head.
    """
    def __init__(self, input_num, input_branch, main_stream, num_class, reduction, block, graph):
        super(ResGCN, self).__init__()
        self.graph = graph
        self.head = nn.ModuleList(
            ResGCNInputBranch(input_branch, block, graph, input_num, reduction)
            for _ in range(input_num)
        )

        main_stream_list = []
        for i in range(len(main_stream) - 1):
            # Downsample in time whenever the channel count changes.
            stride = 1 if main_stream[i] == main_stream[i + 1] else 2
            if i == 0:
                # Branch outputs are concatenated on the channel axis, so the
                # first main-stream module sees input_num * channels.
                main_stream_list.append(ResGCNModule(main_stream[i] * input_num, main_stream[i + 1], block, graph,
                                                     stride=1, reduction=reduction, get_res=True, is_main=True))
            else:
                main_stream_list.append(ResGCNModule(main_stream[i], main_stream[i + 1], block, graph,
                                                     stride=stride, reduction=reduction, is_main=True))
        self.backbone = nn.ModuleList(main_stream_list)
        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        # Generalized: derive the head width from the config instead of
        # hard-coding 256 (identical for the shipped configs).
        self.fcn = nn.Linear(main_stream[-1], num_class)

    def forward(self, x):
        # x: (N, input_num, C, T, V) — one slice per input branch.
        branch_outputs = [branch(x[:, i]) for i, branch in enumerate(self.head)]
        x = torch.cat(branch_outputs, dim=1)

        # main stream
        for layer in self.backbone:
            x = layer(x, self.graph)

        # (N, C, 1, 1) -> (N, C) -> (N, num_class)
        x = self.global_pooling(x)
        x = self.fcn(x.squeeze(-1).squeeze(-1))
        return x
class SupConLoss_Re(BaseLoss):
    """OpenGait adapter around SupConLoss.

    Wraps the plain SupConLoss module so it matches the (loss, info)
    interface and cross-GPU gathering used by the training loop.
    """

    def __init__(self, temperature=0.01):
        super(SupConLoss_Re, self).__init__()
        self.train_loss = SupConLoss(temperature=temperature)

    @gather_and_scale_wrapper
    def forward(self, features, labels=None, mask=None):
        # features: (bsz, n_views, ...); labels: (bsz,).
        # mask is unused here; kept for interface compatibility.
        loss = self.train_loss(features, labels)
        self.info.update({
            'loss': loss.detach().clone()})
        return loss, self.info


class SupConLoss(nn.Module):
    """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
    It also supports the unsupervised contrastive loss in SimCLR"""
    def __init__(self, temperature=0.01, contrast_mode='all',
                 base_temperature=0.07):
        super(SupConLoss, self).__init__()
        self.temperature = temperature
        self.contrast_mode = contrast_mode
        self.base_temperature = base_temperature

    def forward(self, features, labels=None, mask=None):
        """Compute loss for model. If both `labels` and `mask` are None,
        it degenerates to SimCLR unsupervised loss:
        https://arxiv.org/pdf/2002.05709.pdf

        Args:
            features: hidden vector of shape [bsz, n_views, ...].
            labels: ground truth of shape [bsz].
            mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
                has the same class as sample i. Can be asymmetric.
        Returns:
            A loss scalar.
        """
        device = (torch.device('cuda')
                  if features.is_cuda
                  else torch.device('cpu'))

        if len(features.shape) < 3:
            raise ValueError('`features` needs to be [bsz, n_views, ...],'
                             'at least 3 dimensions are required')
        if len(features.shape) > 3:
            features = features.view(features.shape[0], features.shape[1], -1)

        batch_size = features.shape[0]
        if labels is not None and mask is not None:
            raise ValueError('Cannot define both `labels` and `mask`')
        elif labels is None and mask is None:
            # SimCLR degenerate case: the only positive is the other view.
            mask = torch.eye(batch_size, dtype=torch.float32).to(device)
        elif labels is not None:
            labels = labels.contiguous().view(-1, 1)
            if labels.shape[0] != batch_size:
                raise ValueError('Num of labels does not match num of features')
            mask = torch.eq(labels, labels.T).float().to(device)
        else:
            mask = mask.float().to(device)

        contrast_count = features.shape[1]
        contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
        if self.contrast_mode == 'one':
            anchor_feature = features[:, 0]
            anchor_count = 1
        elif self.contrast_mode == 'all':
            anchor_feature = contrast_feature
            anchor_count = contrast_count
        else:
            raise ValueError('Unknown mode: {}'.format(self.contrast_mode))

        # compute logits
        anchor_dot_contrast = torch.div(
            torch.matmul(anchor_feature, contrast_feature.T),
            self.temperature)
        # for numerical stability
        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()

        # tile mask
        mask = mask.repeat(anchor_count, contrast_count)
        # mask-out self-contrast cases
        logits_mask = torch.scatter(
            torch.ones_like(mask),
            1,
            torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
            0
        )
        mask = mask * logits_mask

        # compute log_prob
        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

        # compute mean of log-likelihood over positives.
        # BUGFIX: guard against anchors with zero positives, which would
        # otherwise give NaN via 0/0 (same fix as the official SupContrast
        # repository); such anchors contribute 0 to the loss. Positive
        # counts are integers, so clamping at 1 only affects zero rows.
        pos_per_anchor = mask.sum(1)
        mean_log_prob_pos = (mask * log_prob).sum(1) / torch.clamp(pos_per_anchor, min=1.0)

        # loss
        loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
        loss = loss.view(anchor_count, batch_size).mean()

        return loss


class SupConLoss_Lp(BaseLoss):
    """SupConLoss from pytorch-metric-learning using an Lp distance."""

    def __init__(self, temperature=0.01):
        super(SupConLoss_Lp, self).__init__()
        self.distance = distances.LpDistance()
        self.train_loss = losses.SupConLoss(temperature=temperature, distance=self.distance)

    @gather_and_scale_wrapper
    def forward(self, features, labels=None, mask=None):
        loss = self.train_loss(features, labels)
        self.info.update({
            'loss': loss.detach().clone()})
        return loss, self.info
class GaitGraph1(BaseModel):
    """
    GaitGraph1: Gaitgraph: Graph Convolutional Network for Skeleton-Based Gait Recognition
    Paper: https://ieeexplore.ieee.org/document/9506717
    Github: https://github.com/tteepe/GaitGraph
    """
    def build_network(self, model_cfg):
        # Model hyper-parameters (see configs/gaitgraph1/*.yaml).
        self.joint_format = model_cfg['joint_format']
        self.input_num = model_cfg['input_num']
        self.block = model_cfg['block']
        self.input_branch = model_cfg['input_branch']
        self.main_stream = model_cfg['main_stream']
        self.num_class = model_cfg['num_class']
        self.reduction = model_cfg['reduction']
        self.tta = model_cfg['tta']

        ## Graph Init ##
        self.graph = Graph(joint_format=self.joint_format, max_hop=3)
        self.A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False)
        ## Network ##
        self.ResGCN = ResGCN(input_num=self.input_num, input_branch=self.input_branch,
                             main_stream=self.main_stream, num_class=self.num_class,
                             reduction=self.reduction, block=self.block, graph=self.A)

    def forward(self, inputs):
        ipts, labs, type_, view_, seqL = inputs
        x_input = ipts[0]  # N T C V I
        x_input = x_input.permute(0, 2, 3, 4, 1).contiguous()
        N, T, V, I, C = x_input.size()

        pose = x_input
        if self.training:
            # Training input holds two augmented views stacked along T
            # (TwoView transform); split them and stack on the batch axis.
            x_input = torch.cat([x_input[:, :int(T / 2), ...], x_input[:, int(T / 2):, ...]], dim=0)
        elif self.tta:
            # Test-time augmentation: also run the temporally flipped sequence.
            data_flipped = torch.flip(x_input, dims=[1])
            x_input = torch.cat([x_input, data_flipped], dim=0)

        x = x_input.permute(0, 3, 4, 1, 2).contiguous()

        # resgcn backbone + L2 norm (GaitGraph1 only; removed in GaitGraph2)
        x = self.ResGCN(x)
        x = F.normalize(x, dim=1, p=2)

        if self.training:
            f1, f2 = torch.split(x, [N, N], dim=0)
            embed = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1)  # (N, 2, num_class)
        elif self.tta:
            f1, f2 = torch.split(x, [N, N], dim=0)
            embed = torch.mean(torch.stack([f1, f2]), dim=0)
            embed = embed.unsqueeze(-1)
        else:
            # BUGFIX: the original read the still-unassigned name `embed`
            # here (UnboundLocalError); use the backbone output directly.
            embed = x.unsqueeze(-1)

        retval = {
            'training_feat': {
                'SupConLoss': {'features': embed, 'labels': labs},  # loss
            },
            'visual_summary': {
                'image/pose': pose.view(N * T, 1, I * V, C).contiguous()  # visualization
            },
            'inference_feat': {
                'embeddings': embed  # for metric
            }
        }
        return retval
class GaitGraph2(BaseModel):
    """
    GaitGraph2: Towards a Deeper Understanding of Skeleton-based Gait Recognition
    Paper: https://openaccess.thecvf.com/content/CVPR2022W/Biometrics/papers/Teepe_Towards_a_Deeper_Understanding_of_Skeleton-Based_Gait_Recognition_CVPRW_2022_paper
    Github: https://github.com/tteepe/GaitGraph2
    """
    def build_network(self, model_cfg):
        # Read hyper-parameters from the model config.
        self.joint_format = model_cfg['joint_format']
        self.input_num = model_cfg['input_num']
        self.block = model_cfg['block']
        self.input_branch = model_cfg['input_branch']
        self.main_stream = model_cfg['main_stream']
        self.num_class = model_cfg['num_class']
        self.reduction = model_cfg['reduction']
        self.tta = model_cfg['tta']
        ## Graph Init ##
        self.graph = Graph(joint_format=self.joint_format, max_hop=3)
        self.A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False)
        ## Network ##
        self.ResGCN = ResGCN(input_num=self.input_num, input_branch=self.input_branch,
                             main_stream=self.main_stream, num_class=self.num_class,
                             reduction=self.reduction, block=self.block, graph=self.A)

    def forward(self, inputs):
        ipts, labs, type_, view_, seqL = inputs
        x_input = ipts[0]
        N, T, V, I, C = x_input.size()
        pose = x_input
        flip_idx = self.graph.flip_idx

        if not self.training and self.tta:
            # Test-time augmentation: append a temporally reversed view and a
            # left/right mirrored view of every sequence to the batch.
            multi_input = MultiInput(self.graph.connect_joint, self.graph.center)
            reversed_views = []
            mirrored_views = []
            for n in range(N):
                reversed_views.append(multi_input(x_input[n, :, :, 0, :3].flip(0)))
                mirrored_views.append(multi_input(x_input[n, :, flip_idx, 0, :3]))
            x_input = torch.cat([x_input, torch.stack(reversed_views, 0), torch.stack(mirrored_views, 0)], dim=0)

        x = x_input.permute(0, 3, 4, 1, 2).contiguous()

        # resgcn backbone
        x = self.ResGCN(x)

        if not self.training and self.tta:
            # Concatenate the embeddings of the three views per sequence.
            f1, f2, f3 = torch.split(x, [N, N, N], dim=0)
            x = torch.cat((f1, f2, f3), dim=1)

        embed = torch.unsqueeze(x, -1)

        retval = {
            'training_feat': {
                'SupConLoss': {'features': x, 'labels': labs},  # loss
            },
            'visual_summary': {
                'image/pose': pose.view(N * T, 1, I * V, C).contiguous()  # visualization
            },
            'inference_feat': {
                'embeddings': embed  # for metric
            }
        }
        return retval
class MultiInput:
    """Torch version of the GaitGraph multi-branch input, used for TTA.

    Maps a (T, V, C) sequence to (T, V, 3, C + 2): joints (absolute plus
    center-relative), velocities, and bones (vectors plus angles).
    """

    def __init__(self, connect_joint, center):
        self.connect_joint = connect_joint
        self.center = center

    def __call__(self, data):
        T, V, C = data.shape
        x = data
        out = torch.zeros((T, V, 3, C + 2), device=data.device)

        # Branch 0: joints, plus x/y relative to the center joint.
        out[:, :, 0, :C] = x
        for v in range(V):
            out[:, v, 0, C:] = x[:, v, :2] - x[:, self.center, :2]

        # Branch 1: one- and two-frame velocities; channel 3 keeps confidence.
        for t in range(T - 2):
            out[t, :, 1, :2] = x[t + 1, :, :2] - x[t, :, :2]
            out[t, :, 1, 3:] = x[t + 2, :, :2] - x[t, :, :2]
        out[:, :, 1, 3] = x[:, :, 2]

        # Branch 2: bone vectors, their angles, and confidence on channel 3.
        for v in range(V):
            out[:, v, 2, :2] = x[:, v, :2] - x[:, self.connect_joint[v], :2]
        bone_length = 0
        for c in range(C - 1):
            bone_length += torch.pow(out[:, :, 2, c], 2)
        bone_length = torch.sqrt(bone_length) + 0.0001
        for c in range(C - 1):
            out[:, :, 2, C + c] = torch.acos(out[:, :, 2, c] / bone_length)
        out[:, :, 2, 3] = x[:, :, 2]

        return out


class Mish(nn.Module):
    """Mish activation: x * tanh(softplus(x))."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x * torch.tanh(F.softplus(x))
class STModule(nn.Module):
    """
    Augmented graph spatial convolution for the Spatial Transformer.
    Adapted from:
    https://github.com/Chiaraplizz/ST-TR/blob/master/code/st_gcn/net/gcn_attention.py
    """

    def __init__(self, in_channels, out_channels, incidence, num_point):
        super(STModule, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.incidence = incidence
        self.num_point = num_point
        self.relu = Mish()
        self.bn = nn.BatchNorm2d(out_channels)
        self.data_bn = nn.BatchNorm1d(self.in_channels * self.num_point)
        self.attention_conv = SpatialAttention(in_channels=in_channels, out_channel=out_channels, A=self.incidence, num_point=self.num_point)

    def forward(self, x):
        N, C, T, V = x.size()
        # Per-joint data normalization: (N, C, T, V) -> (N, C*V, T) -> back.
        x = x.permute(0, 1, 3, 2).reshape(N, C * V, T)
        x = self.data_bn(x)
        x = x.reshape(N, C, V, T).permute(0, 1, 3, 2)
        # BUGFIX(portability): keep the adjacency on x's device instead of
        # forcing .cuda(), so the module also runs on CPU.
        self.incidence = self.incidence.to(x.device)
        # (N, C, T, V) -> (N*T, C, 1, V): attention runs frame by frame.
        xa = x.permute(0, 2, 1, 3).reshape(-1, C, 1, V)
        attn_out = self.attention_conv(xa)
        # (N*T, C', 1, V) -> (N, C', T, V)
        attn_out = attn_out.reshape(N, T, -1, V).permute(0, 2, 1, 3)
        # NOTE: activation before BatchNorm, as in the original implementation.
        return self.bn(self.relu(attn_out))


class UnitConv2D(nn.Module):
    """
    Temporal unit convolution used in the GaitTR TCN_ST block:
    dropout -> (kernel_size x 1) conv over time -> Mish -> BatchNorm.
    """

    def __init__(self, D_in, D_out, kernel_size=9, stride=1, dropout=0.1, bias=True):
        super(UnitConv2D, self).__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(D_in, D_out, kernel_size=(kernel_size, 1),
                              padding=(pad, 0), stride=(stride, 1), bias=bias)
        self.bn = nn.BatchNorm2d(D_out)
        self.relu = Mish()
        self.dropout = nn.Dropout(dropout, inplace=False)
        # He-style initialization of the conv weights.
        self.conv_init(self.conv)

    def forward(self, x):
        x = self.dropout(x)
        return self.bn(self.relu(self.conv(x)))

    def conv_init(self, module):
        # fan = out_channels * prod(kernel_size)
        n = module.out_channels
        for k in module.kernel_size:
            n = n * k
        module.weight.data.normal_(0, math.sqrt(2. / n))


class TCN_ST(nn.Module):
    """
    Block of GaitTR: https://arxiv.org/pdf/2204.03873.pdf
    TCN: Temporal Convolution Network
    ST: Spatial Temporal Graph Convolution Network
    """

    def __init__(self, in_channel, out_channel, A, num_point):
        super(TCN_ST, self).__init__()
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.A = A
        self.num_point = num_point
        # network
        self.tcn = UnitConv2D(D_in=in_channel, D_out=in_channel, kernel_size=9)
        self.st = STModule(in_channels=in_channel, out_channels=out_channel, incidence=A, num_point=num_point)
        self.residual = lambda x: x
        if in_channel != out_channel:
            # Channel change: project the residual branches with 1x1 convs.
            self.residual_s = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, 1),
                nn.BatchNorm2d(out_channel),
            )
            self.down = UnitConv2D(D_in=in_channel, D_out=out_channel, kernel_size=1, dropout=0)
        else:
            self.residual_s = lambda x: x
            self.down = None

    def forward(self, x):
        x0 = self.tcn(x) + self.residual(x)
        y = self.st(x0) + self.residual_s(x0)
        # skip residual from the block input
        y = y + (x if self.down is None else self.down(x))
        return y


class GaitTR(BaseModel):
    """
    GaitTR: Spatial Transformer Network on Skeleton-based Gait Recognition
    Arxiv : https://arxiv.org/abs/2204.03873.pdf
    """

    def build_network(self, model_cfg):
        in_c = model_cfg['in_channels']
        self.num_class = model_cfg['num_class']
        self.joint_format = model_cfg['joint_format']
        self.graph = Graph(joint_format=self.joint_format, max_hop=3)

        #### Network Define ####
        # adjacency matrix
        self.A = torch.from_numpy(self.graph.A.astype(np.float32))
        # data normalization
        num_point = self.A.shape[-1]
        self.data_bn = nn.BatchNorm1d(in_c[0] * num_point)
        # backbone
        backbone = []
        for i in range(len(in_c) - 1):
            backbone.append(TCN_ST(in_channel=in_c[i], out_channel=in_c[i + 1], A=self.A, num_point=num_point))
        self.backbone = nn.ModuleList(backbone)
        self.fcn = nn.Conv1d(in_c[-1], self.num_class, kernel_size=1)

    def forward(self, inputs):
        ipts, labs, _, _, seqL = inputs

        x = ipts[0]
        pose = x
        # BUGFIX: the original tested `len(x.size()) == 4` only AFTER
        # unpacking five sizes, so the check was unreachable; do it first.
        if x.dim() == 4:  # N, T, C, V -> N, T, C, V, M=1
            x = x.unsqueeze(-1)
        # N, T, C, V, M -> N, C, T, V, M
        x = x.permute(0, 2, 1, 3, 4)
        N, C, T, V, M = x.size()
        del ipts

        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
        x = self.data_bn(x)
        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)

        # backbone
        for m in self.backbone:
            x = m(x)

        # V pooling
        x = F.avg_pool2d(x, kernel_size=(1, V))
        # M pooling
        c = x.size(1)
        t = x.size(2)
        x = x.view(N, M, c, t).mean(dim=1).view(N, c, t)  # [n, c, t]
        # T pooling
        x = F.avg_pool1d(x, kernel_size=x.size()[2])  # [n, c, 1]
        # C fcn
        x = self.fcn(x)  # [n, num_class, 1]
        x = F.avg_pool1d(x, x.size()[2:])  # [n, num_class, 1]
        x = x.view(N, self.num_class)
        embed = x.unsqueeze(-1)  # [n, c, 1]

        retval = {
            'training_feat': {
                'triplet': {'embeddings': embed, 'labels': labs}
            },
            'visual_summary': {
                'image/pose': pose.view(N * T, M, V, C)
            },
            'inference_feat': {
                'embeddings': embed
            }
        }
        return retval
class MultiScaleGaitGraph(BaseModel):
    """
    Learning Rich Features for Gait Recognition by Integrating Skeletons and Silhouettes
    Github: https://github.com/YunjiePeng/BimodalFusion
    Three parallel ST-GCN streams at low/medium/high semantic levels with
    cross-scale message passing between them.
    """

    def build_network(self, model_cfg):
        in_c = model_cfg['in_channels']
        out_c = model_cfg['out_channels']
        num_id = model_cfg['num_id']
        temporal_kernel_size = model_cfg['temporal_kernel_size']

        # Spatial graphs: one adjacency stack per semantic level.
        self.graph = SpatialGraph(**model_cfg['graph_cfg'])
        self.register_buffer('A_lowSemantic', torch.tensor(
            self.graph.get_adjacency(semantic_level=0), dtype=torch.float32, requires_grad=False))
        self.register_buffer('A_mediumSemantic', torch.tensor(
            self.graph.get_adjacency(semantic_level=1), dtype=torch.float32, requires_grad=False))
        self.register_buffer('A_highSemantic', torch.tensor(
            self.graph.get_adjacency(semantic_level=2), dtype=torch.float32, requires_grad=False))

        # (temporal window, number of adjacency subsets)
        kernel_size = (temporal_kernel_size, self.graph.num_A)

        # Three parallel ST-GCN streams; the first block of each stream has
        # no residual, and one extra channel-preserving block is appended.
        # BUGFIX: the original reused the loop variable `i` after the loop
        # (`in_c[i+1]`), which crashes for single-stage configs; use in_c[-1].
        self.st_gcn_networks_lowSemantic = nn.ModuleList()
        self.st_gcn_networks_mediumSemantic = nn.ModuleList()
        self.st_gcn_networks_highSemantic = nn.ModuleList()
        for stream in (self.st_gcn_networks_lowSemantic,
                       self.st_gcn_networks_mediumSemantic,
                       self.st_gcn_networks_highSemantic):
            for i in range(len(in_c) - 1):
                stream.append(st_gcn_block(in_c[i], in_c[i + 1], kernel_size, 1,
                                           residual=(i != 0)))
            stream.append(st_gcn_block(in_c[-1], in_c[-1], kernel_size, 1))

        # Learnable edge-importance weights, one per ST-GCN block.
        self.edge_importance_lowSemantic = nn.ParameterList([
            nn.Parameter(torch.ones(self.A_lowSemantic.size()))
            for _ in self.st_gcn_networks_lowSemantic])
        self.edge_importance_mediumSemantic = nn.ParameterList([
            nn.Parameter(torch.ones(self.A_mediumSemantic.size()))
            for _ in self.st_gcn_networks_mediumSemantic])
        self.edge_importance_highSemantic = nn.ParameterList([
            nn.Parameter(torch.ones(self.A_highSemantic.size()))
            for _ in self.st_gcn_networks_highSemantic])

        self.fc = nn.Linear(in_c[-1], out_c)
        self.bn_neck = nn.BatchNorm1d(out_c)
        self.encoder_cls = nn.Linear(out_c, num_id, bias=False)

    def semantic_pooling(self, x):
        # Halve the node dimension by averaging the two symmetric halves.
        cur_node_num = x.size()[-1]
        half_x_1, half_x_2 = torch.split(x, int(cur_node_num / 2), dim=-1)
        return torch.add(half_x_1, half_x_2) / 2

    def forward(self, inputs):
        ipts, labs, _, _, seqL = inputs

        x = ipts[0]  # [N, T, V, C]
        del ipts
        # N: videos, T: frames, V: keypoints, C: features per keypoint.
        N, T, V, C = x.size()
        x = x.permute(0, 3, 1, 2).contiguous().view(N, C, T, V)

        # Derive the medium/high semantic inputs by node pooling.
        y = self.semantic_pooling(x)
        z = self.semantic_pooling(y)
        for (gcn_low, imp_low, gcn_med, imp_med, gcn_high, imp_high) in zip(
                self.st_gcn_networks_lowSemantic, self.edge_importance_lowSemantic,
                self.st_gcn_networks_mediumSemantic, self.edge_importance_mediumSemantic,
                self.st_gcn_networks_highSemantic, self.edge_importance_highSemantic):
            x, _ = gcn_low(x, self.A_lowSemantic * imp_low)
            y, _ = gcn_med(y, self.A_mediumSemantic * imp_med)
            z, _ = gcn_high(z, self.A_highSemantic * imp_high)

            # Cross-scale Message Passing: low -> medium -> high.
            x_sp = self.semantic_pooling(x)
            y = torch.add(y, x_sp)
            y_sp = self.semantic_pooling(y)
            z = torch.add(z, y_sp)

        # Global average pooling of each stream.
        x_sp = F.avg_pool2d(x, x.size()[2:])
        N, C, T, V = x_sp.size()
        x_sp = x_sp.view(N, C, T * V).contiguous()

        y_sp = F.avg_pool2d(y, y.size()[2:])
        N, C, T, V = y_sp.size()
        y_sp = y_sp.view(N, C, T * V).contiguous()

        z = F.avg_pool2d(z, z.size()[2:])
        N, C, T, V = z.size()
        z = z.permute(0, 2, 3, 1).contiguous().view(N, T * V, C)

        z_fc = self.fc(z.view(N, -1))
        bn_z_fc = self.bn_neck(z_fc)
        z_cls_score = self.encoder_cls(bn_z_fc)

        z_fc = z_fc.unsqueeze(-1).contiguous()  # [n, c, p]
        z_cls_score = z_cls_score.unsqueeze(-1).contiguous()  # [n, c, p]

        retval = {
            'training_feat': {
                'triplet_joints': {'embeddings': x_sp, 'labels': labs},
                'triplet_limbs': {'embeddings': y_sp, 'labels': labs},
                'triplet_bodyparts': {'embeddings': z_fc, 'labels': labs},
                'softmax': {'logits': z_cls_score, 'labels': labs}
            },
            'visual_summary': {},
            'inference_feat': {
                'embeddings': z_fc
            }
        }
        return retval
+ """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + dropout=0, + residual=True): + super().__init__() + + assert len(kernel_size) == 2 + assert kernel_size[0] % 2 == 1 + padding = ((kernel_size[0] - 1) // 2, 0) + + self.gcn = SCN(in_channels, out_channels, kernel_size[1]) + + self.tcn = nn.Sequential( + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), + nn.Conv2d( + out_channels, + out_channels, + (kernel_size[0], 1), + (stride, 1), + padding, + ), + nn.BatchNorm2d(out_channels), + nn.Dropout(dropout, inplace=True), + ) + + if not residual: + self.residual = lambda x: 0 + + elif (in_channels == out_channels) and (stride == 1): + self.residual = lambda x: x + + else: + self.residual = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + stride=(stride, 1)), + nn.BatchNorm2d(out_channels), + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x, A): + res = self.residual(x) + x, A = self.gcn(x, A) + x = self.tcn(x) + res + + return self.relu(x), A + +class SCN(nn.Module): + r"""The basic module for applying a graph convolution. + Args: + in_channels (int): Number of channels in the input sequence data + out_channels (int): Number of channels produced by the convolution + kernel_size (int): Size of the graph convolving kernel + t_kernel_size (int): Size of the temporal convolving kernel + t_stride (int, optional): Stride of the temporal convolution. Default: 1 + t_padding (int, optional): Temporal zero-padding added to both sides of + the input. Default: 0 + t_dilation (int, optional): Spacing between temporal kernel elements. + Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. 
+ Default: ``True`` + Shape: + - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format + - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format + - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format + - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format + where + :math:`N` is a batch size, + :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, + :math:`T_{in}/T_{out}` is a length of input/output sequence, + :math:`V` is the number of graph nodes. + """ + def __init__(self, + in_channels, + out_channels, + kernel_size, + t_kernel_size=1, + t_stride=1, + t_padding=0, + t_dilation=1, + bias=True): + super().__init__() + # The defined module SCN are responsible only for the Spacial Graph (i.e. the graph in on frame), + # and the parameter t_kernel_size in this situation is always set to 1. + + self.kernel_size = kernel_size + self.conv = nn.Conv2d(in_channels, + out_channels * kernel_size, + kernel_size=(t_kernel_size, 1), + padding=(t_padding, 0), + stride=(t_stride, 1), + dilation=(t_dilation, 1), + bias=bias) + """ + The 1x1 conv operation here stands for the weight metrix W. + The kernel_size here stands for the number of different adjacency matrix, + which are defined according to the partitioning strategy. + Because for neighbor nodes in the same subset (in one adjacency matrix), the weights are shared. + It is reasonable to apply 1x1 conv as the implementation of weight function. 
+ """ + + + def forward(self, x, A): + assert A.size(0) == self.kernel_size + + x = self.conv(x) + + n, kc, t, v = x.size() + x = x.view(n, self.kernel_size, kc // self.kernel_size, t, v) + x = torch.einsum('nkctv,kvw->nctw', (x, A)) + + return x.contiguous(), A + +class SpatialGraph(): + """ Use skeleton sequences extracted by Openpose/HRNet to construct Spatial-Temporal Graph + + Args: + strategy (string): must be one of the follow candidates + - uniform: Uniform Labeling + - distance: Distance Partitioning + - spatial: Spatial Configuration Partitioning + - gait_temporal: Gait Temporal Configuration Partitioning + For more information, please refer to the section 'Partition Strategies' in PGG. + layout (string): must be one of the follow candidates + - body_12: Is consists of 12 joints. + (right shoulder, right elbow, right knee, right hip, left elbow, left knee, + left shoulder, right wrist, right ankle, left hip, left wrist, left ankle). + For more information, please refer to the section 'Data Processing' in PGG. 
+ max_hop (int): the maximal distance between two connected nodes # 1-neighbor + dilation (int): controls the spacing between the kernel points + """ + def __init__(self, + layout='body_12', # Openpose here represents for body_12 + strategy='spatial', + semantic_level=0, + max_hop=1, + dilation=1): + self.layout = layout + self.strategy = strategy + self.max_hop = max_hop + self.dilation = dilation + self.num_node, self.neighbor_link_dic = self.get_layout_info(layout) + self.num_A = self.get_A_num(strategy) + + def __str__(self): + return self.A + + def get_A_num(self, strategy): + if self.strategy == 'uniform': + return 1 + elif self.strategy == 'distance': + return 2 + elif (self.strategy == 'spatial') or (self.strategy == 'gait_temporal'): + return 3 + else: + raise ValueError("Do Not Exist This Strategy") + + def get_layout_info(self, layout): + if layout == 'body_12': + num_node = 12 + neighbor_link_dic = { + 0: [(7, 1), (1, 0), (10, 4), (4, 6), + (8, 2), (2, 3), (11, 5), (5, 9), + (9, 3), (3, 0), (9, 6), (6, 0)], + 1: [(1, 0), (4, 0), (0, 3), (2, 3), (5, 3)], + 2: [(1, 0), (2, 0)] + } + return num_node, neighbor_link_dic + else: + raise ValueError("Do Not Exist This Layout.") + + def get_edge(self, semantic_level): + # edge is a list of [child, parent] pairs, regarding the center node as root node + self_link = [(i, i) for i in range(int(self.num_node / (2 ** semantic_level)))] + neighbor_link = self.neighbor_link_dic[semantic_level] + edge = self_link + neighbor_link + center = [] + if self.layout == 'body_12': + if semantic_level == 0: + center = [0, 3, 6, 9] + elif semantic_level == 1: + center = [0, 3] + elif semantic_level == 2: + center = [0] + return edge, center + + def get_gait_temporal_partitioning(self, semantic_level): + if semantic_level == 0: + if self.layout == 'body_12': + positive_node = {1, 2, 4, 5, 7, 8, 10, 11} + negative_node = {0, 3, 6, 9} + elif semantic_level == 1: + if self.layout == 'body_12': + positive_node = {1, 2, 4, 5} + 
                negative_node = {0, 3}
        elif semantic_level == 2:
            if self.layout == 'body_12':
                positive_node = {1, 2}
                negative_node = {0}
        return positive_node, negative_node

    def get_adjacency(self, semantic_level):
        # Build the stack of normalized adjacency matrices for the requested
        # semantic level according to self.strategy.
        edge, center = self.get_edge(semantic_level)
        num_node = int(self.num_node / (2 ** semantic_level))
        hop_dis = get_hop_distance(num_node, edge, max_hop=self.max_hop)

        valid_hop = range(0, self.max_hop + 1, self.dilation)
        adjacency = np.zeros((num_node, num_node))
        for hop in valid_hop:
            adjacency[hop_dis == hop] = 1

        normalize_adjacency = normalize_digraph(adjacency)
        # normalize_adjacency = adjacency # withoutNodeNorm

        # normalize_adjacency[a][b] = x
        # when x = 0, node b has no connection with node a within valid hop.
        # when x ≠ 0, the normalized adjacency from node b to node a is x.
        # the value of x is normalized by the number of adjacent neighbor nodes around the node b.

        if self.strategy == 'uniform':
            # Uniform Labeling: a single matrix, all neighbors share one weight group.
            A = np.zeros((1, num_node, num_node))
            A[0] = normalize_adjacency
            return A
        elif self.strategy == 'distance':
            # Distance Partitioning: one matrix per hop distance.
            A = np.zeros((len(valid_hop), num_node, num_node))
            for i, hop in enumerate(valid_hop):
                A[i][hop_dis == hop] = normalize_adjacency[hop_dis == hop]
            return A
        elif self.strategy == 'spatial':
            # Spatial Configuration Partitioning: split neighbors into root
            # (same distance to center), closer-to-center, further-from-center.
            A = []
            for hop in valid_hop:
                a_root = np.zeros((num_node, num_node))
                a_close = np.zeros((num_node, num_node))
                a_further = np.zeros((num_node, num_node))
                for i in range(num_node):
                    for j in range(num_node):
                        if hop_dis[j, i] == hop:
                            j_hop_dis = min([hop_dis[j, _center] for _center in center])
                            i_hop_dis = min([hop_dis[i, _center] for _center in center])
                            if j_hop_dis == i_hop_dis:
                                a_root[j, i] = normalize_adjacency[j, i]
                            elif j_hop_dis > i_hop_dis:
                                a_close[j, i] = normalize_adjacency[j, i]
                            else:
                                a_further[j, i] = normalize_adjacency[j, i]
                if hop == 0:
                    A.append(a_root)
                else:
                    A.append(a_root + a_close)
                    A.append(a_further)
            A = np.stack(A)
            self.A = A
            return A
        elif self.strategy ==
'gait_temporal':
            # Gait Temporal Configuration Partitioning: root / positively
            # correlated / negatively correlated joints (see PGG).
            A = []
            positive_node, negative_node = self.get_gait_temporal_partitioning(semantic_level)
            for hop in valid_hop:
                a_root = np.zeros((num_node, num_node))
                a_positive = np.zeros((num_node, num_node))
                a_negative = np.zeros((num_node, num_node))
                for i in range(num_node):
                    for j in range(num_node):
                        if hop_dis[j, i] == hop:
                            if i == j:
                                a_root[j, i] = normalize_adjacency[j, i]
                            elif j in positive_node:
                                a_positive[j, i] = normalize_adjacency[j, i]
                            else:
                                a_negative[j, i] = normalize_adjacency[j, i]

                if hop == 0:
                    A.append(a_root)
                else:
                    A.append(a_negative)
                    A.append(a_positive)
            A = np.stack(A)
            return A
        else:
            raise ValueError("Do Not Exist This Strategy")


def get_hop_distance(num_node, edge, max_hop=1):
    # Calculate the shortest path between nodes
    # i.e. The minimum number of steps needed to walk from one node to another
    A = np.zeros((num_node, num_node))  # Adjacency Matrix (undirected)
    for i, j in edge:
        A[j, i] = 1
        A[i, j] = 1

    # compute hop steps: (A^d)[j, i] > 0 means i is reachable from j in d steps;
    # iterate d from high to low so each pair keeps its minimal distance.
    hop_dis = np.zeros((num_node, num_node)) + np.inf
    transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
    arrive_mat = (np.stack(transfer_mat) > 0)
    for d in range(max_hop, -1, -1):
        hop_dis[arrive_mat[d]] = d
    return hop_dis


def normalize_digraph(A):
    # Column-normalize the adjacency: A @ D^-1, dividing each column by the
    # degree of the corresponding node (zero-degree columns left untouched).
    Dl = np.sum(A, 0)
    num_node = A.shape[0]
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i]**(-1)
    AD = np.dot(A, Dn)
    return AD


def normalize_undigraph(A):
    # Symmetric normalization D^-1/2 @ A @ D^-1/2 for undirected graphs.
    Dl = np.sum(A, 0)
    num_node = A.shape[0]
    Dn = np.zeros((num_node, num_node))
    for i in range(num_node):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i]**(-0.5)
    DAD = np.dot(np.dot(Dn, A), Dn)
    return DAD
diff --git a/opengait/modeling/modules.py b/opengait/modeling/modules.py
index a911b91..a0e4461 100644
--- a/opengait/modeling/modules.py
+++ b/opengait/modeling/modules.py
@@ -253,3 +253,443 @@ def RmBN2dAffine(model):
        if isinstance(m, nn.BatchNorm2d):
            m.weight.requires_grad =
False m.bias.requires_grad = False + + +''' +Modifed from https://github.com/BNU-IVC/FastPoseGait/blob/main/fastposegait/modeling/components/units +''' + +class Graph(): + """ + # Thanks to YAN Sijie for the released code on Github (https://github.com/yysijie/st-gcn) + """ + def __init__(self, joint_format='coco', max_hop=2, dilation=1): + self.joint_format = joint_format + self.max_hop = max_hop + self.dilation = dilation + + # get edges + self.num_node, self.edge, self.connect_joint, self.parts = self._get_edge() + + # get adjacency matrix + self.A = self._get_adjacency() + + def __str__(self): + return self.A + + def _get_edge(self): + if self.joint_format == 'coco': + # keypoints = { + # 0: "nose", + # 1: "left_eye", + # 2: "right_eye", + # 3: "left_ear", + # 4: "right_ear", + # 5: "left_shoulder", + # 6: "right_shoulder", + # 7: "left_elbow", + # 8: "right_elbow", + # 9: "left_wrist", + # 10: "right_wrist", + # 11: "left_hip", + # 12: "right_hip", + # 13: "left_knee", + # 14: "right_knee", + # 15: "left_ankle", + # 16: "right_ankle" + # } + num_node = 17 + self_link = [(i, i) for i in range(num_node)] + neighbor_link = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 6), + (5, 7), (7, 9), (6, 8), (8, 10), (5, 11), (6, 12), (11, 12), + (11, 13), (13, 15), (12, 14), (14, 16)] + self.edge = self_link + neighbor_link + self.center = 0 + self.flip_idx = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] + connect_joint = np.array([5,0,0,1,2,0,0,5,6,7,8,5,6,11,12,13,14]) + parts = [ + np.array([5, 7, 9]), # left_arm + np.array([6, 8, 10]), # right_arm + np.array([11, 13, 15]), # left_leg + np.array([12, 14, 16]), # right_leg + np.array([0, 1, 2, 3, 4]), # head + ] + + elif self.joint_format == 'coco-no-head': + num_node = 12 + self_link = [(i, i) for i in range(num_node)] + neighbor_link = [(0, 1), + (0, 2), (2, 4), (1, 3), (3, 5), (0, 6), (1, 7), (6, 7), + (6, 8), (8, 10), (7, 9), (9, 11)] + self.edge = self_link + neighbor_link + self.center = 0 + 
connect_joint = np.array([3,1,0,2,4,0,6,8,10,7,9,11]) + parts =[ + np.array([0, 2, 4]), # left_arm + np.array([1, 3, 5]), # right_arm + np.array([6, 8, 10]), # left_leg + np.array([7, 9, 11]) # right_leg + ] + + elif self.joint_format =='alphapose' or self.joint_format =='openpose': + num_node = 18 + self_link = [(i, i) for i in range(num_node)] + neighbor_link = [(0, 1), (0, 14), (0, 15), (14, 16), (15, 17), + (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), + (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)] + self.edge = self_link + neighbor_link + self.center = 1 + self.flip_idx = [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16] + connect_joint = np.array([1,1,1,2,3,1,5,6,2,8,9,5,11,12,0,0,14,15]) + parts = [ + np.array([5, 6, 7]), # left_arm + np.array([2, 3, 4]), # right_arm + np.array([11, 12, 13]), # left_leg + np.array([8, 9, 10]), # right_leg + np.array([0, 1, 14, 15, 16, 17]), # head + ] + + else: + num_node, neighbor_link, connect_joint, parts = 0, [], [], [] + logging.info('') + logging.error('Error: Do NOT exist this dataset: {}!'.format(self.dataset)) + raise ValueError() + self_link = [(i, i) for i in range(num_node)] + edge = self_link + neighbor_link + return num_node, edge, connect_joint, parts + + def _get_hop_distance(self): + A = np.zeros((self.num_node, self.num_node)) + for i, j in self.edge: + A[j, i] = 1 + A[i, j] = 1 + hop_dis = np.zeros((self.num_node, self.num_node)) + np.inf + transfer_mat = [np.linalg.matrix_power(A, d) for d in range(self.max_hop + 1)] + arrive_mat = (np.stack(transfer_mat) > 0) + for d in range(self.max_hop, -1, -1): + hop_dis[arrive_mat[d]] = d + return hop_dis + + def _get_adjacency(self): + hop_dis = self._get_hop_distance() + valid_hop = range(0, self.max_hop + 1, self.dilation) + adjacency = np.zeros((self.num_node, self.num_node)) + for hop in valid_hop: + adjacency[hop_dis == hop] = 1 + normalize_adjacency = self._normalize_digraph(adjacency) + A = np.zeros((len(valid_hop), self.num_node, 
self.num_node))
        for i, hop in enumerate(valid_hop):
            A[i][hop_dis == hop] = normalize_adjacency[hop_dis == hop]
        return A

    def _normalize_digraph(self, A):
        # Column-normalize the adjacency by node degree: A @ D^-1.
        Dl = np.sum(A, 0)
        num_node = A.shape[0]
        Dn = np.zeros((num_node, num_node))
        for i in range(num_node):
            if Dl[i] > 0:
                Dn[i, i] = Dl[i]**(-1)
        AD = np.dot(A, Dn)
        return AD


class TemporalBasicBlock(nn.Module):
    """
    TemporalConv_Res_Block
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
    """
    def __init__(self, channels, temporal_window_size, stride=1, residual=False, reduction=0, get_res=False, tcn_stride=False):
        super(TemporalBasicBlock, self).__init__()

        # "same" padding along the temporal axis only
        padding = ((temporal_window_size - 1) // 2, 0)

        if not residual:
            self.residual = lambda x: 0
        elif stride == 1:
            self.residual = lambda x: x
        else:
            # strided 1x1 conv so the shortcut matches the downsampled main path
            self.residual = nn.Sequential(
                nn.Conv2d(channels, channels, 1, (stride,1)),
                nn.BatchNorm2d(channels),
            )

        self.conv = nn.Conv2d(channels, channels, (temporal_window_size,1), (stride,1), padding)
        self.bn = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, res_module):
        # res_module: an external residual tensor (module-level skip connection)
        res_block = self.residual(x)

        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x + res_block + res_module)

        return x


class TemporalBottleneckBlock(nn.Module):
    """
    TemporalConv_Res_Bottleneck
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
    """
    def __init__(self, channels, temporal_window_size, stride=1, residual=False, reduction=4, get_res=False, tcn_stride=False):
        super(TemporalBottleneckBlock, self).__init__()
        # NOTE(review): the incoming tcn_stride argument is unconditionally
        # overwritten here, so the parameter is effectively dead — TODO confirm
        # this is intentional.
        tcn_stride =False
        padding = ((temporal_window_size - 1) // 2, 0)
        inter_channels = channels // reduction
        if get_res:
            if tcn_stride:
                stride =2
            self.residual = nn.Sequential(
                nn.Conv2d(channels, channels, 1, (2,1)),
                nn.BatchNorm2d(channels),
            )
            tcn_stride= True
        else:
            if not residual:
                self.residual = lambda x: 0
            elif stride == 1:
                self.residual = lambda x: x
            else:
                # strided 1x1 conv shortcut for temporal downsampling
                self.residual = nn.Sequential(
                    nn.Conv2d(channels, channels, 1, (2,1)),
                    nn.BatchNorm2d(channels),
                )
                tcn_stride= True

        self.conv_down = nn.Conv2d(channels, inter_channels, 1)
        self.bn_down = nn.BatchNorm2d(inter_channels)
        if tcn_stride:
            stride=2
        self.conv = nn.Conv2d(inter_channels, inter_channels, (temporal_window_size,1), (stride,1), padding)
        self.bn = nn.BatchNorm2d(inter_channels)
        self.conv_up = nn.Conv2d(inter_channels, channels, 1)
        self.bn_up = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, res_module):
        # res_module: an external residual tensor (module-level skip connection)
        res_block = self.residual(x)

        # bottleneck: reduce channels -> temporal conv -> restore channels
        x = self.conv_down(x)
        x = self.bn_down(x)
        x = self.relu(x)

        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)

        x = self.conv_up(x)
        x = self.bn_up(x)
        x = self.relu(x + res_block + res_module)
        return x



class SpatialGraphConv(nn.Module):
    """
    SpatialGraphConv_Basic_Block
    Arxiv: https://arxiv.org/abs/1801.07455
    Github: https://github.com/yysijie/st-gcn
    """
    def __init__(self, in_channels, out_channels, max_graph_distance):
        super(SpatialGraphConv, self).__init__()

        # spatial class number (distance = 0 for class 0, distance = 1 for class 1, ...)
        self.s_kernel_size = max_graph_distance + 1

        # weights of different spatial classes
        self.gcn = nn.Conv2d(in_channels, out_channels*self.s_kernel_size, 1)

    def forward(self, x, A):
        # numbers in same class have same weight
        x = self.gcn(x)

        # divide nodes into different classes
        n, kc, t, v = x.size()
        x = x.view(n, self.s_kernel_size, kc//self.s_kernel_size, t, v).contiguous()

        # spatial graph convolution: aggregate neighbors per class and sum classes
        x = torch.einsum('nkctv,kvw->nctw', (x, A[:self.s_kernel_size])).contiguous()

        return x

class SpatialBasicBlock(nn.Module):
    """
    SpatialGraphConv_Res_Block
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
    """
    def __init__(self, in_channels, out_channels, max_graph_distance, residual=False, reduction=0):
        super(SpatialBasicBlock, self).__init__()

        # shortcut: disabled / identity / 1x1 projection when channels change
        if not residual:
            self.residual = lambda x: 0
        elif in_channels == out_channels:
            self.residual = lambda x: x
        else:
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels),
            )

        self.conv = SpatialGraphConv(in_channels, out_channels, max_graph_distance)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):

        res_block = self.residual(x)

        x = self.conv(x, A)
        x = self.bn(x)
        x = self.relu(x + res_block)

        return x

class SpatialBottleneckBlock(nn.Module):
    """
    SpatialGraphConv_Res_Bottleneck
    Arxiv: https://arxiv.org/abs/2010.09978
    Github: https://github.com/Thomas-yx/ResGCNv1
    """

    def __init__(self, in_channels, out_channels, max_graph_distance, residual=False, reduction=4):
        super(SpatialBottleneckBlock, self).__init__()

        inter_channels = out_channels // reduction

        # shortcut: disabled / identity / 1x1 projection when channels change
        if not residual:
            self.residual = lambda x: 0
        elif in_channels == out_channels:
            self.residual = lambda x: x
        else:
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels),
            )

self.conv_down = nn.Conv2d(in_channels, inter_channels, 1)
        self.bn_down = nn.BatchNorm2d(inter_channels)
        self.conv = SpatialGraphConv(inter_channels, inter_channels, max_graph_distance)
        self.bn = nn.BatchNorm2d(inter_channels)
        self.conv_up = nn.Conv2d(inter_channels, out_channels, 1)
        self.bn_up = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):

        res_block = self.residual(x)

        # bottleneck: reduce channels -> graph conv -> restore channels
        x = self.conv_down(x)
        x = self.bn_down(x)
        x = self.relu(x)

        x = self.conv(x, A)
        x = self.bn(x)
        x = self.relu(x)

        x = self.conv_up(x)
        x = self.bn_up(x)
        x = self.relu(x + res_block)

        return x

class SpatialAttention(nn.Module):
    """
    This class implements Spatial Transformer.
    Function adapted from: https://github.com/leaderj1001/Attention-Augmented-Conv2d
    """
    def __init__(self, in_channels, out_channel, A, num_point, dk_factor=0.25, kernel_size=1, Nh=8, num=4, stride=1):
        super(SpatialAttention, self).__init__()
        self.in_channels = in_channels
        self.kernel_size = kernel_size
        self.dk = int(dk_factor * out_channel)  # total key/query dimension
        self.dv = int(out_channel)              # total value dimension
        self.num = num
        self.Nh = Nh                            # number of attention heads
        self.num_point=num_point
        # collapse the partitioned adjacency stack into a single matrix
        # NOTE(review): assumes A carries (at least) 3 partition matrices — confirm
        self.A = A[0] + A[1] + A[2]
        self.stride = stride
        self.padding = (self.kernel_size - 1) // 2

        assert self.Nh != 0, "integer division or modulo by zero, Nh >= 1"
        assert self.dk % self.Nh == 0, "dk should be divided by Nh. (example: out_channels: 20, dk: 40, Nh: 4)"
        assert self.dv % self.Nh == 0, "dv should be divided by Nh. (example: out_channels: 20, dv: 4, Nh: 4)"
        assert stride in [1, 2], str(stride) + " Up to 2 strides are allowed."
        # single conv producing queries, keys and values in one pass
        self.qkv_conv = nn.Conv2d(self.in_channels, 2 * self.dk + self.dv, kernel_size=self.kernel_size,
                                  stride=stride,
                                  padding=self.padding)

        # output projection W0
        self.attn_out = nn.Conv2d(self.dv, self.dv, kernel_size=1, stride=1)

    def forward(self, x):
        # Input x
        # (batch_size, channels, 1, joints)
        B, _, T, V = x.size()

        # flat_q, flat_k, flat_v
        # (batch_size, Nh, dvh or dkh, joints)
        # dvh = dv / Nh, dkh = dk / Nh
        # q, k, v obtained by doing 2D convolution on the input (q=XWq, k=XWk, v=XWv)
        flat_q, flat_k, flat_v, q, k, v = self.compute_flat_qkv(x, self.dk, self.dv, self.Nh)

        # Calculate the scores, obtained by doing q*k
        # (batch_size, Nh, joints, dkh)*(batch_size, Nh, dkh, joints) = (batch_size, Nh, joints, joints)
        # The multiplication can also be divided (multi_matmul) in case of space problems

        logits = torch.matmul(flat_q.transpose(2, 3), flat_k)

        weights = F.softmax(logits, dim=-1)

        # attn_out
        # (batch, Nh, joints, dvh)
        # weights*V
        # (batch, Nh, joints, joints)*(batch, Nh, joints, dvh)=(batch, Nh, joints, dvh)
        attn_out = torch.matmul(weights, flat_v.transpose(2, 3))

        attn_out = torch.reshape(attn_out, (B, self.Nh, T, V, self.dv // self.Nh))

        attn_out = attn_out.permute(0, 1, 4, 2, 3)

        # combine_heads_2d, combine heads only after having calculated each Z separately
        # (batch, Nh*dv, 1, joints)
        attn_out = self.combine_heads_2d(attn_out)

        # Multiply for W0 (batch, out_channels, 1, joints) with out_channels=dv
        attn_out = self.attn_out(attn_out)
        return attn_out

    def compute_flat_qkv(self, x, dk, dv, Nh):
        # Project x into q/k/v, split heads, and flatten the spatial dims.
        qkv = self.qkv_conv(x)
        # T=1 in this case, because we are considering each frame separately
        N, _, T, V = qkv.size()

        q, k, v = torch.split(qkv, [dk, dk, dv], dim=1)
        q = self.split_heads_2d(q, Nh)
        k = self.split_heads_2d(k, Nh)
        v = self.split_heads_2d(v, Nh)

        # scale queries by 1/sqrt(dkh), i.e. scaled dot-product attention
        dkh = dk // Nh
        q = q*(dkh ** -0.5)
        flat_q = torch.reshape(q, (N, Nh, dkh, T * V))
        flat_k = torch.reshape(k, (N, Nh, dkh, T * V))
        flat_v
= torch.reshape(v, (N, Nh, dv // self.Nh, T * V))
        return flat_q, flat_k, flat_v, q, k, v

    def split_heads_2d(self, x, Nh):
        # (B, C, T, V) -> (B, Nh, C // Nh, T, V)
        B, channels, T, V = x.size()
        ret_shape = (B, Nh, channels // Nh, T, V)
        split = torch.reshape(x, ret_shape)
        return split

    def combine_heads_2d(self, x):
        # (B, Nh, dv, T, V) -> (B, Nh * dv, T, V)
        batch, Nh, dv, T, V = x.size()
        ret_shape = (batch, Nh * dv, T, V)
        return torch.reshape(x, ret_shape)
\ No newline at end of file
diff --git a/test.sh b/test.sh
index 81daa38..443b7bc 100644
--- a/test.sh
+++ b/test.sh
@@ -29,4 +29,4 @@ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 o
 # CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitpart/gaitpart_OUMVLP.yaml --phase test
 
 # GaitGL
-# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitgl/gaitgl_OUMVLP.yaml --phase test
+# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitgl/gaitgl_OUMVLP.yaml --phase test
\ No newline at end of file
diff --git a/train.sh b/train.sh
index 5910b37..e503966 100644
--- a/train.sh
+++ b/train.sh
@@ -29,4 +29,4 @@ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 o
 # CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitpart/gaitpart_OUMVLP.yaml --phase train
 
 # GaitGL
-# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitgl/gaitgl_OUMVLP.yaml --phase train
+# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 opengait/main.py --cfgs ./configs/gaitgl/gaitgl_OUMVLP.yaml --phase train
\ No newline at end of file