feat: add drf author checkpoint compatibility bundle

2026-03-14 17:12:27 +08:00
parent d4e2a59ad2
commit 5f98844aff
18 changed files with 1144 additions and 8 deletions
@@ -0,0 +1,62 @@
+# DRF Author Checkpoint Artifact
+
+This artifact bundle archives the author-provided DRF checkpoint together with the compatibility configs that make it usable in this repo.
+
+Files:
+- `DRF_118_unordered_iter2w_lr0.001_8830-08000.pt`
+- `author_original_config.yaml`
+- `eval_112_1gpu.yaml`
+- `eval_118_defaultroot_1gpu.yaml`
+- `eval_118_splitroot_1gpu.yaml`
+- `eval_118_aligned_1gpu.yaml`
+- `eval_118_paper_1gpu.yaml`
+
+Best recovered path:
+- config: `configs/drf/drf_author_eval_118_aligned_1gpu.yaml`
+- result:
+  - `80.24 Acc / 76.73 Prec / 76.40 Rec / 76.56 F1`
+
+## Run Commands
+
+Recommended eval:
+
+```bash
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29693 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_aligned_1gpu.yaml \
+  --phase test
+```
+
+Other compatibility checks:
+
+```bash
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29695 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_112_1gpu.yaml \
+  --phase test
+
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29696 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_splitroot_1gpu.yaml \
+  --phase test
+
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29697 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_paper_1gpu.yaml \
+  --phase test
+```
+
+## Paths To Change On Another Machine
+
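+If you move this artifact bundle or use a different dataset location, update the following config keys (and replace the machine-specific `CUDA_VISIBLE_DEVICES` GPU UUID in the commands above):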
+- `data_cfg.dataset_root`
+- `data_cfg.dataset_partition`
+- `evaluator_cfg.restore_hint`
+
+In this repo, the checkpoint is archived at:
+- `artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt`
+
@@ -0,0 +1,106 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /home/kj666/project_temp/opengait/datasets/Scoliosis1K/Scoliosis1K-drf-pkl
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_112.json
+  num_workers: 1
+  data_in_use: [True, False] # heatmap, sil
+  remove_no_gallery: false # Remove probe if no gallery for it
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_sconet_112_bodyonly_plaince_adamw_cosine/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered # all indicates whole sequence used to test, while ordered means input sequence by its natural order; Other options:   fixed_unordered
+    frames_all_limit: 720 # limit the number of sampled frames to prevent out of memory
+  metric: euc # cos
+  transform:
+    - type: BaseSilCuttingTransform
+
+loss_cfg:
+  - loss_term_weight: 1.0
+    margin: 0.2
+    type: TripletLoss
+    log_prefix: triplet
+  - loss_term_weight: 1.0
+    scale: 16
+    type: CrossEntropyLoss
+    log_prefix: softmax
+    log_accuracy: true
+
+
+model_cfg:
+  model: DRF
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels: # Layers configuration for automatically model construction
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
+
+optimizer_cfg:
+  lr: 0.01
+  momentum: 0.9
+  solver: SGD
+  weight_decay: 0.0005
+
+scheduler_cfg:
+  gamma: 0.1
+  milestones: # Learning Rate Reduction at each milestones
+    - 10000
+    - 14000
+    - 18000
+  scheduler: MultiStepLR
+
+trainer_cfg:
+  find_unused_parameters: True
+  enable_float16: true # half-precision float for memory reduction and speedup
+  fix_BN: false
+  with_test: true
+  log_iter: 100
+  restore_ckpt_strict: true
+  restore_hint: 0
+  save_iter: 2000
+  save_name: DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  total_iter: 20000
+  sampler:
+    batch_shuffle: true
+    batch_size:
+      - 8 # TripletSampler, batch_size[0] indicates Number of Identity
+      - 8 #                 batch_size[1] indicates Samples sequences for each Identity
+    frames_num_fixed: 30 # fixed frames number for training
+    frames_skip_num: 2
+    sample_type: fixed_unordered # fixed control input frames number, unordered for controlling order of input tensor; Other options: unfixed_ordered or all_ordered
+    type: TripletSampler
+  transform:
+    - type: BaseSilCuttingTransform
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_112.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_112_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-aligned
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_aligned_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-paper
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_paper_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_splitroot_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_112.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_112_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-aligned
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_aligned_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-paper
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_paper_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,65 @@
+data_cfg:
+  dataset_name: Scoliosis1K
+  data_type: heatmap
+  dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118
+  dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
+  num_workers: 1
+  data_in_use: [true, true]
+  remove_no_gallery: false
+  test_dataset_name: Scoliosis1K
+
+evaluator_cfg:
+  enable_float16: true
+  restore_ckpt_strict: true
+  restore_hint: ./artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt
+  save_name: DRF_author_eval_118_splitroot_1gpu
+  eval_func: evaluate_scoliosis
+  sampler:
+    batch_shuffle: false
+    batch_size: 1
+    sample_type: all_ordered
+    type: InferenceSampler
+    frames_all_limit: 720
+  metric: euc
+  transform:
+    - type: BaseSilCuttingTransform
+    - type: NoOperation
+
+model_cfg:
+  model: DRF
+  label_order:
+    - negative
+    - positive
+    - neutral
+  num_pairs: 8
+  num_metrics: 3
+  backbone_cfg:
+    type: ResNet9
+    block: BasicBlock
+    in_channel: 2
+    channels:
+      - 64
+      - 128
+      - 256
+      - 512
+    layers:
+      - 1
+      - 1
+      - 1
+      - 1
+    strides:
+      - 1
+      - 2
+      - 2
+      - 1
+    maxpool: false
+  SeparateFCs:
+    in_channels: 512
+    out_channels: 256
+    parts_num: 16
+  SeparateBNNecks:
+    class_num: 3
+    in_channels: 256
+    parts_num: 16
+  bin_num:
+    - 16
@@ -0,0 +1,192 @@
+# DRF Author Checkpoint Compatibility Note
+
+This note records what happened when evaluating the author-provided DRF bundle in this repo:
+
+- checkpoint: `artifact/scoliosis_drf_author_118_compat/DRF_118_unordered_iter2w_lr0.001_8830-08000.pt`
+- config: `ckpt/drf_author/drf_scoliosis1k_20000.yaml`
+
+The short version:
+- the weight file is real and structurally usable
+- the provided YAML is not a reliable source of truth
+- the main problem was integration-contract mismatch, not a broken checkpoint
+
+## What Was Wrong
+
+The author bundle was internally inconsistent in several ways.
+
+### 1. Split mismatch
+
+The DRF paper says the main experiment uses `1:1:8`, i.e. the `118` split.
+
+But the provided YAML pointed to:
+- `./datasets/Scoliosis1K/Scoliosis1K_112.json`
+
+while the checkpoint filename itself says:
+- `DRF_118_...`
+
+So the bundle already disagreed with itself.
+
+### 2. Class-order mismatch
+
+The biggest hidden bug was class ordering.
+
+The current repo evaluator assumes:
+- `negative = 0`
+- `neutral = 1`
+- `positive = 2`
+
+But the author stub in `research/drf.py` uses:
+- `negative = 0`
+- `positive = 1`
+- `neutral = 2`
+
+That means an otherwise good checkpoint can look very bad if logits are interpreted in the wrong class order.
+
+### 3. Legacy module-name mismatch
+
+The author checkpoint stores PGA weights under:
+- `attention_layer.*`
+
+The current repo uses:
+- `PGA.*`
+
+This is a small compatibility issue, but it must be remapped before loading.
+
+### 4. Preprocessing/runtime-contract mismatch
+
+The author checkpoint does not line up with the stale YAML’s full runtime contract.
+
+Most importantly, it did **not** work well with the more paper-literal local export:
+- `Scoliosis1K-drf-pkl-118-paper`
+
+It worked much better with the more OpenGait-like aligned export:
+- `Scoliosis1K-drf-pkl-118-aligned`
+
+That strongly suggests the checkpoint was trained against a preprocessing/runtime path closer to the aligned OpenGait integration than to the later local “paper-literal” summed-heatmap ablation.
+
+## What Was Added In-Tree
+
+The current repo now has a small compatibility layer in:
+- `opengait/modeling/models/drf.py`
+
+It does two things:
+- remaps legacy keys `attention_layer.* -> PGA.*`
+- supports configurable `model_cfg.label_order`
+
+The model also canonicalizes inference logits back into the repo’s evaluator order, so author checkpoints can be evaluated without modifying the evaluator itself.
+
+## Tested Compatibility Results
+
+### Best usable author-checkpoint path
+
+Config:
+- `configs/drf/drf_author_eval_118_aligned_1gpu.yaml`
+
+Dataset/runtime:
+- dataset root: `Scoliosis1K-drf-pkl-118-aligned`
+- partition: `Scoliosis1K_118.json`
+- transform: `BaseSilCuttingTransform`
+- label order:
+  - `negative`
+  - `positive`
+  - `neutral`
+
+Result:
+- `80.24 Acc / 76.73 Prec / 76.40 Rec / 76.56 F1`
+
+This is the strongest recovered path so far.
+
+### Other tested paths
+
+`configs/drf/drf_author_eval_118_splitroot_1gpu.yaml`
+- dataset root: `Scoliosis1K-drf-pkl-118`
+- result:
+  - `77.17 Acc / 73.61 Prec / 72.59 Rec / 72.98 F1`
+
+`configs/drf/drf_author_eval_112_1gpu.yaml`
+- dataset root: `Scoliosis1K-drf-pkl`
+- partition: `Scoliosis1K_112.json`
+- result:
+  - `85.19 Acc / 57.98 Prec / 56.65 Rec / 57.30 F1`
+
+`configs/drf/drf_author_eval_118_paper_1gpu.yaml`
+- dataset root: `Scoliosis1K-drf-pkl-118-paper`
+- transform: `BaseSilTransform`
+- result:
+  - `27.24 Acc / 9.08 Prec / 33.33 Rec / 14.27 F1`
+
+## Interpretation
+
+What these results mean:
+
+- the checkpoint is not garbage
+- the original “very bad” local eval was mostly a compatibility failure
+- the largest single hidden bug was the class-order mismatch
+- the author checkpoint is also sensitive to which local DRF dataset root is used
+
+What they do **not** mean:
+
+- we have perfectly reconstructed the author’s original training path
+- the provided YAML is trustworthy as-is
+- the paper’s full DRF claim is fully reproduced here
+
+The strongest recovered result:
+- `80.24 / 76.73 / 76.40 / 76.56`
+
+This is close to the paper’s reported `ScoNet-MT^ske` F1 and much better than our earlier broken compat evals, but it is still below the paper’s DRF headline result:
+- paper DRF: `86.0 Acc / 84.1 Prec / 79.2 Rec / 80.8 F1`
+
+## Practical Recommendation
+
+If someone wants to use the author checkpoint in this repo today, the recommended path is:
+
+1. use `configs/drf/drf_author_eval_118_aligned_1gpu.yaml`
+2. keep the author label order:
+   - `negative, positive, neutral`
+3. keep the legacy `attention_layer -> PGA` remap in the model
+4. do **not** assume the stale `112` YAML is the correct training/eval contract
+
+If someone wants to push this further, the highest-value next step is:
+- finetune from the author checkpoint on the aligned `118` path instead of starting DRF from scratch
+
+## How To Run
+
+Recommended eval:
+
+```bash
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29693 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_aligned_1gpu.yaml \
+  --phase test
+```
+
+Other compatibility checks:
+
+```bash
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29695 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_112_1gpu.yaml \
+  --phase test
+
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29696 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_splitroot_1gpu.yaml \
+  --phase test
+
+CUDA_VISIBLE_DEVICES=GPU-9cc7b26e-90d4-0c49-4d4c-060e528ffba6 \
+uv run torchrun --nproc_per_node=1 --master_port=29697 \
+  opengait/main.py \
+  --cfgs ./configs/drf/drf_author_eval_118_paper_1gpu.yaml \
+  --phase test
+```
+
+If someone wants to reproduce this on another machine, replace the machine-specific `CUDA_VISIBLE_DEVICES` GPU UUID in the commands above; the usual config paths to change are:
+- `data_cfg.dataset_root`
+- `data_cfg.dataset_partition`
+- `evaluator_cfg.restore_hint`
+
+The archived artifact bundle is:
+- `artifact/scoliosis_drf_author_118_compat`
@@ -7,6 +7,7 @@ Related notes:
 - [ScoNet and DRF: Status, Architecture, and Reproduction Notes](sconet-drf-status-and-training.md)
 - [Scoliosis Training Change Log](scoliosis_training_change_log.md)
 - [Scoliosis Reproducibility Audit](scoliosis_reproducibility_audit.md)
+- [DRF Author Checkpoint Compatibility Note](drf_author_checkpoint_compat.md)
 - [systemd-run Training](systemd-run-training.md)

 ## Current Best Known Result
@@ -72,6 +73,7 @@ These are the main branches already explored, so the next person does not rerun
 - `DRF`
  - implemented and tested
  - underperformed the plain skeleton baseline in the first serious practical run
+  - the author-provided checkpoint is now partially recoverable on the aligned `118` path
  - still worth one final warm-start retry, but not the default winner

 ## What Not To Do First
@@ -120,7 +122,9 @@ Goal:
 Recommended setup:
 - split: `1:1:2`
 - representation: start from the same `body-only` skeleton maps
- initialization: warm-start from the strong skeleton model, not from random init
+- initialization: warm-start from either:
+  - the strong practical skeleton model, or
+  - the recovered author DRF checkpoint if the goal is paper-side DRF recovery
 - optimizer: small-LR `AdamW`
 - scheduler: cosine

@@ -130,9 +134,15 @@ Recommended strategy:
 3. use a smaller LR than the plain baseline finetune
 4. if needed, freeze or partially freeze the backbone for a short warmup so the PAV/PGA branch learns without immediately disturbing the already-good visual branch

+If the goal is specifically DRF recovery rather than practical best-model development, use this as the author-checkpoint starting point:
+- config: `configs/drf/drf_author_eval_118_aligned_1gpu.yaml`
+- recovered author checkpoint result:
+  - `80.24 Acc / 76.73 Prec / 76.40 Rec / 76.56 F1`
+
 Why:
 - the earlier DRF bridge peaked early and then degraded
 - local evidence suggests the prior branch is currently weak or noisy
+- the scratch DRF path was weak, but the recovered author checkpoint is already much better than that path
 - DRF deserves one fair test from a strong starting point, not another scratch run

 Success criterion:
@@ -11,6 +11,7 @@ Related notes:
 - [ScoNet and DRF: Status, Architecture, and Reproduction Notes](sconet-drf-status-and-training.md)
 - [Scoliosis Training Change Log](scoliosis_training_change_log.md)
 - [Scoliosis: Next Experiments](scoliosis_next_experiments.md)
+- [DRF Author Checkpoint Compatibility Note](drf_author_checkpoint_compat.md)

 Primary references:
 - ScoNet paper: [arXiv-2407.05726v3-main.tex](papers/arXiv-2407.05726v3-main.tex)
@@ -155,6 +156,41 @@ Conclusion:
 - DRF currently loses to the stronger plain skeleton baseline in this repo
 - the published DRF advantage is not established locally

+### 9a. The author-provided DRF checkpoint is partially recoverable
+
+This changed one important part of the audit:
+- the author checkpoint itself is not unusable
+- the earlier very poor local eval was mostly a compatibility failure
+
+The recovered best author-checkpoint path is:
+- config: `configs/drf/drf_author_eval_118_aligned_1gpu.yaml`
+- result:
+  - `80.24 Acc / 76.73 Prec / 76.40 Rec / 76.56 F1`
+
+This is still below the paper's DRF headline result:
+- `86.0 / 84.1 / 79.2 / 80.8`
+
+But it is far better than the earlier broken compat evals, which means:
+- the weight file is real
+- the stale author YAML is not a reliable runtime contract
+
+The main causes were:
+- split mismatch:
+  - checkpoint name says `118`
+  - provided YAML points to `112`
+- class-order mismatch:
+  - author stub uses `negative=0, positive=1, neutral=2`
+  - repo evaluator assumes `negative=0, neutral=1, positive=2`
+- legacy module naming mismatch:
+  - `attention_layer.*` vs `PGA.*`
+- preprocessing/runtime mismatch:
+  - the checkpoint aligns much better with `Scoliosis1K-drf-pkl-118-aligned`
+  - it performs very badly on the local `118-paper` export
+
+Conclusion:
+- the author checkpoint can be made meaningfully usable in this repo
+- but the provided bundle still does not fully specify the original training/eval contract
+
 ### 10. The paper-level `1:1:8` skeleton story is not reproduced here

 What happened locally:
@@ -254,6 +290,7 @@ Practical bottom line:
 - keep the `body-only` skeleton baseline as the mainline path
 - keep the retained best checkpoint family as the working artifact
 - treat DRF as an optional follow-up branch, not the current winner
+- if DRF work continues, the author checkpoint on the aligned `118` path is now a much better starting point than scratch DRF

 Current strongest practical checkpoint:
 - `92.38 Acc / 90.30 Prec / 87.39 Rec / 88.70 F1`
@@ -38,6 +38,10 @@ Use it for:
 | 2026-03-10 | `ScoNet_skeleton_112_sigma15_joint8_bodyonly_plaince_bridge_1gpu_10k` | ScoNet-MT-ske bridge | `Scoliosis1K-drf-pkl-118-sigma15-joint8-bodyonly` + `Scoliosis1K_112.json` | Same plain-CE `1:1:2` bridge, relaunched on the `5070 Ti` only per user request | complete | best proxy subset at `7000`: `88.28/69.12/74.15/68.80`; full test at `7000`: `83.16/68.24/80.02/68.47`; final proxy at `10000`: `75.00/65.00/63.41/54.55` (Acc/Prec/Rec/F1) |
| 2026-03-10 | `ScoNet_skeleton_112_sigma15_joint8_headlite_plaince_bridge_1gpu_10k` | ScoNet-MT-ske bridge | `Scoliosis1K-drf-pkl-112-sigma15-joint8-headlite` + `Scoliosis1K_112.json` | Added `head-lite` structure (nose plus shoulder links, no eyes/ears) on top of the plain-CE `1:1:2` bridge; first `3090` launch OOMed due to unrelated occupancy, then relaunched on the UUID-pinned `5070 Ti` | complete | best proxy subset at `7000`: `86.72/70.15/89.00/70.44`; full test at `7000`: `78.07/65.42/80.50/62.08` (Acc/Prec/Rec/F1) |
 | 2026-03-10 | `DRF_skeleton_112_sigma15_joint8_bodyonly_plaince_bridge_1gpu_10k` | DRF bridge | `Scoliosis1K-drf-pkl-118-sigma15-joint8-bodyonly` + `Scoliosis1K_112.json` | First practical DRF run on the winning `1:1:2` skeleton recipe: `body-only`, plain CE, SGD, `10k` bridge schedule, fixed proxy subset seed `112` | complete | best proxy subset at `2000`: `88.28/61.79/60.31/60.93`; full test at `2000`: `80.21/58.92/59.23/57.84` (Acc/Prec/Rec/F1) |
+| 2026-03-14 | `DRF_author_eval_112_1gpu` | DRF author checkpoint compat | `Scoliosis1K-drf-pkl` + `Scoliosis1K_112.json` | Re-evaluated the author-provided checkpoint after adding legacy module-name remap and correcting the author class order; kept the stale `112` path to test whether the provided YAML was trustworthy | complete | `85.19/57.98/56.65/57.30` (Acc/Prec/Rec/F1) |
+| 2026-03-14 | `DRF_author_eval_118_splitroot_1gpu` | DRF author checkpoint compat | `Scoliosis1K-drf-pkl-118` + `Scoliosis1K_118.json` | Same author-checkpoint compat path, but switched to the `118` split-specific local DRF dataset root while keeping `BaseSilCuttingTransform` and author class order | complete | `77.17/73.61/72.59/72.98` (Acc/Prec/Rec/F1) |
+| 2026-03-14 | `DRF_author_eval_118_aligned_1gpu` | DRF author checkpoint compat | `Scoliosis1K-drf-pkl-118-aligned` + `Scoliosis1K_118.json` | Same author-checkpoint compat path, but evaluated on the aligned `118` DRF export; this is currently the best recovered author-checkpoint runtime contract | complete | `80.24/76.73/76.40/76.56` (Acc/Prec/Rec/F1) |
+| 2026-03-14 | `DRF_author_eval_118_paper_1gpu` | DRF author checkpoint compat | `Scoliosis1K-drf-pkl-118-paper` + `Scoliosis1K_118.json` | Tested the author checkpoint against the local paper-literal summed-heatmap export with `BaseSilTransform` to see whether it matched the later paper-style preprocessing branch | complete | `27.24/9.08/33.33/14.27` (Acc/Prec/Rec/F1) |
 | 2026-03-10 | `ScoNet_skeleton_112_sigma15_joint8_bodyonly_plaince_main_1gpu_20k` | ScoNet-MT-ske mainline | `Scoliosis1K-drf-pkl-118-sigma15-joint8-bodyonly` + `Scoliosis1K_112.json` | Promoted the winning practical skeleton recipe to a longer `20k` run with full `TEST_SET` eval and checkpoint save every `1000`; no proxy subset, same plain CE + SGD setup | interrupted | superseded by the true-resume continuation below |
 | 2026-03-10 | `ScoNet_skeleton_112_sigma15_joint8_bodyonly_plaince_resume_1gpu_20k` | ScoNet-MT-ske mainline | `Scoliosis1K-drf-pkl-118-sigma15-joint8-bodyonly` + `Scoliosis1K_112.json` | True continuation of the earlier plain-CE `1:1:2` `10k` bridge from its `latest.pt`, extended to `20k` with full `TEST_SET` eval and checkpoint save every `1000` | interrupted | superseded by the AdamW finetune branch below |
 | 2026-03-10 | `ScoNet_skeleton_112_sigma15_joint8_bodyonly_plaince_adamw_finetune_1gpu_20k` | ScoNet-MT-ske finetune | `Scoliosis1K-drf-pkl-118-sigma15-joint8-bodyonly` + `Scoliosis1K_112.json` | AdamW finetune from the earlier plain-CE `1:1:2` `10k` checkpoint; restores model weights only, resets optimizer/scheduler state, keeps full `TEST_SET` eval and checkpoint save every `1000` | interrupted | superseded by the longer overnight 40k finetune below |
@@ -69,4 +73,5 @@ Current best `ScoNet-MT-ske`-style result:
 - A later full-test rerun of the retained `body-only + plain CE` `7000` checkpoint reproduced the same `83.16 / 68.24 / 80.02 / 68.47` result exactly, so that number is now explicitly reconfirmed rather than just carried forward from the original run log.
 - `Head-lite` looked stronger than `body-only` on the fixed 128-sequence proxy subset at `7000`, but it did not transfer to the full test set: `78.07 Acc / 65.42 Prec / 80.50 Rec / 62.08 F1`, which is clearly below the `body-only + plain CE` full-test result.
 - The first practical DRF bridge on the winning `1:1:2` recipe did not beat the plain skeleton baseline. Its best retained checkpoint (`2000`) reached only `80.21 Acc / 58.92 Prec / 59.23 Rec / 57.84 F1` on the full test set, versus `83.16 / 68.24 / 80.02 / 68.47` for `body-only + plain CE` at `7000`. The working local interpretation is that the added PAV/PGA path is currently injecting a weak or noisy prior rather than a useful complementary signal.
+- The author-provided DRF checkpoint turned out to be partially recoverable once the local integration contract was corrected. The main hidden bug was a class-order mismatch: the author stub uses `negative=0, positive=1, neutral=2`, while the repo evaluator assumes `negative=0, neutral=1, positive=2`. After adding label-order compatibility and the legacy `attention_layer -> PGA` remap, the checkpoint became meaningfully usable. The best recovered path so far is the aligned `118` export, which reached `80.24 Acc / 76.73 Prec / 76.40 Rec / 76.56 F1`. The stale author YAML is therefore not a trustworthy source of the original runtime contract; the checkpoint aligns much better with a `118` aligned/OpenGait-like path than with the later local `118-paper` export.
 - Extending the practical `1:1:2` body-only plain-CE baseline with an `AdamW` cosine finetune changed the picture substantially. The final `80000` checkpoint still evaluated well (`90.64 / 72.87 / 93.19 / 75.74`), but the real win was an earlier retained checkpoint: `27000` reproduced exactly at `92.38 Acc / 90.30 Prec / 87.39 Rec / 88.70 F1` on a standalone full-test rerun. So for this practical path, the best result is no longer the original SGD bridge checkpoint but the retained `AdamW` cosine finetune checkpoint.
@@ -1,5 +1,7 @@
 from __future__ import annotations

+from collections import OrderedDict
+from collections.abc import Mapping
 from typing import Any

 import torch
@@ -21,6 +23,11 @@ from ..modules import (
 class DRF(BaseModelBody):
    """Dual Representation Framework from arXiv:2509.00872v1."""

+    LEGACY_STATE_PREFIXES: dict[str, str] = {
+        "attention_layer.": "PGA.",
+    }
+    CANONICAL_LABEL_ORDER: tuple[str, str, str] = ("negative", "neutral", "positive")
+
    def build_network(self, model_cfg: dict[str, Any]) -> None:
        self.Backbone = self.get_backbone(model_cfg["backbone_cfg"])
        self.Backbone = SetBlockWrapper(self.Backbone)
@@ -34,6 +41,13 @@ class DRF(BaseModelBody):
            num_pairs=model_cfg.get("num_pairs", 8),
            num_metrics=model_cfg.get("num_metrics", 3),
        )
+        self.label_order = resolve_label_order(model_cfg.get("label_order"))
+        self.label_map = {label: idx for idx, label in enumerate(self.label_order)}
+        self.canonical_inference_logits = bool(model_cfg.get("canonical_inference_logits", True))
+        self.logit_canonical_indices = torch.tensor(
+            [self.label_map[label] for label in self.CANONICAL_LABEL_ORDER],
+            dtype=torch.long,
+        )

    def forward(
        self,
@@ -48,7 +62,7 @@ class DRF(BaseModelBody):
    ) -> dict[str, dict[str, Any]]:
        ipts, pids, labels, _, seqL, key_features = inputs
        label_ids = torch.as_tensor(
-            [LABEL_MAP[str(label).lower()] for label in labels],
+            [self.label_map[str(label).lower()] for label in labels],
            device=pids.device,
            dtype=torch.long,
        )
@@ -69,6 +83,7 @@ class DRF(BaseModelBody):

        embed_2, logits = self.BNNecks(embed_1)
        del embed_2
+        inference_logits = self.canonicalize_logits(logits)

        return {
            "training_feat": {
@@ -79,10 +94,52 @@ class DRF(BaseModelBody):
                "image/sils": rearrange(heatmaps, "n c s h w -> (n s) c h w"),
            },
            "inference_feat": {
-                "embeddings": logits,
+                "embeddings": inference_logits,
            },
        }

+    def canonicalize_logits(
+        self,
+        logits: Float[torch.Tensor, "batch classes parts"],
+    ) -> Float[torch.Tensor, "batch classes parts"]:
+        if not self.canonical_inference_logits or tuple(self.label_order) == self.CANONICAL_LABEL_ORDER:
+            return logits
+        indices = self.logit_canonical_indices.to(device=logits.device)
+        return logits.index_select(dim=1, index=indices)
+
+    @classmethod
+    def remap_legacy_state_dict(
+        cls,
+        state_dict: Mapping[str, torch.Tensor],
+    ) -> OrderedDict[str, torch.Tensor]:
+        """Map older author checkpoint names onto the current DRF module tree."""
+
+        remapped_state = OrderedDict[str, torch.Tensor]()
+        for key, value in state_dict.items():
+            remapped_key = key
+            for old_prefix, new_prefix in cls.LEGACY_STATE_PREFIXES.items():
+                if remapped_key.startswith(old_prefix):
+                    remapped_key = new_prefix + remapped_key[len(old_prefix) :]
+                    break
+            remapped_state[remapped_key] = value
+
+        metadata = getattr(state_dict, "_metadata", None)
+        if metadata is not None:
+            setattr(remapped_state, "_metadata", metadata)
+        return remapped_state
+
+    def load_state_dict(
+        self,
+        state_dict: Mapping[str, torch.Tensor],
+        strict: bool = True,
+        assign: bool = False,
+    ) -> Any:
+        return super().load_state_dict(
+            self.remap_legacy_state_dict(state_dict),
+            strict=strict,
+            assign=assign,
+        )
+

 class PAVGuidedAttention(nn.Module):
    channel_att: nn.Sequential
@@ -119,11 +176,25 @@ class PAVGuidedAttention(nn.Module):
        return embeddings * channel_att * spatial_att


-LABEL_MAP: dict[str, int] = {
-    "negative": 0,
-    "neutral": 1,
-    "positive": 2,
-}
+def resolve_label_order(label_order_cfg: Any) -> tuple[str, str, str]:
+    if label_order_cfg is None:
+        return DRF.CANONICAL_LABEL_ORDER
+
+    if not isinstance(label_order_cfg, list | tuple):
+        raise TypeError(
+            "DRF model_cfg.label_order must be a list/tuple of "
+            "['negative', 'neutral', 'positive'] in the desired logit order."
+        )
+
+    normalized_order = tuple(str(label).lower() for label in label_order_cfg)
+    expected = set(DRF.CANONICAL_LABEL_ORDER)
+    if len(normalized_order) != 3 or set(normalized_order) != expected:
+        raise ValueError(
+            "DRF model_cfg.label_order must contain exactly "
+            "negative/neutral/positive once each; "
+            f"got {label_order_cfg!r}"
+        )
+    return normalized_order


 def canonicalize_pav(