From 4e0b0a18dc9eb86354c064cff68bf7f71df2bd0c Mon Sep 17 00:00:00 2001 From: crosstyan Date: Sun, 8 Mar 2026 17:34:33 +0800 Subject: [PATCH] Document ScoNet naming and Scoliosis eval configs --- ...et_scoliosis1k_local_eval_2gpu_better.yaml | 101 +++++++++++ ...coliosis1k_local_eval_2gpu_better_112.yaml | 101 +++++++++++ .../sconet_scoliosis1k_skeleton_118.yaml | 9 +- datasets/Scoliosis1K/README.md | 20 +++ docs/sconet-drf-status-and-training.md | 158 ++++++++++-------- 5 files changed, 315 insertions(+), 74 deletions(-) create mode 100644 configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better.yaml create mode 100644 configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better_112.yaml diff --git a/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better.yaml b/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better.yaml new file mode 100644 index 0000000..eb7b1ed --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better.yaml @@ -0,0 +1,101 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-sil-pkl + dataset_partition: /mnt/public/data/Scoliosis1K/Scoliosis1K_1116.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: ./ckpt/ScoNet-20000-better.pt + save_name: ScoNet_better + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 2 + sample_type: all_ordered + frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + channels: + - 64 + - 128 + - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 20000 + save_name: ScoNet_better + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 8 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better_112.yaml b/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better_112.yaml new file mode 100644 index 0000000..c5f6dfd --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_local_eval_2gpu_better_112.yaml @@ -0,0 +1,101 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-sil-pkl + dataset_partition: /mnt/public/data/Scoliosis1K/Scoliosis1K_112.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: ./ckpt/ScoNet-20000-better.pt + save_name: ScoNet_better_112 + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 2 + sample_type: all_ordered + frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + channels: + - 64 + - 128 + - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 20000 + save_name: ScoNet_better_112 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 8 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/configs/sconet/sconet_scoliosis1k_skeleton_118.yaml b/configs/sconet/sconet_scoliosis1k_skeleton_118.yaml index 590118d..ec8d4db 100644 --- a/configs/sconet/sconet_scoliosis1k_skeleton_118.yaml +++ b/configs/sconet/sconet_scoliosis1k_skeleton_118.yaml @@ -1,10 +1,7 @@ data_cfg: dataset_name: Scoliosis1K - dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-aligned + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K_sigma_8.0/pkl dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json - data_in_use: - - true - - false num_workers: 1 remove_no_gallery: false test_dataset_name: Scoliosis1K @@ -13,7 +10,7 @@ evaluator_cfg: enable_float16: true restore_ckpt_strict: true restore_hint: 20000 - save_name: ScoNet_skeleton_118 + save_name: ScoNet_skeleton_118_sigma8 eval_func: evaluate_scoliosis sampler: batch_shuffle: false @@ -90,7 +87,7 @@ trainer_cfg: restore_ckpt_strict: true restore_hint: 0 save_iter: 20000 - save_name: ScoNet_skeleton_118 + save_name: ScoNet_skeleton_118_sigma8 sync_BN: true total_iter: 20000 sampler: diff --git a/datasets/Scoliosis1K/README.md b/datasets/Scoliosis1K/README.md index b343bd7..efaba07 100644 --- a/datasets/Scoliosis1K/README.md +++ b/datasets/Scoliosis1K/README.md @@ -69,6 +69,26 @@ python -m torch.distributed.launch --nproc_per_node=4 \ opengait/main.py --cfgs configs/sconet/sconet_scoliosis1k.yaml --phase test --log_to_file ``` +### Modality sanity check + +The silhouette and skeleton-map pipelines are different experiments and should not be mixed when you interpret results. + +* `Scoliosis1K-sil-pkl` is the silhouette modality used by the standard ScoNet configs. +* pose-derived heatmap roots such as `Scoliosis1K_sigma_8.0/pkl` or DRF exports are skeleton-map inputs and require `in_channel: 2`. + +Naming note: + +* in this repo, the local `ScoNet` training config and model class are usually the paper's `ScoNet-MT`, not the CE-only paper `ScoNet` +* in these docs, `ScoNet-MT-ske` means the skeleton-map variant of that same model path +* checkpoint filenames like `ScoNet-20000-better.pt` do not identify the modality by name alone + +A strong silhouette checkpoint does not validate the skeleton-map path. In particular, `ckpt/ScoNet-20000-better.pt` is a silhouette checkpoint: + +* its first convolution expects 1-channel input +* the matching eval config points to `Scoliosis1K-sil-pkl` + +So if you are debugging DRF or `ScoNet-MT-ske` reproduction, do not use `ScoNet-20000-better.pt` as evidence that the heatmap preprocessing is correct. + --- ## Pose-to-Heatmap Conversion diff --git a/docs/sconet-drf-status-and-training.md b/docs/sconet-drf-status-and-training.md index 8e47438..081a020 100644 --- a/docs/sconet-drf-status-and-training.md +++ b/docs/sconet-drf-status-and-training.md @@ -1,86 +1,108 @@ -# ScoNet and DRF: Status, Architecture, and Training Guide +# ScoNet and DRF: Status, Architecture, and Reproduction Notes -This document provides a technical overview of the Scoliosis screening models in OpenGait, mapping paper concepts to the repository's implementation status. +This note records the current Scoliosis1K implementation status in this repo and the main conclusions from the recent reproduction/debugging work. -## DRF implementation status in OpenGait +## Current status -As of the current version, the **Dual Representation Framework (DRF)** described in the MICCAI 2025 paper *"Pose as Clinical Prior: Learning Dual Representations for Scoliosis Screening"* is **not yet explicitly implemented** as a standalone model in this repository. +- `opengait/modeling/models/sconet.py` is still the standard Scoliosis1K baseline in this repo. +- The class is named `ScoNet`, but functionally it is the paper's multi-task variant because training uses both `CrossEntropyLoss` and `TripletLoss`. +- `opengait/modeling/models/drf.py` is now implemented as a standalone DRF model in this repo. +- Logging supports TensorBoard and optional Weights & Biases through `opengait/utils/msg_manager.py`. -### Current State -- **ScoNet-MT (Functional Implementation)**: While the class in `opengait/modeling/models/sconet.py` is named `ScoNet`, it is functionally the **ScoNet-MT** (Multi-Task) variant described in the MICCAI 2024 paper. It utilizes both classification and triplet losses. -- **Dual Representation (DRF)**: While `opengait/modeling/models/skeletongait++.py` implements a dual-representation (silhouette + pose heatmap) architecture for gait recognition, the specific DRF screening model (MICCAI 2025) is not yet explicitly implemented as a standalone class. -- **Naming Note**: The repository uses the base name `ScoNet` for the multi-task implementation, as it is the high-performance variant recommended for use. +## Naming clarification -### Implementation Blueprint for DRF -To implement DRF within the OpenGait framework, follow this structure: -1. **Model Location**: Create `opengait/modeling/models/drf.py` inheriting from `BaseModel`. -2. **Input Handling**: Extend `inputs_pretreament` to handle both silhouettes and pose heatmaps (refer to `SkeletonGaitPP.inputs_pretreament` in `skeletongait++.py`). -3. **Dual-Branch Backbone**: Use separate early layers for silhouette and skeleton map streams, then fuse via `AttentionFusion` (from `skeletongait++.py:135`) or a PAV-Guided Attention module as described in the DRF paper. -4. **Forward Contract**: - - `training_feat`: Must include `triplet` (for identity/feature consistency) and `softmax` (for screening classification). - - `visual_summary`: Include `image/sils` and `image/heatmaps` for TensorBoard visualization. - - `inference_feat`: Return `logits` for classification. -5. **Config**: Create `configs/drf/drf_scoliosis1k.yaml` specifying `model: DRF` and configuring the dual-stream backbone. -6. **Evaluator**: Use `eval_func: evaluate_scoliosis` in the config to leverage the existing screening metrics (Accuracy, Precision, Recall, F1). -7. **Dataset**: Requires the **Scoliosis1K-Pose** dataset which provides 17 anatomical keypoints in MS-COCO format alongside the existing silhouettes. +The name `ScoNet` is overloaded across the paper, config files, and checkpoints. Use the mapping below when reading this repo: ---- +| Local name | What it means here | Closest paper name | +| :--- | :--- | :--- | +| `ScoNet` model class | `opengait/modeling/models/sconet.py` with both CE and triplet losses | `ScoNet-MT` | +| `configs/sconet/sconet_scoliosis1k.yaml` | standard Scoliosis1K silhouette training recipe in this repo | `ScoNet-MT` training recipe | +| `ScoNet-*.pt` checkpoint filenames | local checkpoint naming inherited from the repo/config | usually `ScoNet-MT` if trained with the default config | +| `ScoNet-MT-ske` in these docs | same ScoNet code path, but fed 2-channel skeleton maps | paper notation `ScoNet-MT^{ske}` | +| `DRF` | `ScoNet-MT-ske` plus PGA/PAV guidance | `DRF` | -## ScoNet/ScoNet-MT architecture mapping +So: -> [!IMPORTANT] -> **Naming Clarification**: The implementation in this repository is **ScoNet-MT**, not the single-task ScoNet. -> - **ScoNet (Single-Task)**: Defined in the paper as using only CrossEntropyLoss. -> - **ScoNet-MT (Multi-Task)**: Defined as using $L_{total} = L_{ce} + L_{triplet}$. -> -> **Evidence for ScoNet-MT in this repo:** -> 1. **Dual Loss Configuration**: `configs/sconet/sconet_scoliosis1k.yaml` (lines 24-33) defines both `TripletLoss` (margin: 0.2) and `CrossEntropyLoss`. -> 2. **Dual-Key Forward Pass**: `sconet.py` (lines 42-46) returns both `'triplet'` and `'softmax'` keys in the `training_feat` dictionary. -> 3. **Triplet Sampling**: The trainer uses `TripletSampler` with `batch_size: [8, 8]` (P=8, K=8) to support triplet mining (config lines 92-99). -> -> A "pure" ScoNet implementation would require removing the `TripletLoss`, switching to a standard `InferenceSampler`, and removing the `triplet` key from the model's `forward` return. +- paper `ScoNet` means the single-task CE-only model +- repo `ScoNet` usually means the multi-task variant unless someone explicitly removes triplet loss +- a checkpoint named `ScoNet-...pt` is not enough to tell the modality by itself; check input channels and dataset root -The `ScoNet` (functionally ScoNet-MT) implementation in `opengait/modeling/models/sconet.py` maps to the paper as follows: +## Important modality note + +The strongest local ScoNet checkpoint we checked, `ckpt/ScoNet-20000-better.pt`, is a silhouette checkpoint, not a skeleton-map checkpoint. + +Evidence: + +- its first convolution weight has shape `(64, 1, 3, 3)`, so it expects 1-channel input +- the matching eval config points to `Scoliosis1K-sil-pkl` +- the skeleton-map configs in this repo use `in_channel: 2` + +This matters because a good result from `ScoNet-20000-better.pt` only validates the silhouette path. It does not validate the heatmap/skeleton-map preprocessing used by DRF or by a `ScoNet-MT-ske`-style control. + +## What was checked against `f754f6f3831e9f83bb28f4e2f63dd43d8bcf9dc4` + +The upstream ScoNet training recipe itself is effectively unchanged: + +- `configs/sconet/sconet_scoliosis1k.yaml` is unchanged +- `opengait/modeling/models/sconet.py` is unchanged +- `opengait/main.py`, `opengait/modeling/base_model.py`, `opengait/data/dataset.py`, `opengait/data/collate_fn.py`, and `opengait/evaluation/evaluator.py` only differ in import cleanup and logging hooks + +So the current failure is not explained by a changed optimizer, scheduler, sampler, train loop, or evaluator. + +For the skeleton-map control, the only required functional drift from the upstream ScoNet config was: + +- use a heatmap dataset root instead of `Scoliosis1K-sil-pkl` +- switch the partition to `Scoliosis1K_118.json` +- set `model_cfg.backbone_cfg.in_channel: 2` +- reduce test `batch_size` to match the local 2-GPU DDP evaluator constraint + +## Local reproduction findings + +The main findings so far are: + +- `ScoNet-20000-better.pt` on the `1:1:2` silhouette split reproduced cleanly at `95.05%` accuracy and `85.12%` macro-F1. +- The `1:1:8` skeleton-map control trained with healthy optimization metrics but evaluated very poorly. +- A recent `ScoNet-MT-ske`-style control on `Scoliosis1K_sigma_8.0/pkl` finished with `36.45%` accuracy and `32.78%` macro-F1. +- That result is far below the paper's `1:1:8` ScoNet-MT range and far below the silhouette baseline behavior. + +The current working conclusion is: + +- the core ScoNet trainer is not the problem +- the strong silhouette checkpoint is not evidence that the skeleton-map path works +- the main remaining suspect is the skeleton-map representation and preprocessing path + +For readability in this repo's docs, `ScoNet-MT-ske` refers to the skeleton-map variant that the DRF paper writes as `ScoNet-MT^{ske}`. + +## Architecture mapping + +`ScoNet` in this repo maps to the paper as follows: | Paper Component | Code Reference | Description | | :--- | :--- | :--- | -| **Backbone** | `ResNet9` in `backbones/resnet.py` | A customized ResNet with 4 layers and configurable channels. | -| **Temporal Aggregation** | `self.TP` (Temporal Pooling) | Uses `PackSequenceWrapper(torch.max)` to aggregate frame features. | -| **Spatial Features** | `self.HPP` (Horizontal Pooling) | `HorizontalPoolingPyramid` with `bin_num: 16`. | -| **Feature Mapping** | `self.FCs` (`SeparateFCs`) | Maps pooled features to a latent embedding space. | -| **Classification Head** | `self.BNNecks` (`SeparateBNNecks`) | Produces logits for the 3-class screening task. | -| **Label Mapping** | `sconet.py` lines 21-23 | `negative: 0`, `neutral: 1`, `positive: 2`. | +| Backbone | `ResNet9` in `opengait/modeling/backbones/resnet.py` | Four residual stages with channels `[64, 128, 256, 512]`. | +| Temporal aggregation | `PackSequenceWrapper(torch.max)` | Temporal max pooling over frames. | +| Spatial pooling | `HorizontalPoolingPyramid` | 16-bin horizontal partition. | +| Feature mapping | `SeparateFCs` | Maps pooled features into the embedding space. | +| Classification head | `SeparateBNNecks` | Produces screening logits. | +| Losses | `TripletLoss` + `CrossEntropyLoss` | This is why the repo implementation is functionally ScoNet-MT. | ---- +## Training path summary -## Training guide (dataloader, optimizer, logging) +The standard Scoliosis1K ScoNet recipe is: -### Dataloader Setup -The training configuration is defined in `configs/sconet/sconet_scoliosis1k.yaml`: -- **Sampler**: `TripletSampler` (standard for OpenGait). -- **Batch Size**: `[8, 8]` (8 identities, 8 sequences per identity). -- **Sequence Sampling**: `fixed_unordered` with `frames_num_fixed: 30`. -- **Transform**: `BaseSilCuttingTransform` for silhouette preprocessing. +- sampler: `TripletSampler` +- train batch layout: `8 x 8` +- train sample type: `fixed_unordered` +- train frames: `30` +- transform: `BaseSilCuttingTransform` +- optimizer: `SGD(lr=0.1, momentum=0.9, weight_decay=5e-4)` +- scheduler: `MultiStepLR` with milestones `[10000, 14000, 18000]` +- total iterations: `20000` -### Optimizer and Scheduler -- **Optimizer**: SGD - - `lr: 0.1` - - `momentum: 0.9` - - `weight_decay: 0.0005` -- **Scheduler**: `MultiStepLR` - - `milestones: [10000, 14000, 18000]` - - `gamma: 0.1` -- **Total Iterations**: 20,000. +The skeleton-map control used the same recipe, except for the modality-specific changes listed above. -### Logging -- **TensorBoard**: OpenGait natively supports TensorBoard logging. Training losses (`triplet`, `softmax`) and accuracies are logged every `log_iter: 100`. -- **WandB**: There is **no native Weights & Biases (WandB) integration** in the current codebase. Users wishing to use WandB must manually integrate it into `opengait/utils/msg_manager.py` or `opengait/main.py`. -- **Evaluation**: Metrics (Accuracy, Precision, Recall, F1) are computed by `evaluate_scoliosis` in `opengait/evaluation/evaluator.py` and logged to the console/file. +## Recommended next checks ---- - -## Evidence References -- **Model Implementation**: `opengait/modeling/models/sconet.py` -- **Training Config**: `configs/sconet/sconet_scoliosis1k.yaml` -- **Evaluation Logic**: `opengait/evaluation/evaluator.py::evaluate_scoliosis` -- **Backbone Definition**: `opengait/modeling/backbones/resnet.py::ResNet9` +1. Train a pure silhouette `1:1:8` baseline from the upstream ScoNet config as a clean sanity control. +2. Treat skeleton-map preprocessing as the primary debugging target until a `ScoNet-MT-ske`-style run gets close to the paper. +3. Only after the skeleton baseline is credible should DRF/PAV-specific conclusions be treated as decisive.