Add resumable ScoNet skeleton training diagnostics
This commit is contained in:
@@ -89,6 +89,8 @@ CUDA_VISIBLE_DEVICES=0,1 uv run python -m torch.distributed.launch --nproc_per_n
|
||||
```
|
||||
|
||||
> **Note:** The `--nproc_per_node` argument must exactly match the number of GPUs specified in `CUDA_VISIBLE_DEVICES`. For single-GPU evaluation, use `CUDA_VISIBLE_DEVICES=0` and `--nproc_per_node=1` with the DDP launcher.
|
||||
>
|
||||
> **Resume Tip:** To survive interrupted training runs, set `trainer_cfg.resume_every_iter` to a positive value and optionally set `trainer_cfg.auto_resume_latest: true`. OpenGait will then keep `output/.../checkpoints/latest.pt` updated for crash recovery.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -68,6 +68,9 @@ trainer_cfg:
|
||||
optimizer_reset: false
|
||||
scheduler_reset: false
|
||||
restore_hint: 0
|
||||
auto_resume_latest: false
|
||||
resume_every_iter: 0
|
||||
resume_keep: 3
|
||||
save_iter: 2000
|
||||
save_name: tmp
|
||||
sync_BN: false
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
coco18tococo17_args:
|
||||
transfer_to_coco17: False
|
||||
|
||||
padkeypoints_args:
|
||||
pad_method: knn
|
||||
use_conf: True
|
||||
|
||||
norm_args:
|
||||
pose_format: coco
|
||||
use_conf: ${padkeypoints_args.use_conf}
|
||||
heatmap_image_height: 128
|
||||
|
||||
heatmap_generator_args:
|
||||
sigma: 1.5
|
||||
use_score: ${padkeypoints_args.use_conf}
|
||||
img_h: ${norm_args.heatmap_image_height}
|
||||
img_w: ${norm_args.heatmap_image_height}
|
||||
with_limb: null
|
||||
with_kp: null
|
||||
|
||||
align_args:
|
||||
align: True
|
||||
final_img_size: 64
|
||||
offset: 0
|
||||
heatmap_image_size: ${norm_args.heatmap_image_height}
|
||||
@@ -0,0 +1,28 @@
|
||||
coco18tococo17_args:
|
||||
transfer_to_coco17: False
|
||||
|
||||
padkeypoints_args:
|
||||
pad_method: knn
|
||||
use_conf: True
|
||||
|
||||
norm_args:
|
||||
pose_format: coco
|
||||
use_conf: ${padkeypoints_args.use_conf}
|
||||
heatmap_image_height: 128
|
||||
|
||||
heatmap_generator_args:
|
||||
sigma: 1.5
|
||||
use_score: ${padkeypoints_args.use_conf}
|
||||
img_h: ${norm_args.heatmap_image_height}
|
||||
img_w: ${norm_args.heatmap_image_height}
|
||||
with_limb: null
|
||||
with_kp: null
|
||||
|
||||
sigma_limb: 1.5
|
||||
sigma_joint: 8.0
|
||||
|
||||
align_args:
|
||||
align: True
|
||||
final_img_size: 64
|
||||
offset: 0
|
||||
heatmap_image_size: ${norm_args.heatmap_image_height}
|
||||
@@ -0,0 +1,105 @@
|
||||
data_cfg:
|
||||
dataset_name: Scoliosis1K
|
||||
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15
|
||||
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
|
||||
data_in_use:
|
||||
- true
|
||||
- false
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Scoliosis1K
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15
|
||||
eval_func: evaluate_scoliosis
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 1
|
||||
sample_type: all_ordered
|
||||
frames_all_limit: 720
|
||||
metric: euc
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: ScoNet
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
in_channel: 2
|
||||
channels:
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 16
|
||||
SeparateBNNecks:
|
||||
class_num: 3
|
||||
in_channels: 256
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 10000
|
||||
- 14000
|
||||
- 18000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
fix_BN: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15
|
||||
sync_BN: true
|
||||
total_iter: 20000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 8
|
||||
- 8
|
||||
frames_num_fixed: 30
|
||||
sample_type: fixed_unordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
@@ -0,0 +1,105 @@
|
||||
data_cfg:
|
||||
dataset_name: Scoliosis1K
|
||||
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15
|
||||
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
|
||||
data_in_use:
|
||||
- true
|
||||
- false
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Scoliosis1K
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_bs12x8
|
||||
eval_func: evaluate_scoliosis
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 1
|
||||
sample_type: all_ordered
|
||||
frames_all_limit: 720
|
||||
metric: euc
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: ScoNet
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
in_channel: 2
|
||||
channels:
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 16
|
||||
SeparateBNNecks:
|
||||
class_num: 3
|
||||
in_channels: 256
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 10000
|
||||
- 14000
|
||||
- 18000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
fix_BN: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_bs12x8
|
||||
sync_BN: true
|
||||
total_iter: 20000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 12
|
||||
- 8
|
||||
frames_num_fixed: 30
|
||||
sample_type: fixed_unordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
@@ -0,0 +1,105 @@
|
||||
data_cfg:
|
||||
dataset_name: Scoliosis1K
|
||||
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign
|
||||
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
|
||||
data_in_use:
|
||||
- true
|
||||
- false
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Scoliosis1K
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_bs12x8
|
||||
eval_func: evaluate_scoliosis
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 1
|
||||
sample_type: all_ordered
|
||||
frames_all_limit: 720
|
||||
metric: euc
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: ScoNet
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
in_channel: 2
|
||||
channels:
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 16
|
||||
SeparateBNNecks:
|
||||
class_num: 3
|
||||
in_channels: 256
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 10000
|
||||
- 14000
|
||||
- 18000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
fix_BN: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
save_iter: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_bs12x8
|
||||
sync_BN: true
|
||||
total_iter: 20000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 12
|
||||
- 8
|
||||
frames_num_fixed: 30
|
||||
sample_type: fixed_unordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
@@ -0,0 +1,108 @@
|
||||
data_cfg:
|
||||
dataset_name: Scoliosis1K
|
||||
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign
|
||||
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
|
||||
data_in_use:
|
||||
- true
|
||||
- false
|
||||
num_workers: 1
|
||||
remove_no_gallery: false
|
||||
test_dataset_name: Scoliosis1K
|
||||
|
||||
evaluator_cfg:
|
||||
enable_float16: true
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8
|
||||
eval_func: evaluate_scoliosis
|
||||
sampler:
|
||||
batch_shuffle: false
|
||||
batch_size: 2
|
||||
sample_type: all_ordered
|
||||
frames_all_limit: 720
|
||||
metric: euc
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
|
||||
loss_cfg:
|
||||
- loss_term_weight: 1.0
|
||||
margin: 0.2
|
||||
type: TripletLoss
|
||||
log_prefix: triplet
|
||||
- loss_term_weight: 1.0
|
||||
scale: 16
|
||||
type: CrossEntropyLoss
|
||||
log_prefix: softmax
|
||||
log_accuracy: true
|
||||
|
||||
model_cfg:
|
||||
model: ScoNet
|
||||
backbone_cfg:
|
||||
type: ResNet9
|
||||
block: BasicBlock
|
||||
in_channel: 2
|
||||
channels:
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
layers:
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
- 1
|
||||
strides:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 1
|
||||
maxpool: false
|
||||
SeparateFCs:
|
||||
in_channels: 512
|
||||
out_channels: 256
|
||||
parts_num: 16
|
||||
SeparateBNNecks:
|
||||
class_num: 3
|
||||
in_channels: 256
|
||||
parts_num: 16
|
||||
bin_num:
|
||||
- 16
|
||||
|
||||
optimizer_cfg:
|
||||
lr: 0.1
|
||||
momentum: 0.9
|
||||
solver: SGD
|
||||
weight_decay: 0.0005
|
||||
|
||||
scheduler_cfg:
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 10000
|
||||
- 14000
|
||||
- 18000
|
||||
scheduler: MultiStepLR
|
||||
|
||||
trainer_cfg:
|
||||
enable_float16: true
|
||||
fix_BN: false
|
||||
with_test: false
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
auto_resume_latest: true
|
||||
resume_every_iter: 500
|
||||
resume_keep: 3
|
||||
save_iter: 20000
|
||||
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8
|
||||
sync_BN: true
|
||||
total_iter: 20000
|
||||
sampler:
|
||||
batch_shuffle: true
|
||||
batch_size:
|
||||
- 12
|
||||
- 8
|
||||
frames_num_fixed: 30
|
||||
sample_type: fixed_unordered
|
||||
type: TripletSampler
|
||||
transform:
|
||||
- type: BaseSilCuttingTransform
|
||||
@@ -75,6 +75,7 @@ The silhouette and skeleton-map pipelines are different experiments and should n
|
||||
|
||||
* `Scoliosis1K-sil-pkl` is the silhouette modality used by the standard ScoNet configs.
|
||||
* pose-derived heatmap roots such as `Scoliosis1K_sigma_8.0/pkl` or DRF exports are skeleton-map inputs and require `in_channel: 2`.
|
||||
* DRF does **not** use the silhouette stream as an input. It uses `0_heatmap.pkl` plus `1_pav.pkl`.
|
||||
|
||||
Naming note:
|
||||
|
||||
@@ -89,6 +90,18 @@ A strong silhouette checkpoint does not validate the skeleton-map path. In parti
|
||||
|
||||
So if you are debugging DRF or `ScoNet-MT-ske` reproduction, do not use `ScoNet-20000-better.pt` as evidence that the heatmap preprocessing is correct.
|
||||
|
||||
### Overlay caveat
|
||||
|
||||
Do not treat a direct overlay between `Scoliosis1K-sil-pkl` and pose-derived skeleton maps as a valid alignment test.
|
||||
|
||||
Reason:
|
||||
|
||||
* the released silhouette modality is an estimated segmentation output from `PP-HumanSeg v2`
|
||||
* the released pose modality is an estimated keypoint output from `ViTPose`
|
||||
* the two modalities are normalized by different preprocessing pipelines before they reach OpenGait
|
||||
|
||||
So a silhouette-vs-skeleton mismatch in a debug figure is usually a cross-modality frame-of-reference issue, not proof that the raw dataset is bad. The more important check for skeleton-map debugging is whether the **limb and joint channels align with each other** inside `0_heatmap.pkl`.
|
||||
|
||||
---
|
||||
|
||||
## Pose-to-Heatmap Conversion
|
||||
@@ -146,6 +159,21 @@ If you explicitly want train-only PAV min-max statistics, add:
|
||||
--stats_partition=./datasets/Scoliosis1K/Scoliosis1K_118.json
|
||||
```
|
||||
|
||||
### Heatmap debugging notes
|
||||
|
||||
Current confirmed findings from local debugging:
|
||||
|
||||
* the raw pose dataset itself looks healthy; poor `ScoNet-MT-ske` results are not explained by obvious missing-joint collapse
|
||||
* a larger heatmap sigma can materially blur away the articulated structure; `sigma=8` was much broader than the silhouette geometry, while smaller sigma values recovered more structure
|
||||
* an earlier bug aligned the limb and joint channels separately; that made the two channels of `0_heatmap.pkl` slightly misregistered
|
||||
* the heatmap path is now patched so limb and joint channels share one alignment crop
|
||||
|
||||
Remaining caution:
|
||||
|
||||
* the exported skeleton map is stored as `64x64`
|
||||
* if the runtime config uses `BaseSilCuttingTransform`, the network actually sees `64x44`
|
||||
* that symmetric left/right crop is not automatically wrong, but it is still a meaningful ablation point for skeleton-map experiments
|
||||
|
||||
The output layout is:
|
||||
|
||||
```text
|
||||
|
||||
@@ -8,7 +8,8 @@ import pickle
|
||||
import argparse
|
||||
import numpy as np
|
||||
from glob import glob
|
||||
from typing import Literal
|
||||
from copy import deepcopy
|
||||
from typing import Any, Literal
|
||||
from tqdm import tqdm
|
||||
import matplotlib.cm as cm
|
||||
import torch.distributed as dist
|
||||
@@ -516,7 +517,7 @@ class GatherTransform(object):
|
||||
"""
|
||||
Gather the different transforms.
|
||||
"""
|
||||
def __init__(self, base_transform, transform_bone, transform_joint):
|
||||
def __init__(self, base_transform, transform_bone, transform_joint, align_transform=None):
|
||||
|
||||
"""
|
||||
base_transform: Some common transform, e.g., COCO18toCOCO17, PadKeypoints, CenterAndScale
|
||||
@@ -526,12 +527,15 @@ class GatherTransform(object):
|
||||
self.base_transform = base_transform
|
||||
self.transform_bone = transform_bone
|
||||
self.transform_joint = transform_joint
|
||||
self.align_transform = align_transform
|
||||
|
||||
def __call__(self, pose_data):
|
||||
x = self.base_transform(pose_data)
|
||||
heatmap_bone = self.transform_bone(x) # [T, 1, H, W]
|
||||
heatmap_joint = self.transform_joint(x) # [T, 1, H, W]
|
||||
heatmap = np.concatenate([heatmap_bone, heatmap_joint], axis=1)
|
||||
if self.align_transform is not None:
|
||||
heatmap = self.align_transform(heatmap)
|
||||
return heatmap
|
||||
|
||||
class HeatmapAlignment():
|
||||
@@ -543,23 +547,32 @@ class HeatmapAlignment():
|
||||
|
||||
def center_crop(self, heatmap):
|
||||
"""
|
||||
Input: [1, heatmap_image_size, heatmap_image_size]
|
||||
Output: [1, final_img_size, final_img_size]
|
||||
Input: [C, heatmap_image_size, heatmap_image_size]
|
||||
Output: [C, final_img_size, final_img_size]
|
||||
"""
|
||||
raw_heatmap = heatmap[0]
|
||||
if self.align:
|
||||
y_sum = raw_heatmap.sum(axis=1)
|
||||
y_top = (y_sum != 0).argmax(axis=0)
|
||||
y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0)
|
||||
height = y_btm - y_top + 1
|
||||
raw_heatmap = raw_heatmap[y_top - self.offset: y_btm + 1 + self.offset, (self.heatmap_image_size // 2) - (height // 2) : (self.heatmap_image_size // 2) + (height // 2) + 1]
|
||||
raw_heatmap = cv2.resize(raw_heatmap, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA)
|
||||
return raw_heatmap[np.newaxis, :, :] # [1, final_img_size, final_img_size]
|
||||
raw_heatmap = heatmap
|
||||
if self.align:
|
||||
support_map = raw_heatmap.max(axis=0)
|
||||
y_sum = support_map.sum(axis=1)
|
||||
nonzero_rows = np.flatnonzero(y_sum != 0)
|
||||
if nonzero_rows.size != 0:
|
||||
y_top = max(int(nonzero_rows[0]) - self.offset, 0)
|
||||
y_btm = min(int(nonzero_rows[-1]) + self.offset, self.heatmap_image_size - 1)
|
||||
height = y_btm - y_top + 1
|
||||
x_center = self.heatmap_image_size // 2
|
||||
x_left = max(x_center - (height // 2), 0)
|
||||
x_right = min(x_center + (height // 2) + 1, self.heatmap_image_size)
|
||||
raw_heatmap = raw_heatmap[:, y_top:y_btm + 1, x_left:x_right]
|
||||
resized = np.stack([
|
||||
cv2.resize(channel, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA)
|
||||
for channel in raw_heatmap
|
||||
], axis=0)
|
||||
return resized # [C, final_img_size, final_img_size]
|
||||
|
||||
def __call__(self, heatmap_imgs):
|
||||
"""
|
||||
heatmap_imgs: (T, 1, raw_size, raw_size)
|
||||
return (T, 1, final_img_size, final_img_size)
|
||||
heatmap_imgs: (T, C, raw_size, raw_size)
|
||||
return (T, C, final_img_size, final_img_size)
|
||||
"""
|
||||
original_dtype = heatmap_imgs.dtype
|
||||
heatmap_imgs = heatmap_imgs.astype(np.float32) / 255.0
|
||||
@@ -570,12 +583,14 @@ class HeatmapAlignment():
|
||||
return heatmap_imgs.astype(original_dtype)
|
||||
|
||||
def GenerateHeatmapTransform(
|
||||
coco18tococo17_args,
|
||||
padkeypoints_args,
|
||||
norm_args,
|
||||
heatmap_generator_args,
|
||||
align_args,
|
||||
coco18tococo17_args: dict[str, Any],
|
||||
padkeypoints_args: dict[str, Any],
|
||||
norm_args: dict[str, Any],
|
||||
heatmap_generator_args: dict[str, Any],
|
||||
align_args: dict[str, Any],
|
||||
reduction: Literal["upstream", "max", "sum"] = "upstream",
|
||||
sigma_limb: float | None = None,
|
||||
sigma_joint: float | None = None,
|
||||
):
|
||||
|
||||
base_transform = T.Compose([
|
||||
@@ -584,34 +599,44 @@ def GenerateHeatmapTransform(
|
||||
CenterAndScaleNormalizer(**norm_args),
|
||||
])
|
||||
|
||||
heatmap_generator_args["with_limb"] = True
|
||||
heatmap_generator_args["with_kp"] = False
|
||||
bone_generator_args = deepcopy(heatmap_generator_args)
|
||||
joint_generator_args = deepcopy(heatmap_generator_args)
|
||||
|
||||
bone_generator_args["with_limb"] = True
|
||||
bone_generator_args["with_kp"] = False
|
||||
if sigma_limb is not None:
|
||||
bone_generator_args["sigma"] = sigma_limb
|
||||
bone_image_transform = (
|
||||
HeatmapToImage()
|
||||
if reduction == "upstream"
|
||||
else HeatmapReducer(reduction=reduction)
|
||||
)
|
||||
transform_bone = T.Compose([
|
||||
GeneratePoseTarget(**heatmap_generator_args),
|
||||
GeneratePoseTarget(**bone_generator_args),
|
||||
bone_image_transform,
|
||||
HeatmapAlignment(**align_args)
|
||||
])
|
||||
|
||||
heatmap_generator_args["with_limb"] = False
|
||||
heatmap_generator_args["with_kp"] = True
|
||||
joint_generator_args["with_limb"] = False
|
||||
joint_generator_args["with_kp"] = True
|
||||
if sigma_joint is not None:
|
||||
joint_generator_args["sigma"] = sigma_joint
|
||||
joint_image_transform = (
|
||||
HeatmapToImage()
|
||||
if reduction == "upstream"
|
||||
else HeatmapReducer(reduction=reduction)
|
||||
)
|
||||
transform_joint = T.Compose([
|
||||
GeneratePoseTarget(**heatmap_generator_args),
|
||||
GeneratePoseTarget(**joint_generator_args),
|
||||
joint_image_transform,
|
||||
HeatmapAlignment(**align_args)
|
||||
])
|
||||
|
||||
transform = T.Compose([
|
||||
GatherTransform(base_transform, transform_bone, transform_joint) # [T, 2, H, W]
|
||||
GatherTransform(
|
||||
base_transform,
|
||||
transform_bone,
|
||||
transform_joint,
|
||||
HeatmapAlignment(**align_args),
|
||||
) # [T, 2, H, W]
|
||||
])
|
||||
|
||||
return transform
|
||||
|
||||
@@ -98,6 +98,15 @@ def load_heatmap_cfg(cfg_path: str) -> dict[str, Any]:
|
||||
return cast(dict[str, Any], replaced)
|
||||
|
||||
|
||||
def optional_cfg_float(cfg: dict[str, Any], key: str) -> float | None:
|
||||
value = cfg.get(key)
|
||||
if value is None:
|
||||
return None
|
||||
if not isinstance(value, (int, float)):
|
||||
raise TypeError(f"Expected numeric value for {key}, got {type(value).__name__}")
|
||||
return float(value)
|
||||
|
||||
|
||||
def build_pose_transform(cfg: dict[str, Any]) -> T.Compose:
|
||||
return T.Compose([
|
||||
heatmap_prep.COCO18toCOCO17(**cfg["coco18tococo17_args"]),
|
||||
@@ -192,6 +201,8 @@ def main() -> None:
|
||||
heatmap_generator_args=heatmap_cfg["heatmap_generator_args"],
|
||||
align_args=heatmap_cfg["align_args"],
|
||||
reduction=cast(HeatmapReduction, args.heatmap_reduction),
|
||||
sigma_limb=optional_cfg_float(heatmap_cfg, "sigma_limb"),
|
||||
sigma_joint=optional_cfg_float(heatmap_cfg, "sigma_joint"),
|
||||
)
|
||||
|
||||
pose_paths = iter_pose_paths(args.pose_data_path)
|
||||
|
||||
@@ -59,9 +59,12 @@
|
||||
### trainer_cfg
|
||||
* Trainer configuration
|
||||
> * Args
|
||||
> * restore_hint: `int` value indicates the iteration number of restored checkpoint; `str` value indicates the path to restored checkpoint. The option is often used to finetune on new dataset or restore the interrupted training process.
|
||||
> * restore_hint: An `int` value gives the iteration number of the checkpoint to restore; a `str` value gives the path to the checkpoint to restore. Use `latest` to restore the latest rolling resume checkpoint. This option is often used to fine-tune on a new dataset or to resume an interrupted training run.
|
||||
> * auto_resume_latest: If `True` and `restore_hint==0`, training automatically resumes from `output/.../checkpoints/latest.pt` when it exists. This has no effect when the model is built for testing.
|
||||
> * fix_BN: If `True`, we fix the weight of all `BatchNorm` layers.
|
||||
> * log_iter: Log the information per `log_iter` iterations.
|
||||
> * resume_every_iter: Save a rolling resume checkpoint every `resume_every_iter` iterations. These checkpoints update `checkpoints/latest.pt` and are intended for crash recovery.
|
||||
> * resume_keep: Number of rolling resume checkpoints retained under `checkpoints/resume/`. Set `0` to keep all of them.
|
||||
> * save_iter: Save a checkpoint every `save_iter` iterations.
|
||||
> * with_test: If `True`, we test the model every `save_iter` iterations. This has a small performance cost. (*Disabled by default*)
|
||||
> * optimizer_reset: If `True` and `restore_hint!=0`, reset the optimizer while restoring the model.
|
||||
@@ -168,6 +171,9 @@ trainer_cfg:
|
||||
log_iter: 100
|
||||
restore_ckpt_strict: true
|
||||
restore_hint: 0
|
||||
auto_resume_latest: false
|
||||
resume_every_iter: 500
|
||||
resume_keep: 3
|
||||
save_iter: 10000
|
||||
save_name: Baseline
|
||||
sync_BN: true
|
||||
|
||||
+150
-14
@@ -9,8 +9,13 @@ Typical usage:
|
||||
BaseModel.run_train(model)
|
||||
BaseModel.run_test(model)
|
||||
"""
|
||||
import torch
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import os.path as osp
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
@@ -169,6 +174,13 @@ class BaseModel(MetaModel, nn.Module):
|
||||
restore_hint = self.engine_cfg['restore_hint']
|
||||
if restore_hint != 0:
|
||||
self.resume_ckpt(restore_hint)
|
||||
elif training and self.engine_cfg.get('auto_resume_latest', False):
|
||||
latest_ckpt = self._get_latest_resume_ckpt_path()
|
||||
if latest_ckpt is not None:
|
||||
self.msg_mgr.log_info(
|
||||
"Auto-resuming from latest checkpoint %s", latest_ckpt
|
||||
)
|
||||
self.resume_ckpt(latest_ckpt)
|
||||
|
||||
def get_backbone(self, backbone_cfg):
|
||||
"""Get the backbone of the model."""
|
||||
@@ -234,23 +246,112 @@ class BaseModel(MetaModel, nn.Module):
|
||||
scheduler = Scheduler(self.optimizer, **valid_arg)
|
||||
return scheduler
|
||||
|
||||
def _build_checkpoint(self, iteration: int) -> dict[str, Any]:
|
||||
checkpoint: dict[str, Any] = {
|
||||
'model': self.state_dict(),
|
||||
'optimizer': self.optimizer.state_dict(),
|
||||
'scheduler': self.scheduler.state_dict(),
|
||||
'iteration': iteration,
|
||||
'random_state': random.getstate(),
|
||||
'numpy_random_state': np.random.get_state(),
|
||||
'torch_random_state': torch.get_rng_state(),
|
||||
}
|
||||
if torch.cuda.is_available():
|
||||
checkpoint['cuda_random_state_all'] = torch.cuda.get_rng_state_all()
|
||||
if self.engine_cfg.get('enable_float16', False) and hasattr(self, 'Scaler'):
|
||||
checkpoint['scaler'] = self.Scaler.state_dict()
|
||||
return checkpoint
|
||||
|
||||
def _checkpoint_dir(self) -> str:
|
||||
return osp.join(self.save_path, "checkpoints")
|
||||
|
||||
def _resume_dir(self) -> str:
|
||||
return osp.join(self._checkpoint_dir(), "resume")
|
||||
|
||||
def _save_checkpoint_file(self, checkpoint: dict[str, Any], save_path: str) -> None:
    """Serialize ``checkpoint`` to ``save_path`` via a temporary sibling file.

    The data is first written to ``save_path + ".tmp"`` and then moved over
    the destination with ``os.replace``, so ``save_path`` is never opened for
    writing directly.

    Args:
        checkpoint: Object handed to ``torch.save``.
        save_path: Final location of the checkpoint file.
    """
    target_dir = osp.dirname(save_path)
    mkdir(target_dir)

    staging_path = save_path + ".tmp"
    torch.save(checkpoint, staging_path)
    # Swap the finished file into place in one step.
    os.replace(staging_path, save_path)
|
||||
|
||||
def _write_resume_meta(self, iteration: int, resume_path: str) -> None:
|
||||
meta_path = osp.join(self._checkpoint_dir(), "latest.json")
|
||||
meta = {
|
||||
"iteration": iteration,
|
||||
"path": resume_path,
|
||||
}
|
||||
tmp_path = meta_path + ".tmp"
|
||||
with open(tmp_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(meta, handle, indent=2, sort_keys=True)
|
||||
os.replace(tmp_path, meta_path)
|
||||
|
||||
def _prune_resume_checkpoints(self, keep_count: int) -> None:
|
||||
if keep_count <= 0:
|
||||
return
|
||||
resume_dir = self._resume_dir()
|
||||
if not osp.isdir(resume_dir):
|
||||
return
|
||||
prefix = f"{self.engine_cfg['save_name']}-resume-"
|
||||
resume_files = sorted(
|
||||
file_name for file_name in os.listdir(resume_dir)
|
||||
if file_name.startswith(prefix) and file_name.endswith(".pt")
|
||||
)
|
||||
stale_files = resume_files[:-keep_count]
|
||||
for file_name in stale_files:
|
||||
os.remove(osp.join(resume_dir, file_name))
|
||||
|
||||
def _get_latest_resume_ckpt_path(self) -> str | None:
|
||||
latest_path = osp.join(self._checkpoint_dir(), "latest.pt")
|
||||
if osp.isfile(latest_path):
|
||||
return latest_path
|
||||
meta_path = osp.join(self._checkpoint_dir(), "latest.json")
|
||||
if osp.isfile(meta_path):
|
||||
with open(meta_path, "r", encoding="utf-8") as handle:
|
||||
latest_meta = json.load(handle)
|
||||
candidate = latest_meta.get("path")
|
||||
if isinstance(candidate, str) and osp.isfile(candidate):
|
||||
return candidate
|
||||
return None
|
||||
|
||||
def save_ckpt(self, iteration):
|
||||
if torch.distributed.get_rank() == 0:
|
||||
mkdir(osp.join(self.save_path, "checkpoints/"))
|
||||
save_name = self.engine_cfg['save_name']
|
||||
checkpoint = {
|
||||
'model': self.state_dict(),
|
||||
'optimizer': self.optimizer.state_dict(),
|
||||
'scheduler': self.scheduler.state_dict(),
|
||||
'iteration': iteration}
|
||||
torch.save(checkpoint,
|
||||
osp.join(self.save_path, 'checkpoints/{}-{:0>5}.pt'.format(save_name, iteration)))
|
||||
checkpoint = self._build_checkpoint(iteration)
|
||||
ckpt_path = osp.join(
|
||||
self._checkpoint_dir(),
|
||||
'{}-{:0>5}.pt'.format(save_name, iteration),
|
||||
)
|
||||
self._save_checkpoint_file(checkpoint, ckpt_path)
|
||||
|
||||
def save_resume_ckpt(self, iteration: int) -> None:
    """Write a rolling resume checkpoint for ``iteration`` (rank 0 only).

    The checkpoint is saved twice: once under the resume directory as
    ``<save_name>-resume-<iteration>.pt`` and once as ``latest.pt`` in the
    checkpoint directory. ``latest.json`` is then refreshed and old resume
    checkpoints are pruned according to ``engine_cfg['resume_keep']``
    (default ``3``).

    Args:
        iteration: Iteration number to record in the checkpoint and in the
            file name.
    """
    is_primary = torch.distributed.get_rank() == 0
    if not is_primary:
        return

    checkpoint = self._build_checkpoint(iteration)
    save_name = self.engine_cfg['save_name']
    resume_path = osp.join(
        self._resume_dir(),
        f"{save_name}-resume-{iteration:0>5}.pt",
    )
    latest_path = osp.join(self._checkpoint_dir(), "latest.pt")

    # Numbered copy first, then the stable "latest" alias.
    for destination in (resume_path, latest_path):
        self._save_checkpoint_file(checkpoint, destination)

    self._write_resume_meta(iteration, resume_path)
    keep_count = int(self.engine_cfg.get('resume_keep', 3))
    self._prune_resume_checkpoints(keep_count)
|
||||
|
||||
def _load_ckpt(self, save_name):
|
||||
load_ckpt_strict = self.engine_cfg['restore_ckpt_strict']
|
||||
|
||||
checkpoint = torch.load(save_name, map_location=torch.device(
|
||||
"cuda", self.device))
|
||||
checkpoint = torch.load(
|
||||
save_name,
|
||||
map_location=torch.device("cuda", self.device),
|
||||
weights_only=False,
|
||||
)
|
||||
model_state_dict = checkpoint['model']
|
||||
|
||||
if not load_ckpt_strict:
|
||||
@@ -271,6 +372,33 @@ class BaseModel(MetaModel, nn.Module):
|
||||
else:
|
||||
self.msg_mgr.log_warning(
|
||||
"Restore NO Scheduler from %s !!!" % save_name)
|
||||
if (
|
||||
self.engine_cfg.get('enable_float16', False)
|
||||
and hasattr(self, 'Scaler')
|
||||
and 'scaler' in checkpoint
|
||||
):
|
||||
self.Scaler.load_state_dict(checkpoint['scaler'])
|
||||
if 'random_state' in checkpoint:
|
||||
random.setstate(checkpoint['random_state'])
|
||||
if 'numpy_random_state' in checkpoint:
|
||||
np.random.set_state(checkpoint['numpy_random_state'])
|
||||
if 'torch_random_state' in checkpoint:
|
||||
torch_random_state = checkpoint['torch_random_state']
|
||||
if not isinstance(torch_random_state, torch.Tensor):
|
||||
torch_random_state = torch.as_tensor(
|
||||
torch_random_state,
|
||||
dtype=torch.uint8,
|
||||
)
|
||||
torch.set_rng_state(torch_random_state.cpu())
|
||||
if 'cuda_random_state_all' in checkpoint and torch.cuda.is_available():
|
||||
cuda_random_state_all = checkpoint['cuda_random_state_all']
|
||||
normalized_cuda_states = []
|
||||
for state in cuda_random_state_all:
|
||||
if not isinstance(state, torch.Tensor):
|
||||
state = torch.as_tensor(state, dtype=torch.uint8)
|
||||
normalized_cuda_states.append(state.cpu())
|
||||
torch.cuda.set_rng_state_all(normalized_cuda_states)
|
||||
self.iteration = int(checkpoint.get('iteration', self.iteration))
|
||||
self.msg_mgr.log_info("Restore Parameters from %s !!!" % save_name)
|
||||
|
||||
def resume_ckpt(self, restore_hint):
|
||||
@@ -278,10 +406,15 @@ class BaseModel(MetaModel, nn.Module):
|
||||
save_name = self.engine_cfg['save_name']
|
||||
save_name = osp.join(
|
||||
self.save_path, 'checkpoints/{}-{:0>5}.pt'.format(save_name, restore_hint))
|
||||
self.iteration = restore_hint
|
||||
elif isinstance(restore_hint, str):
|
||||
save_name = restore_hint
|
||||
self.iteration = 0
|
||||
if restore_hint == 'latest':
|
||||
save_name = self._get_latest_resume_ckpt_path()
|
||||
if save_name is None:
|
||||
raise FileNotFoundError(
|
||||
f"No latest checkpoint found under {self._checkpoint_dir()}"
|
||||
)
|
||||
else:
|
||||
save_name = restore_hint
|
||||
else:
|
||||
raise ValueError(
|
||||
"Error type for -Restore_Hint-, supported: int or string.")
|
||||
@@ -417,6 +550,9 @@ class BaseModel(MetaModel, nn.Module):
|
||||
visual_summary['scalar/learning_rate'] = model.optimizer.param_groups[0]['lr']
|
||||
|
||||
model.msg_mgr.train_step(loss_info, visual_summary)
|
||||
resume_every_iter = int(model.engine_cfg.get('resume_every_iter', 0))
|
||||
if resume_every_iter > 0 and model.iteration % resume_every_iter == 0:
|
||||
model.save_resume_ckpt(model.iteration)
|
||||
if model.iteration % model.engine_cfg['save_iter'] == 0:
|
||||
# save the checkpoint
|
||||
model.save_ckpt(model.iteration)
|
||||
|
||||
@@ -0,0 +1,421 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pickle
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
from jaxtyping import Float
|
||||
from numpy.typing import NDArray
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.append(str(REPO_ROOT))
|
||||
|
||||
from datasets import pretreatment_scoliosis_drf as drf_prep
|
||||
|
||||
POSE_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-pose-pkl")
|
||||
HEATMAP_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign")
|
||||
PARTITION_PATH = REPO_ROOT / "datasets/Scoliosis1K/Scoliosis1K_118.json"
|
||||
HEATMAP_CFG_PATH = REPO_ROOT / "configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml"
|
||||
REPORT_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.md"
|
||||
JSON_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.json"
|
||||
|
||||
EPS = 1e-6
|
||||
THRESHOLD = 13.0
|
||||
SIDE_CUT = 10
|
||||
LABEL_TO_INT = {"negative": 0, "neutral": 1, "positive": 2}
|
||||
FloatArray = NDArray[np.float32]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SequenceKey:
    """Immutable, hashable identifier of one sequence taken from its file path."""

    # Subject identifier (compared against the partition id sets).
    pid: str
    # Class label directory name, e.g. a key of LABEL_TO_INT.
    label: str
    # Sequence directory name.
    seq: str
|
||||
|
||||
|
||||
@dataclass
class RunningStats:
    """Accumulator for a weighted running mean."""

    # Sum of value * n over every update() call.
    total: float = 0.0
    # Sum of the n weights passed to update().
    count: int = 0

    def update(self, value: float, n: int = 1) -> None:
        """Fold ``value`` into the accumulator as if it had been seen ``n`` times."""
        weighted = value * n
        self.total += weighted
        self.count += n

    @property
    def mean(self) -> float:
        """Return ``total / count``; the divisor is clamped to 1 so an empty accumulator yields 0.0."""
        divisor = self.count if self.count > 1 else 1
        return self.total / divisor
|
||||
|
||||
|
||||
def load_partition_ids() -> tuple[set[str], set[str]]:
    """Read the partition JSON at PARTITION_PATH.

    Returns:
        A ``(train_ids, test_ids)`` pair built from the ``TRAIN_SET`` and
        ``TEST_SET`` entries of the file.
    """
    raw_text = PARTITION_PATH.read_text(encoding="utf-8")
    split_table = json.loads(raw_text)
    train_ids = set(split_table["TRAIN_SET"])
    test_ids = set(split_table["TEST_SET"])
    return train_ids, test_ids
|
||||
|
||||
|
||||
def sequence_key_from_path(path: Path) -> SequenceKey:
    """Build a SequenceKey from the three directory components preceding the file name.

    The path is read as ``.../<pid>/<label>/<seq>/<file>``; an IndexError is
    raised when the path has fewer components than that.
    """
    components = path.parts
    subject_id = components[-4]
    class_label = components[-3]
    sequence_id = components[-2]
    return SequenceKey(pid=subject_id, label=class_label, seq=sequence_id)
|
||||
|
||||
|
||||
def iter_pose_paths() -> list[Path]:
    """Return, in sorted order, every ``*.pkl`` file three directories below POSE_ROOT."""
    pose_files = list(POSE_ROOT.glob("*/*/*/*.pkl"))
    pose_files.sort()
    return pose_files
|
||||
|
||||
|
||||
def iter_heatmap_paths() -> list[Path]:
    """Return, in sorted order, every ``0_heatmap.pkl`` file three directories below HEATMAP_ROOT."""
    heatmap_files = list(HEATMAP_ROOT.glob("*/*/*/0_heatmap.pkl"))
    heatmap_files.sort()
    return heatmap_files
|
||||
|
||||
|
||||
def read_pickle(path: Path) -> object:
    """Load and return the single object pickled in the file at ``path``.

    NOTE(review): unpickling can execute arbitrary code, so this must only be
    pointed at trusted, locally generated files.
    """
    with path.open("rb") as stream:
        payload = pickle.load(stream)
    return payload
|
||||
|
||||
|
||||
def bbox_from_mask(mask: NDArray[np.bool_]) -> tuple[float, float, float, float] | None:
    """Measure the tight bounding box of the truthy entries of a 2-D mask.

    Returns:
        ``(width, height, center_x, center_y)`` where width/height count the
        inclusive pixel extent and the centers are the midpoints of the first
        and last occupied column/row, or ``None`` when the mask has no truthy
        entry.
    """
    occupied_rows = np.flatnonzero(mask.any(axis=1))
    occupied_cols = np.flatnonzero(mask.any(axis=0))
    if not (occupied_rows.size and occupied_cols.size):
        return None

    # flatnonzero returns ascending indices, so the ends are the extremes.
    top, bottom = int(occupied_rows[0]), int(occupied_rows[-1])
    left, right = int(occupied_cols[0]), int(occupied_cols[-1])
    return (
        float(right - left + 1),
        float(bottom - top + 1),
        float((left + right) / 2.0),
        float((top + bottom) / 2.0),
    )
|
||||
|
||||
|
||||
def sequence_bbox_metrics(
    heatmap: Float[FloatArray, "frames channels height width"],
    threshold: float = THRESHOLD,
) -> dict[str, float]:
    """Summarise per-frame bounding-box geometry of one heatmap sequence.

    Args:
        heatmap: array indexed as (frames, channels, height, width); channel 0
            is treated as the "bone" map and channel 1 as the "joint" map.
        threshold: a pixel counts as active when its value is strictly greater
            than this.

    Returns:
        A dict of sequence-level means/standard deviations. Any statistic that
        received no samples is reported as 0.0.
    """

    def safe_mean(values: Iterable[float]) -> float:
        # float32 mean, falling back to 0.0 for an empty collection.
        samples = np.asarray(list(values), dtype=np.float32)
        return float(samples.mean()) if samples.size else 0.0

    def safe_std(values: Iterable[float]) -> float:
        # float32 population std, falling back to 0.0 for an empty collection.
        samples = np.asarray(list(values), dtype=np.float32)
        return float(samples.std()) if samples.size else 0.0

    # Per-pixel maximum over channels: the combined support of all channels.
    combined = heatmap.max(axis=1)
    bone_channel = heatmap[:, 0]
    joint_channel = heatmap[:, 1]

    box_widths: list[float] = []
    box_heights: list[float] = []
    box_centers_x: list[float] = []
    box_centers_y: list[float] = []
    active_ratios: list[float] = []
    side_mass_ratios: list[float] = []
    center_gap_x: list[float] = []
    center_gap_y: list[float] = []

    for support_frame, bone_frame, joint_frame in zip(combined, bone_channel, joint_channel):
        active = support_frame > threshold
        support_box = bbox_from_mask(active)
        if support_box is not None:
            box_widths.append(support_box[0])
            box_heights.append(support_box[1])
            box_centers_x.append(support_box[2])
            box_centers_y.append(support_box[3])
            active_ratios.append(float(active.mean()))

        # Share of un-thresholded intensity lying in the outer SIDE_CUT columns.
        frame_mass = float(support_frame.sum())
        if frame_mass > EPS:
            left_mass = support_frame[:, :SIDE_CUT].sum()
            right_mass = support_frame[:, -SIDE_CUT:].sum()
            side_mass_ratios.append(float(left_mass + right_mass) / frame_mass)

        # Offset between the bbox centers of the two channels, when both exist.
        bone_box = bbox_from_mask(bone_frame > threshold)
        joint_box = bbox_from_mask(joint_frame > threshold)
        if bone_box is None or joint_box is None:
            continue
        center_gap_x.append(abs(bone_box[2] - joint_box[2]))
        center_gap_y.append(abs(bone_box[3] - joint_box[3]))

    summary: dict[str, float] = {}
    summary["width_mean"] = safe_mean(box_widths)
    summary["height_mean"] = safe_mean(box_heights)
    summary["center_x_std"] = safe_std(box_centers_x)
    summary["center_y_std"] = safe_std(box_centers_y)
    summary["width_std"] = safe_std(box_widths)
    summary["height_std"] = safe_std(box_heights)
    summary["active_fraction_mean"] = safe_mean(active_ratios)
    summary["cut_mass_ratio_mean"] = safe_mean(side_mass_ratios)
    summary["bone_joint_dx_mean"] = safe_mean(center_gap_x)
    summary["bone_joint_dy_mean"] = safe_mean(center_gap_y)
    return summary
|
||||
|
||||
|
||||
def softmax_rows(logits: NDArray[np.float64]) -> NDArray[np.float64]:
    """Apply a softmax along axis 1 of a 2-D array of logits.

    Each row has its maximum subtracted before exponentiation so that large
    logits do not overflow.
    """
    row_peak = logits.max(axis=1, keepdims=True)
    unnormalized = np.exp(logits - row_peak)
    row_total = unnormalized.sum(axis=1, keepdims=True)
    return unnormalized / row_total
|
||||
|
||||
|
||||
def fit_softmax_regression(
    x: NDArray[np.float64],
    y: NDArray[np.int64],
    num_classes: int,
    steps: int = 4000,
    lr: float = 0.05,
    reg: float = 1e-4,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Fit a linear softmax classifier with full-batch gradient descent.

    Args:
        x: feature matrix with one row per sample.
        y: integer class index per sample, used to index a one-hot table.
        num_classes: number of output classes.
        steps: number of gradient-descent updates.
        lr: step size applied to both weights and bias.
        reg: L2 penalty coefficient applied to the weights only.

    Returns:
        ``(weights, bias)`` with shapes ``(x.shape[1], num_classes)`` and
        ``(num_classes,)``, both starting from zeros.
    """
    feature_dim = x.shape[1]
    sample_count = x.shape[0]
    weights = np.zeros((feature_dim, num_classes), dtype=np.float64)
    bias = np.zeros(num_classes, dtype=np.float64)
    targets = np.eye(num_classes, dtype=np.float64)[y]

    for _ in range(steps):
        # Gradient of mean cross-entropy w.r.t. the logits.
        residual = softmax_rows(x @ weights + bias) - targets
        weight_step = (x.T @ residual) / sample_count + reg * weights
        bias_step = residual.mean(axis=0)
        weights -= lr * weight_step
        bias -= lr * bias_step

    return weights, bias
|
||||
|
||||
|
||||
def evaluate_predictions(
    y_true: NDArray[np.int64],
    y_pred: NDArray[np.int64],
    num_classes: int,
) -> dict[str, float]:
    """Compute accuracy and macro-averaged precision/recall/F1, all in percent.

    Per-class precision and recall use a denominator clamped to at least 1, so
    a class with no predictions (or no ground truth) scores 0; the F1
    denominator is clamped to EPS for the same reason.
    """
    class_precisions: list[float] = []
    class_recalls: list[float] = []
    class_f1s: list[float] = []

    for class_id in range(num_classes):
        is_actual = y_true == class_id
        is_predicted = y_pred == class_id
        hits = int((is_actual & is_predicted).sum())
        false_alarms = int((~is_actual & is_predicted).sum())
        misses = int((is_actual & ~is_predicted).sum())

        class_precision = hits / max(hits + false_alarms, 1)
        class_recall = hits / max(hits + misses, 1)
        harmonic = 2 * class_precision * class_recall / max(class_precision + class_recall, EPS)

        class_precisions.append(class_precision)
        class_recalls.append(class_recall)
        class_f1s.append(harmonic)

    match_rate = float((y_true == y_pred).mean())
    return {
        "accuracy": 100.0 * match_rate,
        "macro_precision": 100.0 * float(np.mean(class_precisions)),
        "macro_recall": 100.0 * float(np.mean(class_recalls)),
        "macro_f1": 100.0 * float(np.mean(class_f1s)),
    }
|
||||
|
||||
|
||||
def _resolve_split(pid: str, train_ids: set[str], test_ids: set[str]) -> str | None:
    """Return "train" or "test" for a partitioned subject id, or None when it is in neither set."""
    if pid in train_ids:
        return "train"
    if pid in test_ids:
        return "test"
    return None


def analyze() -> dict[str, object]:
    """Compute dataset diagnostics over the pose and heatmap/PAV exports.

    The function walks every pose pickle and every heatmap pickle, groups the
    sequences by the train/test partition, and gathers:

    * per split/label sequence counts,
    * per split/label mean keypoint confidence and ``conf > 0.05`` ratio,
    * per-sequence heatmap bbox metrics (mean and 95th percentile),
    * per-label mean PAV value and a linear softmax probe that is fit on the
      train split and scored on the test split.

    Sequences whose subject id appears in neither partition set are skipped
    (with a warning on stderr) instead of being counted as test data.

    Returns:
        A JSON-serialisable dict with the keys ``split_label_counts``,
        ``pose_confidence_mean``, ``pose_valid_ratio_mean``,
        ``pav_label_means``, ``pav_softmax_probe`` and ``heatmap_metrics``.

    Raises:
        ValueError: if either split ends up with no PAV vectors.
        KeyError: if a heatmap sequence carries a label missing from
            LABEL_TO_INT.
    """
    train_ids, test_ids = load_partition_ids()

    # NOTE(review): pose_transform is never used below; the two calls are kept
    # so the run still fails early if the heatmap config cannot be loaded.
    heatmap_cfg = drf_prep.load_heatmap_cfg(str(HEATMAP_CFG_PATH))
    pose_transform = drf_prep.build_pose_transform(heatmap_cfg)

    split_label_counts: dict[str, dict[str, int]] = {
        "train": defaultdict(int),
        "test": defaultdict(int),
    }
    pose_quality: dict[str, dict[str, RunningStats]] = {
        "train": defaultdict(RunningStats),
        "test": defaultdict(RunningStats),
    }
    valid_ratio: dict[str, dict[str, RunningStats]] = {
        "train": defaultdict(RunningStats),
        "test": defaultdict(RunningStats),
    }

    skipped_pose = 0
    for pose_path in iter_pose_paths():
        key = sequence_key_from_path(pose_path)
        split = _resolve_split(key.pid, train_ids, test_ids)
        if split is None:
            # Not part of this partition: do not let it inflate the test split.
            skipped_pose += 1
            continue
        split_label_counts[split][key.label] += 1

        pose = drf_prep.read_pose(str(pose_path))
        # The third value of the last axis is read as confidence when present;
        # otherwise every keypoint is treated as fully confident.
        conf = pose[..., 2] if pose.shape[-1] >= 3 else np.ones(pose.shape[:-1], dtype=np.float32)
        pose_quality[split][key.label].update(float(conf.mean()))
        valid_ratio[split][key.label].update(float((conf > 0.05).mean()))
    if skipped_pose:
        print(
            f"Skipped {skipped_pose} pose sequence(s) whose subject id is not in {PARTITION_PATH}",
            file=sys.stderr,
        )

    heatmap_metrics: dict[str, list[float]] = defaultdict(list)
    pav_vectors_train: list[NDArray[np.float64]] = []
    pav_vectors_test: list[NDArray[np.float64]] = []
    labels_train: list[int] = []
    labels_test: list[int] = []
    pav_means: dict[str, list[float]] = defaultdict(list)

    skipped_heatmap = 0
    for heatmap_path in iter_heatmap_paths():
        key = sequence_key_from_path(heatmap_path)
        split = _resolve_split(key.pid, train_ids, test_ids)
        if split is None:
            skipped_heatmap += 1
            continue
        heatmap = np.asarray(read_pickle(heatmap_path), dtype=np.float32)
        metrics = sequence_bbox_metrics(heatmap)
        for metric_name, metric_value in metrics.items():
            heatmap_metrics[f"{split}.{metric_name}"].append(metric_value)
            heatmap_metrics[f"all.{metric_name}"].append(metric_value)

        # The PAV pickle sits next to the heatmap; only its first entry is
        # used as the sequence-level vector.
        pav_path = heatmap_path.with_name("1_pav.pkl")
        pav_seq = np.asarray(read_pickle(pav_path), dtype=np.float32)
        pav_vector = pav_seq[0].reshape(-1).astype(np.float64)
        pav_means[key.label].append(float(pav_vector.mean()))
        if split == "train":
            pav_vectors_train.append(pav_vector)
            labels_train.append(LABEL_TO_INT[key.label])
        else:
            pav_vectors_test.append(pav_vector)
            labels_test.append(LABEL_TO_INT[key.label])
    if skipped_heatmap:
        print(
            f"Skipped {skipped_heatmap} heatmap sequence(s) whose subject id is not in {PARTITION_PATH}",
            file=sys.stderr,
        )

    if not pav_vectors_train or not pav_vectors_test:
        raise ValueError(
            "PAV probe needs at least one train and one test sequence; found "
            f"{len(pav_vectors_train)} train and {len(pav_vectors_test)} test under {HEATMAP_ROOT}"
        )

    x_train = np.stack(pav_vectors_train, axis=0)
    x_test = np.stack(pav_vectors_test, axis=0)
    y_train = np.asarray(labels_train, dtype=np.int64)
    y_test = np.asarray(labels_test, dtype=np.int64)

    # Standardise with train-split statistics only, then reuse them on test.
    mean = x_train.mean(axis=0, keepdims=True)
    std = np.maximum(x_train.std(axis=0, keepdims=True), EPS)
    x_train_std = (x_train - mean) / std
    x_test_std = (x_test - mean) / std
    weights, bias = fit_softmax_regression(x_train_std, y_train, num_classes=3)
    y_pred = np.argmax(x_test_std @ weights + bias, axis=1).astype(np.int64)
    pav_classifier = evaluate_predictions(y_test, y_pred, num_classes=3)

    results: dict[str, object] = {
        "split_label_counts": split_label_counts,
        "pose_confidence_mean": {
            split_name: {label: stats.mean for label, stats in per_label.items()}
            for split_name, per_label in pose_quality.items()
        },
        "pose_valid_ratio_mean": {
            split_name: {label: stats.mean for label, stats in per_label.items()}
            for split_name, per_label in valid_ratio.items()
        },
        "pav_label_means": {
            label: float(np.mean(values))
            for label, values in pav_means.items()
        },
        "pav_softmax_probe": pav_classifier,
        "heatmap_metrics": {
            metric_key: {
                "mean": float(np.mean(values)),
                "p95": float(np.percentile(values, 95)),
            }
            for metric_key, values in heatmap_metrics.items()
        },
    }
    return results
|
||||
|
||||
|
||||
def format_report(results: dict[str, object]) -> str:
    """Render the analysis results as a Markdown report.

    Args:
        results: mapping with the keys ``split_label_counts``,
            ``pose_confidence_mean``, ``pose_valid_ratio_mean``,
            ``heatmap_metrics``, ``pav_softmax_probe`` and ``pav_label_means``.
            ``heatmap_metrics`` must hold an ``all.<name>`` entry with ``mean``
            and ``p95`` for every metric printed below.

    Returns:
        The full report text, ending with a newline.

    Raises:
        KeyError: if a required key, split or label is missing from ``results``.
    """
    split_counts = results["split_label_counts"]
    pose_conf = results["pose_confidence_mean"]
    pose_valid = results["pose_valid_ratio_mean"]
    heat = results["heatmap_metrics"]
    pav_probe = results["pav_softmax_probe"]
    pav_means = results["pav_label_means"]

    def heat_stat(name: str) -> tuple[float, float]:
        # Only the statistics pooled over all sequences are reported.
        entry = heat[f"all.{name}"]
        return entry["mean"], entry["p95"]

    center_x_std_mean, center_x_std_p95 = heat_stat("center_x_std")
    center_y_std_mean, center_y_std_p95 = heat_stat("center_y_std")
    width_std_mean, width_std_p95 = heat_stat("width_std")
    height_std_mean, height_std_p95 = heat_stat("height_std")
    cut_ratio_mean, cut_ratio_p95 = heat_stat("cut_mass_ratio_mean")
    bone_joint_dx_mean, bone_joint_dx_p95 = heat_stat("bone_joint_dx_mean")
    bone_joint_dy_mean, bone_joint_dy_p95 = heat_stat("bone_joint_dy_mean")
    width_mean, width_p95 = heat_stat("width_mean")
    height_mean, height_p95 = heat_stat("height_mean")
    active_fraction_mean, active_fraction_p95 = heat_stat("active_fraction_mean")

    # The numbers quoted in the "Reading" section are taken from the measured
    # statistics so the narrative cannot drift from the tables above it.
    # NOTE(review): the frame height "64" and the qualitative wording are still
    # static text; re-read them when analysing a different export.
    active_percent = 100.0 * active_fraction_mean

    return f"""# Scoliosis1K Dataset Analysis (1:1:8, shared-align skeleton maps)

## Split

Train counts:
- negative: {split_counts["train"]["negative"]}
- neutral: {split_counts["train"]["neutral"]}
- positive: {split_counts["train"]["positive"]}

Test counts:
- negative: {split_counts["test"]["negative"]}
- neutral: {split_counts["test"]["neutral"]}
- positive: {split_counts["test"]["positive"]}

## Raw pose quality

Mean keypoint confidence by split/class:
- train negative: {pose_conf["train"]["negative"]:.4f}
- train neutral: {pose_conf["train"]["neutral"]:.4f}
- train positive: {pose_conf["train"]["positive"]:.4f}
- test negative: {pose_conf["test"]["negative"]:.4f}
- test neutral: {pose_conf["test"]["neutral"]:.4f}
- test positive: {pose_conf["test"]["positive"]:.4f}

Mean valid-joint ratio (`conf > 0.05`) by split/class:
- train negative: {pose_valid["train"]["negative"]:.4f}
- train neutral: {pose_valid["train"]["neutral"]:.4f}
- train positive: {pose_valid["train"]["positive"]:.4f}
- test negative: {pose_valid["test"]["negative"]:.4f}
- test neutral: {pose_valid["test"]["neutral"]:.4f}
- test positive: {pose_valid["test"]["positive"]:.4f}

## PAV signal

Mean normalized PAV value by label:
- negative: {pav_means["negative"]:.4f}
- neutral: {pav_means["neutral"]:.4f}
- positive: {pav_means["positive"]:.4f}

Linear softmax probe over sequence-level PAV (fit on the train split, evaluated on the test split):
- accuracy: {pav_probe["accuracy"]:.2f}%
- macro precision: {pav_probe["macro_precision"]:.2f}%
- macro recall: {pav_probe["macro_recall"]:.2f}%
- macro F1: {pav_probe["macro_f1"]:.2f}%

## Shared-align heatmap geometry

Combined support bbox stats over all sequences:
- width mean / p95: {width_mean:.2f} / {width_p95:.2f}
- height mean / p95: {height_mean:.2f} / {height_p95:.2f}
- active fraction mean / p95: {active_fraction_mean:.4f} / {active_fraction_p95:.4f}

Per-sequence temporal jitter (std over frames):
- center-x std mean / p95: {center_x_std_mean:.3f} / {center_x_std_p95:.3f}
- center-y std mean / p95: {center_y_std_mean:.3f} / {center_y_std_p95:.3f}
- width std mean / p95: {width_std_mean:.3f} / {width_std_p95:.3f}
- height std mean / p95: {height_std_mean:.3f} / {height_std_p95:.3f}

Residual limb-vs-joint bbox-center mismatch after shared alignment:
- dx mean / p95: {bone_joint_dx_mean:.3f} / {bone_joint_dx_p95:.3f}
- dy mean / p95: {bone_joint_dy_mean:.3f} / {bone_joint_dy_p95:.3f}

Estimated intensity mass in the columns removed by `BaseSilCuttingTransform`:
- mean clipped-mass ratio: {cut_ratio_mean:.4f}
- p95 clipped-mass ratio: {cut_ratio_p95:.4f}

## Reading

- The raw pose data does not look broken. Confidence and valid-joint ratios are high and similar across classes.
- The sequence-level PAV still carries useful label signal, so the dataset is not devoid of scoliosis information.
- Shared alignment removed the old limb-vs-joint registration bug; residual channel-center mismatch is now small.
- The remaining suspicious area is the visual branch: the skeleton map still has frame-to-frame bbox jitter, and the support bbox is almost full-height (`~{height_mean:.1f} / 64`) and fairly dense (`~{active_percent:.0f}%` active pixels), which may be washing out subtle asymmetry cues.
- `BaseSilCuttingTransform` does not appear to be the main failure source for this shared-align export; the measured mass in the removed side margins is near zero.
- The dataset itself looks usable; the bigger issue still appears to be how the current skeleton-map preprocessing/runtime path presents that data to ScoNet.
"""
|
||||
|
||||
|
||||
def main() -> None:
    """Run the analysis, then write the Markdown report followed by the JSON dump.

    Both destination paths are printed once both files have been written.
    """
    results = analyze()

    report_text = format_report(results)
    REPORT_PATH.write_text(report_text, encoding="utf-8")

    json_text = json.dumps(results, indent=2, sort_keys=True)
    JSON_PATH.write_text(json_text, encoding="utf-8")

    for written_path in (REPORT_PATH, JSON_PATH):
        print(f"Wrote {written_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,74 @@
|
||||
# Scoliosis1K Dataset Analysis (1:1:8, shared-align skeleton maps)
|
||||
|
||||
## Split
|
||||
|
||||
Train counts:
|
||||
- negative: 596
|
||||
- neutral: 74
|
||||
- positive: 74
|
||||
|
||||
Test counts:
|
||||
- negative: 204
|
||||
- neutral: 126
|
||||
- positive: 419
|
||||
|
||||
## Raw pose quality
|
||||
|
||||
Mean keypoint confidence by split/class:
|
||||
- train negative: 0.9016
|
||||
- train neutral: 0.9023
|
||||
- train positive: 0.8987
|
||||
- test negative: 0.9009
|
||||
- test neutral: 0.9020
|
||||
- test positive: 0.8999
|
||||
|
||||
Mean valid-joint ratio (`conf > 0.05`) by split/class:
|
||||
- train negative: 1.0000
|
||||
- train neutral: 1.0000
|
||||
- train positive: 1.0000
|
||||
- test negative: 1.0000
|
||||
- test neutral: 1.0000
|
||||
- test positive: 1.0000
|
||||
|
||||
## PAV signal
|
||||
|
||||
Mean normalized PAV value by label:
|
||||
- negative: 0.3068
|
||||
- neutral: 0.3546
|
||||
- positive: 0.3635
|
||||
|
||||
Linear softmax probe over sequence-level PAV (fit on the train split, evaluated on the test split):
|
||||
- accuracy: 50.87%
|
||||
- macro precision: 50.50%
|
||||
- macro recall: 48.19%
|
||||
- macro F1: 39.88%
|
||||
|
||||
## Shared-align heatmap geometry
|
||||
|
||||
Combined support bbox stats over all sequences:
|
||||
- width mean / p95: 32.13 / 33.57
|
||||
- height mean / p95: 61.52 / 61.61
|
||||
- active fraction mean / p95: 0.3634 / 0.3738
|
||||
|
||||
Per-sequence temporal jitter (std over frames):
|
||||
- center-x std mean / p95: 0.864 / 1.243
|
||||
- center-y std mean / p95: 0.516 / 0.704
|
||||
- width std mean / p95: 2.152 / 2.804
|
||||
- height std mean / p95: 0.507 / 0.545
|
||||
|
||||
Residual limb-vs-joint bbox-center mismatch after shared alignment:
|
||||
- dx mean / p95: 0.195 / 0.229
|
||||
- dy mean / p95: 0.251 / 0.357
|
||||
|
||||
Estimated intensity mass in the columns removed by `BaseSilCuttingTransform`:
|
||||
- mean clipped-mass ratio: 0.0000
|
||||
- p95 clipped-mass ratio: 0.0000
|
||||
|
||||
## Reading
|
||||
|
||||
- The raw pose data does not look broken. Confidence and valid-joint ratios are high and similar across classes.
|
||||
- The sequence-level PAV still carries useful label signal, so the dataset is not devoid of scoliosis information.
|
||||
- Shared alignment removed the old limb-vs-joint registration bug; residual channel-center mismatch is now small.
|
||||
- The remaining suspicious area is the visual branch: the skeleton map still has frame-to-frame bbox jitter, and the support bbox is almost full-height (`~61.5 / 64`) and fairly dense (`~36%` active pixels), which may be washing out subtle asymmetry cues.
|
||||
- `BaseSilCuttingTransform` does not appear to be the main failure source for this shared-align export; the measured mass in the removed side margins is near zero.
|
||||
- The dataset itself looks usable; the bigger issue still appears to be how the current skeleton-map preprocessing/runtime path presents that data to ScoNet.
|
||||
Reference in New Issue
Block a user