diff --git a/README.md b/README.md index 473acf5..0c73aff 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ CUDA_VISIBLE_DEVICES=0,1 uv run python -m torch.distributed.launch --nproc_per_n ``` > **Note:** The `--nproc_per_node` argument must exactly match the number of GPUs specified in `CUDA_VISIBLE_DEVICES`. For single-GPU evaluation, use `CUDA_VISIBLE_DEVICES=0` and `--nproc_per_node=1` with the DDP launcher. +> +> **Resume Tip:** To survive interrupted training runs, set `trainer_cfg.resume_every_iter` to a non-zero value and optionally `trainer_cfg.auto_resume_latest: true`. OpenGait will keep `output/.../checkpoints/latest.pt` updated for crash recovery. diff --git a/configs/default.yaml b/configs/default.yaml index 6743027..6434ab3 100644 --- a/configs/default.yaml +++ b/configs/default.yaml @@ -68,6 +68,9 @@ trainer_cfg: optimizer_reset: false scheduler_reset: false restore_hint: 0 + auto_resume_latest: false + resume_every_iter: 0 + resume_keep: 3 save_iter: 2000 save_name: tmp sync_BN: false diff --git a/configs/drf/pretreatment_heatmap_drf_sigma15.yaml b/configs/drf/pretreatment_heatmap_drf_sigma15.yaml new file mode 100644 index 0000000..3047c8e --- /dev/null +++ b/configs/drf/pretreatment_heatmap_drf_sigma15.yaml @@ -0,0 +1,25 @@ +coco18tococo17_args: + transfer_to_coco17: False + +padkeypoints_args: + pad_method: knn + use_conf: True + +norm_args: + pose_format: coco + use_conf: ${padkeypoints_args.use_conf} + heatmap_image_height: 128 + +heatmap_generator_args: + sigma: 1.5 + use_score: ${padkeypoints_args.use_conf} + img_h: ${norm_args.heatmap_image_height} + img_w: ${norm_args.heatmap_image_height} + with_limb: null + with_kp: null + +align_args: + align: True + final_img_size: 64 + offset: 0 + heatmap_image_size: ${norm_args.heatmap_image_height} diff --git a/configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml b/configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml new file mode 100644 index 0000000..8d5f310 --- /dev/null +++ 
b/configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml @@ -0,0 +1,28 @@ +coco18tococo17_args: + transfer_to_coco17: False + +padkeypoints_args: + pad_method: knn + use_conf: True + +norm_args: + pose_format: coco + use_conf: ${padkeypoints_args.use_conf} + heatmap_image_height: 128 + +heatmap_generator_args: + sigma: 1.5 + use_score: ${padkeypoints_args.use_conf} + img_h: ${norm_args.heatmap_image_height} + img_w: ${norm_args.heatmap_image_height} + with_limb: null + with_kp: null + +sigma_limb: 1.5 +sigma_joint: 8.0 + +align_args: + align: True + final_img_size: 64 + offset: 0 + heatmap_image_size: ${norm_args.heatmap_image_height} diff --git a/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu.yaml b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu.yaml new file mode 100644 index 0000000..4af4003 --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu.yaml @@ -0,0 +1,105 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15 + dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json + data_in_use: + - true + - false + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: ScoNet_skeleton_118_sigma15 + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 1 + sample_type: all_ordered + frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + in_channel: 2 + channels: + - 64 + - 128 + - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + 
in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 20000 + save_name: ScoNet_skeleton_118_sigma15 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 8 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu_bs12x8.yaml b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu_bs12x8.yaml new file mode 100644 index 0000000..826564f --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_1gpu_bs12x8.yaml @@ -0,0 +1,105 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15 + dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json + data_in_use: + - true + - false + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: ScoNet_skeleton_118_sigma15_bs12x8 + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 1 + sample_type: all_ordered + frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + in_channel: 2 + channels: + - 64 + - 128 
+ - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 20000 + save_name: ScoNet_skeleton_118_sigma15_bs12x8 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 12 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_1gpu_bs12x8.yaml b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_1gpu_bs12x8.yaml new file mode 100644 index 0000000..2191524 --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_1gpu_bs12x8.yaml @@ -0,0 +1,105 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign + dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json + data_in_use: + - true + - false + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_bs12x8 + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 1 + sample_type: all_ordered + frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + 
type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + in_channel: 2 + channels: + - 64 + - 128 + - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 20000 + save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_bs12x8 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 12 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_2gpu_bs12x8.yaml b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_2gpu_bs12x8.yaml new file mode 100644 index 0000000..a2bcf56 --- /dev/null +++ b/configs/sconet/sconet_scoliosis1k_skeleton_118_sigma15_joint8_2gpu_bs12x8.yaml @@ -0,0 +1,108 @@ +data_cfg: + dataset_name: Scoliosis1K + dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign + dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json + data_in_use: + - true + - false + num_workers: 1 + remove_no_gallery: false + test_dataset_name: Scoliosis1K + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8 + eval_func: evaluate_scoliosis + sampler: + batch_shuffle: false + batch_size: 2 + sample_type: all_ordered + 
frames_all_limit: 720 + metric: euc + transform: + - type: BaseSilCuttingTransform + +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 16 + type: CrossEntropyLoss + log_prefix: softmax + log_accuracy: true + +model_cfg: + model: ScoNet + backbone_cfg: + type: ResNet9 + block: BasicBlock + in_channel: 2 + channels: + - 64 + - 128 + - 256 + - 512 + layers: + - 1 + - 1 + - 1 + - 1 + strides: + - 1 + - 2 + - 2 + - 1 + maxpool: false + SeparateFCs: + in_channels: 512 + out_channels: 256 + parts_num: 16 + SeparateBNNecks: + class_num: 3 + in_channels: 256 + parts_num: 16 + bin_num: + - 16 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 10000 + - 14000 + - 18000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + fix_BN: false + with_test: false + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + auto_resume_latest: true + resume_every_iter: 500 + resume_keep: 3 + save_iter: 20000 + save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8 + sync_BN: true + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 12 + - 8 + frames_num_fixed: 30 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: BaseSilCuttingTransform diff --git a/datasets/Scoliosis1K/README.md b/datasets/Scoliosis1K/README.md index efaba07..fb0545d 100644 --- a/datasets/Scoliosis1K/README.md +++ b/datasets/Scoliosis1K/README.md @@ -75,6 +75,7 @@ The silhouette and skeleton-map pipelines are different experiments and should n * `Scoliosis1K-sil-pkl` is the silhouette modality used by the standard ScoNet configs. * pose-derived heatmap roots such as `Scoliosis1K_sigma_8.0/pkl` or DRF exports are skeleton-map inputs and require `in_channel: 2`. +* DRF does **not** use the silhouette stream as an input. It uses `0_heatmap.pkl` plus `1_pav.pkl`. 
Naming note: @@ -89,6 +90,18 @@ A strong silhouette checkpoint does not validate the skeleton-map path. In parti So if you are debugging DRF or `ScoNet-MT-ske` reproduction, do not use `ScoNet-20000-better.pt` as evidence that the heatmap preprocessing is correct. +### Overlay caveat + +Do not treat a direct overlay between `Scoliosis1K-sil-pkl` and pose-derived skeleton maps as a valid alignment test. + +Reason: + +* the released silhouette modality is an estimated segmentation output from `PP-HumanSeg v2` +* the released pose modality is an estimated keypoint output from `ViTPose` +* the two modalities are normalized by different preprocessing pipelines before they reach OpenGait + +So a silhouette-vs-skeleton mismatch in a debug figure is usually a cross-modality frame-of-reference issue, not proof that the raw dataset is bad. The more important check for skeleton-map debugging is whether the **limb and joint channels align with each other** inside `0_heatmap.pkl`. + --- ## Pose-to-Heatmap Conversion @@ -146,6 +159,21 @@ If you explicitly want train-only PAV min-max statistics, add: --stats_partition=./datasets/Scoliosis1K/Scoliosis1K_118.json ``` +### Heatmap debugging notes + +Current confirmed findings from local debugging: + +* the raw pose dataset itself looks healthy; poor `ScoNet-MT-ske` results are not explained by obvious missing-joint collapse +* a larger heatmap sigma can materially blur away the articulated structure; `sigma=8` was much broader than the silhouette geometry, while smaller sigma values recovered more structure +* an earlier bug aligned the limb and joint channels separately; that made the two channels of `0_heatmap.pkl` slightly misregistered +* the heatmap path is now patched so limb and joint channels share one alignment crop + +Remaining caution: + +* the exported skeleton map is stored as `64x64` +* if the runtime config uses `BaseSilCuttingTransform`, the network actually sees `64x44` +* that symmetric left/right crop is not 
automatically wrong, but it is still a meaningful ablation point for skeleton-map experiments + The output layout is: ```text diff --git a/datasets/pretreatment_heatmap.py b/datasets/pretreatment_heatmap.py index 1480d36..9c9f16b 100644 --- a/datasets/pretreatment_heatmap.py +++ b/datasets/pretreatment_heatmap.py @@ -8,7 +8,8 @@ import pickle import argparse import numpy as np from glob import glob -from typing import Literal +from copy import deepcopy +from typing import Any, Literal from tqdm import tqdm import matplotlib.cm as cm import torch.distributed as dist @@ -516,7 +517,7 @@ class GatherTransform(object): """ Gather the different transforms. """ - def __init__(self, base_transform, transform_bone, transform_joint): + def __init__(self, base_transform, transform_bone, transform_joint, align_transform=None): """ base_transform: Some common transform, e.g., COCO18toCOCO17, PadKeypoints, CenterAndScale @@ -526,12 +527,15 @@ class GatherTransform(object): self.base_transform = base_transform self.transform_bone = transform_bone self.transform_joint = transform_joint + self.align_transform = align_transform def __call__(self, pose_data): x = self.base_transform(pose_data) heatmap_bone = self.transform_bone(x) # [T, 1, H, W] heatmap_joint = self.transform_joint(x) # [T, 1, H, W] heatmap = np.concatenate([heatmap_bone, heatmap_joint], axis=1) + if self.align_transform is not None: + heatmap = self.align_transform(heatmap) return heatmap class HeatmapAlignment(): @@ -543,23 +547,32 @@ class HeatmapAlignment(): def center_crop(self, heatmap): """ - Input: [1, heatmap_image_size, heatmap_image_size] - Output: [1, final_img_size, final_img_size] + Input: [C, heatmap_image_size, heatmap_image_size] + Output: [C, final_img_size, final_img_size] """ - raw_heatmap = heatmap[0] - if self.align: - y_sum = raw_heatmap.sum(axis=1) - y_top = (y_sum != 0).argmax(axis=0) - y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0) - height = y_btm - y_top + 1 - raw_heatmap = 
raw_heatmap[y_top - self.offset: y_btm + 1 + self.offset, (self.heatmap_image_size // 2) - (height // 2) : (self.heatmap_image_size // 2) + (height // 2) + 1] - raw_heatmap = cv2.resize(raw_heatmap, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA) - return raw_heatmap[np.newaxis, :, :] # [1, final_img_size, final_img_size] + raw_heatmap = heatmap + if self.align: + support_map = raw_heatmap.max(axis=0) + y_sum = support_map.sum(axis=1) + nonzero_rows = np.flatnonzero(y_sum != 0) + if nonzero_rows.size != 0: + y_top = max(int(nonzero_rows[0]) - self.offset, 0) + y_btm = min(int(nonzero_rows[-1]) + self.offset, self.heatmap_image_size - 1) + height = y_btm - y_top + 1 + x_center = self.heatmap_image_size // 2 + x_left = max(x_center - (height // 2), 0) + x_right = min(x_center + (height // 2) + 1, self.heatmap_image_size) + raw_heatmap = raw_heatmap[:, y_top:y_btm + 1, x_left:x_right] + resized = np.stack([ + cv2.resize(channel, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA) + for channel in raw_heatmap + ], axis=0) + return resized # [C, final_img_size, final_img_size] def __call__(self, heatmap_imgs): """ - heatmap_imgs: (T, 1, raw_size, raw_size) - return (T, 1, final_img_size, final_img_size) + heatmap_imgs: (T, C, raw_size, raw_size) + return (T, C, final_img_size, final_img_size) """ original_dtype = heatmap_imgs.dtype heatmap_imgs = heatmap_imgs.astype(np.float32) / 255.0 @@ -570,12 +583,14 @@ class HeatmapAlignment(): return heatmap_imgs.astype(original_dtype) def GenerateHeatmapTransform( - coco18tococo17_args, - padkeypoints_args, - norm_args, - heatmap_generator_args, - align_args, + coco18tococo17_args: dict[str, Any], + padkeypoints_args: dict[str, Any], + norm_args: dict[str, Any], + heatmap_generator_args: dict[str, Any], + align_args: dict[str, Any], reduction: Literal["upstream", "max", "sum"] = "upstream", + sigma_limb: float | None = None, + sigma_joint: float | None = None, ): base_transform = 
T.Compose([ @@ -584,34 +599,44 @@ def GenerateHeatmapTransform( CenterAndScaleNormalizer(**norm_args), ]) - heatmap_generator_args["with_limb"] = True - heatmap_generator_args["with_kp"] = False + bone_generator_args = deepcopy(heatmap_generator_args) + joint_generator_args = deepcopy(heatmap_generator_args) + + bone_generator_args["with_limb"] = True + bone_generator_args["with_kp"] = False + if sigma_limb is not None: + bone_generator_args["sigma"] = sigma_limb bone_image_transform = ( HeatmapToImage() if reduction == "upstream" else HeatmapReducer(reduction=reduction) ) transform_bone = T.Compose([ - GeneratePoseTarget(**heatmap_generator_args), + GeneratePoseTarget(**bone_generator_args), bone_image_transform, - HeatmapAlignment(**align_args) ]) - heatmap_generator_args["with_limb"] = False - heatmap_generator_args["with_kp"] = True + joint_generator_args["with_limb"] = False + joint_generator_args["with_kp"] = True + if sigma_joint is not None: + joint_generator_args["sigma"] = sigma_joint joint_image_transform = ( HeatmapToImage() if reduction == "upstream" else HeatmapReducer(reduction=reduction) ) transform_joint = T.Compose([ - GeneratePoseTarget(**heatmap_generator_args), + GeneratePoseTarget(**joint_generator_args), joint_image_transform, - HeatmapAlignment(**align_args) ]) transform = T.Compose([ - GatherTransform(base_transform, transform_bone, transform_joint) # [T, 2, H, W] + GatherTransform( + base_transform, + transform_bone, + transform_joint, + HeatmapAlignment(**align_args), + ) # [T, 2, H, W] ]) return transform diff --git a/datasets/pretreatment_scoliosis_drf.py b/datasets/pretreatment_scoliosis_drf.py index 3cbd1c2..1df52ee 100644 --- a/datasets/pretreatment_scoliosis_drf.py +++ b/datasets/pretreatment_scoliosis_drf.py @@ -98,6 +98,15 @@ def load_heatmap_cfg(cfg_path: str) -> dict[str, Any]: return cast(dict[str, Any], replaced) +def optional_cfg_float(cfg: dict[str, Any], key: str) -> float | None: + value = cfg.get(key) + if value is None: 
+ return None + if not isinstance(value, (int, float)): + raise TypeError(f"Expected numeric value for {key}, got {type(value).__name__}") + return float(value) + + def build_pose_transform(cfg: dict[str, Any]) -> T.Compose: return T.Compose([ heatmap_prep.COCO18toCOCO17(**cfg["coco18tococo17_args"]), @@ -192,6 +201,8 @@ def main() -> None: heatmap_generator_args=heatmap_cfg["heatmap_generator_args"], align_args=heatmap_cfg["align_args"], reduction=cast(HeatmapReduction, args.heatmap_reduction), + sigma_limb=optional_cfg_float(heatmap_cfg, "sigma_limb"), + sigma_joint=optional_cfg_float(heatmap_cfg, "sigma_joint"), ) pose_paths = iter_pose_paths(args.pose_data_path) diff --git a/docs/3.detailed_config.md b/docs/3.detailed_config.md index 53c2ed4..8986c39 100644 --- a/docs/3.detailed_config.md +++ b/docs/3.detailed_config.md @@ -59,9 +59,12 @@ ### trainer_cfg * Trainer configuration > * Args -> * restore_hint: `int` value indicates the iteration number of restored checkpoint; `str` value indicates the path to restored checkpoint. The option is often used to finetune on new dataset or restore the interrupted training process. +> * restore_hint: `int` value indicates the iteration number of restored checkpoint; `str` value indicates the path to restored checkpoint. Use `latest` to restore the latest rolling resume checkpoint. The option is often used to finetune on new dataset or restore the interrupted training process. +> * auto_resume_latest: If `True` and `restore_hint==0`, automatically resume from `output/.../checkpoints/latest.pt` when it exists. > * fix_BN: If `True`, we fix the weight of all `BatchNorm` layers. > * log_iter: Log the information per `log_iter` iterations. +> * resume_every_iter: Save a rolling resume checkpoint every `resume_every_iter` iterations. These checkpoints update `checkpoints/latest.pt` and are intended for crash recovery. +> * resume_keep: Number of rolling resume checkpoints retained under `checkpoints/resume/`. 
Set `0` to keep all of them. > * save_iter: Save the checkpoint per `save_iter` iterations. > * with_test: If `True`, we test the model every `save_iter` iterations. A bit of performance impact.(*Disable in Default*) > * optimizer_reset: If `True` and `restore_hint!=0`, reset the optimizer while restoring the model. @@ -168,6 +171,9 @@ trainer_cfg: log_iter: 100 restore_ckpt_strict: true restore_hint: 0 + auto_resume_latest: false + resume_every_iter: 500 + resume_keep: 3 save_iter: 10000 save_name: Baseline sync_BN: true diff --git a/opengait/modeling/base_model.py b/opengait/modeling/base_model.py index cb99778..60f31cf 100644 --- a/opengait/modeling/base_model.py +++ b/opengait/modeling/base_model.py @@ -9,8 +9,13 @@ Typical usage: BaseModel.run_train(model) BaseModel.run_test(model) """ -import torch +import json +import os +import random +from typing import Any + import numpy as np +import torch import os.path as osp import torch.nn as nn import torch.optim as optim @@ -169,6 +174,13 @@ class BaseModel(MetaModel, nn.Module): restore_hint = self.engine_cfg['restore_hint'] if restore_hint != 0: self.resume_ckpt(restore_hint) + elif training and self.engine_cfg.get('auto_resume_latest', False): + latest_ckpt = self._get_latest_resume_ckpt_path() + if latest_ckpt is not None: + self.msg_mgr.log_info( + "Auto-resuming from latest checkpoint %s", latest_ckpt + ) + self.resume_ckpt(latest_ckpt) def get_backbone(self, backbone_cfg): """Get the backbone of the model.""" @@ -234,23 +246,112 @@ class BaseModel(MetaModel, nn.Module): scheduler = Scheduler(self.optimizer, **valid_arg) return scheduler + def _build_checkpoint(self, iteration: int) -> dict[str, Any]: + checkpoint: dict[str, Any] = { + 'model': self.state_dict(), + 'optimizer': self.optimizer.state_dict(), + 'scheduler': self.scheduler.state_dict(), + 'iteration': iteration, + 'random_state': random.getstate(), + 'numpy_random_state': np.random.get_state(), + 'torch_random_state': torch.get_rng_state(), + } + 
if torch.cuda.is_available(): + checkpoint['cuda_random_state_all'] = torch.cuda.get_rng_state_all() + if self.engine_cfg.get('enable_float16', False) and hasattr(self, 'Scaler'): + checkpoint['scaler'] = self.Scaler.state_dict() + return checkpoint + + def _checkpoint_dir(self) -> str: + return osp.join(self.save_path, "checkpoints") + + def _resume_dir(self) -> str: + return osp.join(self._checkpoint_dir(), "resume") + + def _save_checkpoint_file( + self, + checkpoint: dict[str, Any], + save_path: str, + ) -> None: + mkdir(osp.dirname(save_path)) + tmp_path = save_path + ".tmp" + torch.save(checkpoint, tmp_path) + os.replace(tmp_path, save_path) + + def _write_resume_meta(self, iteration: int, resume_path: str) -> None: + meta_path = osp.join(self._checkpoint_dir(), "latest.json") + meta = { + "iteration": iteration, + "path": resume_path, + } + tmp_path = meta_path + ".tmp" + with open(tmp_path, "w", encoding="utf-8") as handle: + json.dump(meta, handle, indent=2, sort_keys=True) + os.replace(tmp_path, meta_path) + + def _prune_resume_checkpoints(self, keep_count: int) -> None: + if keep_count <= 0: + return + resume_dir = self._resume_dir() + if not osp.isdir(resume_dir): + return + prefix = f"{self.engine_cfg['save_name']}-resume-" + resume_files = sorted( + file_name for file_name in os.listdir(resume_dir) + if file_name.startswith(prefix) and file_name.endswith(".pt") + ) + stale_files = resume_files[:-keep_count] + for file_name in stale_files: + os.remove(osp.join(resume_dir, file_name)) + + def _get_latest_resume_ckpt_path(self) -> str | None: + latest_path = osp.join(self._checkpoint_dir(), "latest.pt") + if osp.isfile(latest_path): + return latest_path + meta_path = osp.join(self._checkpoint_dir(), "latest.json") + if osp.isfile(meta_path): + with open(meta_path, "r", encoding="utf-8") as handle: + latest_meta = json.load(handle) + candidate = latest_meta.get("path") + if isinstance(candidate, str) and osp.isfile(candidate): + return candidate + return 
None + def save_ckpt(self, iteration): if torch.distributed.get_rank() == 0: - mkdir(osp.join(self.save_path, "checkpoints/")) save_name = self.engine_cfg['save_name'] - checkpoint = { - 'model': self.state_dict(), - 'optimizer': self.optimizer.state_dict(), - 'scheduler': self.scheduler.state_dict(), - 'iteration': iteration} - torch.save(checkpoint, - osp.join(self.save_path, 'checkpoints/{}-{:0>5}.pt'.format(save_name, iteration))) + checkpoint = self._build_checkpoint(iteration) + ckpt_path = osp.join( + self._checkpoint_dir(), + '{}-{:0>5}.pt'.format(save_name, iteration), + ) + self._save_checkpoint_file(checkpoint, ckpt_path) + + def save_resume_ckpt(self, iteration: int) -> None: + if torch.distributed.get_rank() != 0: + return + checkpoint = self._build_checkpoint(iteration) + save_name = self.engine_cfg['save_name'] + resume_path = osp.join( + self._resume_dir(), + f"{save_name}-resume-{iteration:0>5}.pt", + ) + latest_path = osp.join(self._checkpoint_dir(), "latest.pt") + self._save_checkpoint_file(checkpoint, resume_path) + self._save_checkpoint_file(checkpoint, latest_path) + self._write_resume_meta(iteration, resume_path) + self._prune_resume_checkpoints( + int(self.engine_cfg.get('resume_keep', 3)) + ) def _load_ckpt(self, save_name): load_ckpt_strict = self.engine_cfg['restore_ckpt_strict'] - checkpoint = torch.load(save_name, map_location=torch.device( - "cuda", self.device)) + checkpoint = torch.load( + save_name, + map_location=torch.device("cuda", self.device), + weights_only=False, + ) model_state_dict = checkpoint['model'] if not load_ckpt_strict: @@ -271,6 +372,33 @@ class BaseModel(MetaModel, nn.Module): else: self.msg_mgr.log_warning( "Restore NO Scheduler from %s !!!" 
% save_name) + if ( + self.engine_cfg.get('enable_float16', False) + and hasattr(self, 'Scaler') + and 'scaler' in checkpoint + ): + self.Scaler.load_state_dict(checkpoint['scaler']) + if 'random_state' in checkpoint: + random.setstate(checkpoint['random_state']) + if 'numpy_random_state' in checkpoint: + np.random.set_state(checkpoint['numpy_random_state']) + if 'torch_random_state' in checkpoint: + torch_random_state = checkpoint['torch_random_state'] + if not isinstance(torch_random_state, torch.Tensor): + torch_random_state = torch.as_tensor( + torch_random_state, + dtype=torch.uint8, + ) + torch.set_rng_state(torch_random_state.cpu()) + if 'cuda_random_state_all' in checkpoint and torch.cuda.is_available(): + cuda_random_state_all = checkpoint['cuda_random_state_all'] + normalized_cuda_states = [] + for state in cuda_random_state_all: + if not isinstance(state, torch.Tensor): + state = torch.as_tensor(state, dtype=torch.uint8) + normalized_cuda_states.append(state.cpu()) + torch.cuda.set_rng_state_all(normalized_cuda_states) + self.iteration = int(checkpoint.get('iteration', self.iteration)) self.msg_mgr.log_info("Restore Parameters from %s !!!" 
% save_name) def resume_ckpt(self, restore_hint): @@ -278,10 +406,15 @@ class BaseModel(MetaModel, nn.Module): save_name = self.engine_cfg['save_name'] save_name = osp.join( self.save_path, 'checkpoints/{}-{:0>5}.pt'.format(save_name, restore_hint)) - self.iteration = restore_hint elif isinstance(restore_hint, str): - save_name = restore_hint - self.iteration = 0 + if restore_hint == 'latest': + save_name = self._get_latest_resume_ckpt_path() + if save_name is None: + raise FileNotFoundError( + f"No latest checkpoint found under {self._checkpoint_dir()}" + ) + else: + save_name = restore_hint else: raise ValueError( "Error type for -Restore_Hint-, supported: int or string.") @@ -417,6 +550,9 @@ class BaseModel(MetaModel, nn.Module): visual_summary['scalar/learning_rate'] = model.optimizer.param_groups[0]['lr'] model.msg_mgr.train_step(loss_info, visual_summary) + resume_every_iter = int(model.engine_cfg.get('resume_every_iter', 0)) + if resume_every_iter > 0 and model.iteration % resume_every_iter == 0: + model.save_resume_ckpt(model.iteration) if model.iteration % model.engine_cfg['save_iter'] == 0: # save the checkpoint model.save_ckpt(model.iteration) diff --git a/research/analyze_scoliosis_dataset.py b/research/analyze_scoliosis_dataset.py new file mode 100644 index 0000000..f9dddb5 --- /dev/null +++ b/research/analyze_scoliosis_dataset.py @@ -0,0 +1,421 @@ +from __future__ import annotations + +import json +import pickle +import sys +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + +import numpy as np +from jaxtyping import Float +from numpy.typing import NDArray + +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.append(str(REPO_ROOT)) + +from datasets import pretreatment_scoliosis_drf as drf_prep + +POSE_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-pose-pkl") +HEATMAP_ROOT = 
HEATMAP_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign")
PARTITION_PATH = REPO_ROOT / "datasets/Scoliosis1K/Scoliosis1K_118.json"
HEATMAP_CFG_PATH = REPO_ROOT / "configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml"
REPORT_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.md"
JSON_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.json"

# Numerical floor used for the std normalisation, the F1 denominator and the
# "frame has any mass" check.
EPS = 1e-6
# A heatmap pixel counts as "active" when its value is strictly above this.
THRESHOLD = 13.0
# Number of columns on each side whose intensity is summed as "clipped" mass
# (the report attributes this margin to `BaseSilCuttingTransform`).
SIDE_CUT = 10
LABEL_TO_INT = {"negative": 0, "neutral": 1, "positive": 2}
FloatArray = NDArray[np.float32]


@dataclass(frozen=True)
class SequenceKey:
    """Identity of one sequence, taken from the last directory levels of its path."""

    pid: str
    label: str
    seq: str


@dataclass
class RunningStats:
    """Accumulates a weighted sum and a count so a mean can be read at any time."""

    total: float = 0.0
    count: int = 0

    def update(self, value: float, n: int = 1) -> None:
        """Add ``value`` with weight ``n`` to the running totals."""
        self.total += value * n
        self.count += n

    @property
    def mean(self) -> float:
        """Weighted mean of everything added so far; 0.0 when nothing was added."""
        return self.total / max(self.count, 1)


def load_partition_ids() -> tuple[set[str], set[str]]:
    """Read ``PARTITION_PATH`` and return ``(TRAIN_SET ids, TEST_SET ids)`` as sets."""
    with PARTITION_PATH.open("r", encoding="utf-8") as handle:
        partition = json.load(handle)
    return set(partition["TRAIN_SET"]), set(partition["TEST_SET"])


def sequence_key_from_path(path: Path) -> SequenceKey:
    """Build a :class:`SequenceKey` from a ``.../<pid>/<label>/<seq>/<file>`` path."""
    parts = path.parts
    return SequenceKey(pid=parts[-4], label=parts[-3], seq=parts[-2])


def iter_pose_paths() -> list[Path]:
    """Return every ``*.pkl`` found three directory levels below ``POSE_ROOT``, sorted."""
    return sorted(POSE_ROOT.glob("*/*/*/*.pkl"))


def iter_heatmap_paths() -> list[Path]:
    """Return every ``0_heatmap.pkl`` three levels below ``HEATMAP_ROOT``, sorted."""
    return sorted(HEATMAP_ROOT.glob("*/*/*/0_heatmap.pkl"))


def read_pickle(path: Path) -> object:
    """Load and return the object pickled at ``path``.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in the
    file. Only point this at exports produced by a trusted pipeline.
    """
    with path.open("rb") as handle:
        return pickle.load(handle)


def bbox_from_mask(mask: NDArray[np.bool_]) -> tuple[float, float, float, float] | None:
    """Return ``(width, height, center_x, center_y)`` of the tight box around ``mask``.

    The box spans the first to the last row/column that contains a true
    element, inclusive. Returns ``None`` when the mask has no true element.
    """
    rows = np.flatnonzero(mask.any(axis=1))
    cols = np.flatnonzero(mask.any(axis=0))
    if rows.size == 0 or cols.size == 0:
        return None
    y0 = int(rows[0])
    y1 = int(rows[-1])
    x0 = int(cols[0])
    x1 = int(cols[-1])
    width = float(x1 - x0 + 1)
    height = float(y1 - y0 + 1)
    center_x = float((x0 + x1) / 2.0)
    center_y = float((y0 + y1) / 2.0)
    return width, height, center_x, center_y


def sequence_bbox_metrics(
    heatmap: Float[FloatArray, "frames channels height width"],
    threshold: float = THRESHOLD,
) -> dict[str, float]:
    """Summarise the thresholded support geometry of one heatmap sequence.

    Args:
        heatmap: Array indexed as (frame, channel, row, column). Channel 0 is
            treated as the ``bone`` map and channel 1 as the ``joint`` map.
        threshold: Pixels strictly above this value count as active.

    Returns:
        A dict of per-sequence scalars: mean/std of bbox width and height, std
        of the bbox centre, mean active-pixel fraction, mean ratio of intensity
        lying in the ``SIDE_CUT`` side margins, and the mean absolute distance
        between the bone-channel and joint-channel bbox centres. A metric with
        no contributing frame is reported as 0.0.
    """
    # Per-pixel maximum over channels: the combined support of all channels.
    support = heatmap.max(axis=1)
    bone = heatmap[:, 0]
    joint = heatmap[:, 1]

    widths: list[float] = []
    heights: list[float] = []
    centers_x: list[float] = []
    centers_y: list[float] = []
    active_fractions: list[float] = []
    cut_mass_ratios: list[float] = []
    bone_joint_dx: list[float] = []
    bone_joint_dy: list[float] = []

    for frame_idx in range(support.shape[0]):
        frame = support[frame_idx]
        mask = frame > threshold
        bbox = bbox_from_mask(mask)
        if bbox is not None:
            width, height, center_x, center_y = bbox
            widths.append(width)
            heights.append(height)
            centers_x.append(center_x)
            centers_y.append(center_y)
            active_fractions.append(float(mask.mean()))

        total_mass = float(frame.sum())
        if total_mass > EPS:
            # Clamp the margin so the left and right slices never overlap on a
            # map narrower than 2 * SIDE_CUT (which would double-count columns),
            # and never evaluate `frame[:, -0:]`, which selects the whole frame.
            cut = min(SIDE_CUT, frame.shape[1] // 2)
            if cut > 0:
                clipped_mass = float(frame[:, :cut].sum() + frame[:, -cut:].sum())
            else:
                clipped_mass = 0.0
            cut_mass_ratios.append(clipped_mass / total_mass)

        bone_bbox = bbox_from_mask(bone[frame_idx] > threshold)
        joint_bbox = bbox_from_mask(joint[frame_idx] > threshold)
        if bone_bbox is not None and joint_bbox is not None:
            # Indices 2 and 3 of a bbox tuple are center_x and center_y.
            bone_joint_dx.append(abs(bone_bbox[2] - joint_bbox[2]))
            bone_joint_dy.append(abs(bone_bbox[3] - joint_bbox[3]))

    def safe_mean(values: Iterable[float]) -> float:
        array = np.asarray(list(values), dtype=np.float32)
        return float(array.mean()) if array.size else 0.0

    def safe_std(values: Iterable[float]) -> float:
        array = np.asarray(list(values), dtype=np.float32)
        return float(array.std()) if array.size else 0.0

    return {
        "width_mean": safe_mean(widths),
        "height_mean": safe_mean(heights),
        "center_x_std": safe_std(centers_x),
        "center_y_std": safe_std(centers_y),
        "width_std": safe_std(widths),
        "height_std": safe_std(heights),
        "active_fraction_mean": safe_mean(active_fractions),
        "cut_mass_ratio_mean": safe_mean(cut_mass_ratios),
        "bone_joint_dx_mean": safe_mean(bone_joint_dx),
        "bone_joint_dy_mean": safe_mean(bone_joint_dy),
    }


def softmax_rows(logits: NDArray[np.float64]) -> NDArray[np.float64]:
    """Row-wise softmax; each row's maximum is subtracted before exponentiating."""
    shifted = logits - logits.max(axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=1, keepdims=True)


def fit_softmax_regression(
    x: NDArray[np.float64],
    y: NDArray[np.int64],
    num_classes: int,
    steps: int = 4000,
    lr: float = 0.05,
    reg: float = 1e-4,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Fit a linear softmax classifier with full-batch gradient descent.

    Args:
        x: Feature matrix, one row per sample.
        y: Integer class ids in ``[0, num_classes)``, one per row of ``x``.
        num_classes: Number of output classes.
        steps: Number of gradient steps.
        lr: Step size.
        reg: L2 coefficient applied to the weights (not to the bias).

    Returns:
        ``(weights, bias)`` with shapes ``(x.shape[1], num_classes)`` and
        ``(num_classes,)``; both start from zero.
    """
    weights = np.zeros((x.shape[1], num_classes), dtype=np.float64)
    bias = np.zeros(num_classes, dtype=np.float64)
    one_hot = np.eye(num_classes, dtype=np.float64)[y]

    for _ in range(steps):
        logits = x @ weights + bias
        probs = softmax_rows(logits)
        error = probs - one_hot
        grad_w = (x.T @ error) / x.shape[0] + reg * weights
        grad_b = error.mean(axis=0)
        weights -= lr * grad_w
        bias -= lr * grad_b

    return weights, bias


def evaluate_predictions(
    y_true: NDArray[np.int64],
    y_pred: NDArray[np.int64],
    num_classes: int,
) -> dict[str, float]:
    """Return accuracy and macro precision/recall/F1, all as percentages.

    Per-class precision and recall use a denominator floored at 1, so a class
    with no predictions (or no samples) contributes 0 rather than dividing by
    zero; the F1 denominator is floored at ``EPS`` for the same reason.
    """
    accuracy = float((y_true == y_pred).mean())
    precisions: list[float] = []
    recalls: list[float] = []
    f1s: list[float] = []

    for class_id in range(num_classes):
        tp = int(((y_true == class_id) & (y_pred == class_id)).sum())
        fp = int(((y_true != class_id) & (y_pred == class_id)).sum())
        fn = int(((y_true == class_id) & (y_pred != class_id)).sum())
        precision = tp / max(tp + fp, 1)
        recall = tp / max(tp + fn, 1)
        f1 = 2 * precision * recall / max(precision + recall, EPS)
        precisions.append(precision)
        recalls.append(recall)
        f1s.append(f1)

    return {
        "accuracy": 100.0 * accuracy,
        "macro_precision": 100.0 * float(np.mean(precisions)),
        "macro_recall": 100.0 * float(np.mean(recalls)),
        "macro_f1": 100.0 * float(np.mean(f1s)),
    }


def _resolve_split(pid: str, train_ids: set[str], test_ids: set[str]) -> str | None:
    """Return ``"train"`` or ``"test"`` for ``pid``, or ``None`` if it is in neither set."""
    if pid in train_ids:
        return "train"
    if pid in test_ids:
        return "test"
    return None


def analyze() -> dict[str, object]:
    """Scan the pose and heatmap exports and return all report statistics.

    Sequences whose subject id is listed in neither TRAIN_SET nor TEST_SET are
    skipped (with a warning on stderr) instead of being counted as test data.

    Returns:
        A dict with the keys ``split_label_counts``, ``pose_confidence_mean``,
        ``pose_valid_ratio_mean``, ``pav_label_means``, ``pav_softmax_probe``
        and ``heatmap_metrics``.

    Raises:
        ValueError: If no heatmap/PAV sequence is found for the train or the
            test split, so the probe cannot be fitted or evaluated.
    """
    train_ids, test_ids = load_partition_ids()

    heatmap_cfg = drf_prep.load_heatmap_cfg(str(HEATMAP_CFG_PATH))
    # NOTE(review): `pose_transform` is built but never used below. Kept because
    # the call may validate the config; confirm whether it can be dropped.
    pose_transform = drf_prep.build_pose_transform(heatmap_cfg)

    split_label_counts: dict[str, dict[str, int]] = {
        "train": defaultdict(int),
        "test": defaultdict(int),
    }
    pose_quality: dict[str, dict[str, RunningStats]] = {
        "train": defaultdict(RunningStats),
        "test": defaultdict(RunningStats),
    }
    valid_ratio: dict[str, dict[str, RunningStats]] = {
        "train": defaultdict(RunningStats),
        "test": defaultdict(RunningStats),
    }

    skipped_pose = 0
    for pose_path in iter_pose_paths():
        key = sequence_key_from_path(pose_path)
        split = _resolve_split(key.pid, train_ids, test_ids)
        if split is None:
            skipped_pose += 1
            continue
        split_label_counts[split][key.label] += 1

        pose = drf_prep.read_pose(str(pose_path))
        # Use the third value of the last axis as confidence when present;
        # otherwise treat every entry as fully confident.
        conf = pose[..., 2] if pose.shape[-1] >= 3 else np.ones(pose.shape[:-1], dtype=np.float32)
        pose_quality[split][key.label].update(float(conf.mean()))
        valid_ratio[split][key.label].update(float((conf > 0.05).mean()))

    heatmap_metrics: dict[str, list[float]] = defaultdict(list)
    pav_vectors_train: list[NDArray[np.float64]] = []
    pav_vectors_test: list[NDArray[np.float64]] = []
    labels_train: list[int] = []
    labels_test: list[int] = []
    pav_means: dict[str, list[float]] = defaultdict(list)

    skipped_heatmap = 0
    for heatmap_path in iter_heatmap_paths():
        key = sequence_key_from_path(heatmap_path)
        split = _resolve_split(key.pid, train_ids, test_ids)
        if split is None:
            skipped_heatmap += 1
            continue
        heatmap = np.asarray(read_pickle(heatmap_path), dtype=np.float32)
        metrics = sequence_bbox_metrics(heatmap)
        for metric_name, metric_value in metrics.items():
            heatmap_metrics[f"{split}.{metric_name}"].append(metric_value)
            heatmap_metrics[f"all.{metric_name}"].append(metric_value)

        pav_path = heatmap_path.with_name("1_pav.pkl")
        pav_seq = np.asarray(read_pickle(pav_path), dtype=np.float32)
        # Only the first entry of the stored PAV array is used as the
        # sequence-level feature vector.
        pav_vector = pav_seq[0].reshape(-1).astype(np.float64)
        pav_means[key.label].append(float(pav_vector.mean()))
        if split == "train":
            pav_vectors_train.append(pav_vector)
            labels_train.append(LABEL_TO_INT[key.label])
        else:
            pav_vectors_test.append(pav_vector)
            labels_test.append(LABEL_TO_INT[key.label])

    if skipped_pose or skipped_heatmap:
        print(
            f"Warning: skipped {skipped_pose} pose and {skipped_heatmap} heatmap "
            f"sequences whose subject id is in neither TRAIN_SET nor TEST_SET",
            file=sys.stderr,
        )

    if not pav_vectors_train or not pav_vectors_test:
        # np.stack would fail on an empty list with a far less helpful message.
        raise ValueError(
            f"No heatmap/PAV sequences found for the train and/or test split under {HEATMAP_ROOT}"
        )

    x_train = np.stack(pav_vectors_train, axis=0)
    x_test = np.stack(pav_vectors_test, axis=0)
    y_train = np.asarray(labels_train, dtype=np.int64)
    y_test = np.asarray(labels_test, dtype=np.int64)

    # Standardise with train-split statistics only, then reuse them on test.
    mean = x_train.mean(axis=0, keepdims=True)
    std = np.maximum(x_train.std(axis=0, keepdims=True), EPS)
    x_train_std = (x_train - mean) / std
    x_test_std = (x_test - mean) / std
    weights, bias = fit_softmax_regression(x_train_std, y_train, num_classes=3)
    y_pred = np.argmax(x_test_std @ weights + bias, axis=1).astype(np.int64)
    pav_classifier = evaluate_predictions(y_test, y_pred, num_classes=3)

    results: dict[str, object] = {
        "split_label_counts": split_label_counts,
        "pose_confidence_mean": {
            split: {label: stats.mean for label, stats in per_label.items()}
            for split, per_label in pose_quality.items()
        },
        "pose_valid_ratio_mean": {
            split: {label: stats.mean for label, stats in per_label.items()}
            for split, per_label in valid_ratio.items()
        },
        "pav_label_means": {
            label: float(np.mean(values))
            for label, values in pav_means.items()
        },
        "pav_softmax_probe": pav_classifier,
        "heatmap_metrics": {
            key: {
                "mean": float(np.mean(values)),
                "p95": float(np.percentile(values, 95)),
            }
            for key, values in heatmap_metrics.items()
        },
    }
    return results


def format_report(results: dict[str, object]) -> str:
    """Render the dict returned by :func:`analyze` as a Markdown report.

    A class that is absent from a split is rendered as ``0`` (counts) or
    ``nan`` (means) instead of raising ``KeyError``. The measured values quoted
    in the "Reading" section are taken from ``results`` rather than hard-coded.
    """
    split_counts = results["split_label_counts"]
    pose_conf = results["pose_confidence_mean"]
    pose_valid = results["pose_valid_ratio_mean"]
    heat = results["heatmap_metrics"]
    pav_probe = results["pav_softmax_probe"]
    pav_means = results["pav_label_means"]

    missing = float("nan")

    def count(split: str, label: str) -> int:
        return split_counts.get(split, {}).get(label, 0)

    def conf(split: str, label: str) -> float:
        return pose_conf.get(split, {}).get(label, missing)

    def valid(split: str, label: str) -> float:
        return pose_valid.get(split, {}).get(label, missing)

    def pav(label: str) -> float:
        return pav_means.get(label, missing)

    def heat_stat(name: str) -> tuple[float, float]:
        entry = heat[f"all.{name}"]
        return entry["mean"], entry["p95"]

    center_x_std_mean, center_x_std_p95 = heat_stat("center_x_std")
    center_y_std_mean, center_y_std_p95 = heat_stat("center_y_std")
    width_std_mean, width_std_p95 = heat_stat("width_std")
    height_std_mean, height_std_p95 = heat_stat("height_std")
    cut_ratio_mean, cut_ratio_p95 = heat_stat("cut_mass_ratio_mean")
    bone_joint_dx_mean, bone_joint_dx_p95 = heat_stat("bone_joint_dx_mean")
    bone_joint_dy_mean, bone_joint_dy_p95 = heat_stat("bone_joint_dy_mean")
    width_mean, width_p95 = heat_stat("width_mean")
    height_mean, height_p95 = heat_stat("height_mean")
    active_fraction_mean, active_fraction_p95 = heat_stat("active_fraction_mean")
    active_pct = 100.0 * active_fraction_mean

    return f"""# Scoliosis1K Dataset Analysis (1:1:8, shared-align skeleton maps)

## Split

Train counts:
- negative: {count("train", "negative")}
- neutral: {count("train", "neutral")}
- positive: {count("train", "positive")}

Test counts:
- negative: {count("test", "negative")}
- neutral: {count("test", "neutral")}
- positive: {count("test", "positive")}

## Raw pose quality

Mean keypoint confidence by split/class:
- train negative: {conf("train", "negative"):.4f}
- train neutral: {conf("train", "neutral"):.4f}
- train positive: {conf("train", "positive"):.4f}
- test negative: {conf("test", "negative"):.4f}
- test neutral: {conf("test", "neutral"):.4f}
- test positive: {conf("test", "positive"):.4f}

Mean valid-joint ratio (`conf > 0.05`) by split/class:
- train negative: {valid("train", "negative"):.4f}
- train neutral: {valid("train", "neutral"):.4f}
- train positive: {valid("train", "positive"):.4f}
- test negative: {valid("test", "negative"):.4f}
- test neutral: {valid("test", "neutral"):.4f}
- test positive: {valid("test", "positive"):.4f}

## PAV signal

Mean normalized PAV value by label:
- negative: {pav("negative"):.4f}
- neutral: {pav("neutral"):.4f}
- positive: {pav("positive"):.4f}

Linear softmax probe over sequence-level PAV (fit on the train split, evaluated on the test split):
- accuracy: {pav_probe["accuracy"]:.2f}%
- macro precision: {pav_probe["macro_precision"]:.2f}%
- macro recall: {pav_probe["macro_recall"]:.2f}%
- macro F1: {pav_probe["macro_f1"]:.2f}%

## Shared-align heatmap geometry

Combined support bbox stats over all sequences:
- width mean / p95: {width_mean:.2f} / {width_p95:.2f}
- height mean / p95: {height_mean:.2f} / {height_p95:.2f}
- active fraction mean / p95: {active_fraction_mean:.4f} / {active_fraction_p95:.4f}

Per-sequence temporal jitter (std over frames):
- center-x std mean / p95: {center_x_std_mean:.3f} / {center_x_std_p95:.3f}
- center-y std mean / p95: {center_y_std_mean:.3f} / {center_y_std_p95:.3f}
- width std mean / p95: {width_std_mean:.3f} / {width_std_p95:.3f}
- height std mean / p95: {height_std_mean:.3f} / {height_std_p95:.3f}

Residual limb-vs-joint bbox-center mismatch after shared alignment:
- dx mean / p95: {bone_joint_dx_mean:.3f} / {bone_joint_dx_p95:.3f}
- dy mean / p95: {bone_joint_dy_mean:.3f} / {bone_joint_dy_p95:.3f}

Estimated intensity mass in the columns removed by `BaseSilCuttingTransform`:
- mean clipped-mass ratio: {cut_ratio_mean:.4f}
- p95 clipped-mass ratio: {cut_ratio_p95:.4f}

## Reading

- The raw pose data does not look broken. Confidence and valid-joint ratios are high and similar across classes.
- The sequence-level PAV still carries useful label signal, so the dataset is not devoid of scoliosis information.
- Shared alignment removed the old limb-vs-joint registration bug; residual channel-center mismatch is now small.
- The remaining suspicious area is the visual branch: the skeleton map still has frame-to-frame bbox jitter, and the support bbox is almost full-height (`~{height_mean:.1f} / 64`) and fairly dense (`~{active_pct:.0f}%` active pixels), which may be washing out subtle asymmetry cues.
- `BaseSilCuttingTransform` does not appear to be the main failure source for this shared-align export; the measured mass in the removed side margins is near zero.
- The dataset itself looks usable; the bigger issue still appears to be how the current skeleton-map preprocessing/runtime path presents that data to ScoNet.
"""


def main() -> None:
    """Run the analysis and write the Markdown report and the JSON dump."""
    results = analyze()
    REPORT_PATH.write_text(format_report(results), encoding="utf-8")
    JSON_PATH.write_text(json.dumps(results, indent=2, sort_keys=True), encoding="utf-8")
    print(f"Wrote {REPORT_PATH}")
    print(f"Wrote {JSON_PATH}")


if __name__ == "__main__":
    main()
width std mean / p95: 2.152 / 2.804 +- height std mean / p95: 0.507 / 0.545 + +Residual limb-vs-joint bbox-center mismatch after shared alignment: +- dx mean / p95: 0.195 / 0.229 +- dy mean / p95: 0.251 / 0.357 + +Estimated intensity mass in the columns removed by `BaseSilCuttingTransform`: +- mean clipped-mass ratio: 0.0000 +- p95 clipped-mass ratio: 0.0000 + +## Reading + +- The raw pose data does not look broken. Confidence and valid-joint ratios are high and similar across classes. +- The sequence-level PAV still carries useful label signal, so the dataset is not devoid of scoliosis information. +- Shared alignment removed the old limb-vs-joint registration bug; residual channel-center mismatch is now small. +- The remaining suspicious area is the visual branch: the skeleton map still has frame-to-frame bbox jitter, and the support bbox is almost full-height (`~61.5 / 64`) and fairly dense (`~36%` active pixels), which may be washing out subtle asymmetry cues. +- `BaseSilCuttingTransform` does not appear to be the main failure source for this shared-align export; the measured mass in the removed side margins is near zero. +- The dataset itself looks usable; the bigger issue still appears to be how the current skeleton-map preprocessing/runtime path presents that data to ScoNet.