Add proxy eval and skeleton experiment tooling

This commit is contained in:
2026-03-09 23:11:35 +08:00
parent 36aef46a0d
commit 6c8cd2950c
16 changed files with 1107 additions and 69 deletions
@@ -0,0 +1,30 @@
coco18tococo17_args:
transfer_to_coco17: False
padkeypoints_args:
pad_method: knn
use_conf: True
norm_args:
pose_format: coco
use_conf: ${padkeypoints_args.use_conf}
heatmap_image_height: 128
heatmap_generator_args:
sigma: 1.5
use_score: ${padkeypoints_args.use_conf}
img_h: ${norm_args.heatmap_image_height}
img_w: ${norm_args.heatmap_image_height}
with_limb: null
with_kp: null
sigma_limb: 1.5
sigma_joint: 8.0
align_args:
align: True
final_img_size: 64
offset: 0
heatmap_image_size: ${norm_args.heatmap_image_height}
crop_mode: bbox_pad
preserve_aspect_ratio: True
@@ -0,0 +1,30 @@
coco18tococo17_args:
transfer_to_coco17: False
padkeypoints_args:
pad_method: knn
use_conf: True
norm_args:
pose_format: coco
use_conf: ${padkeypoints_args.use_conf}
heatmap_image_height: 128
heatmap_generator_args:
sigma: 1.5
use_score: ${padkeypoints_args.use_conf}
img_h: ${norm_args.heatmap_image_height}
img_w: ${norm_args.heatmap_image_height}
with_limb: null
with_kp: null
sigma_limb: 1.5
sigma_joint: 8.0
channel_gain_limb: 4.0
channel_gain_joint: 1.0
align_args:
align: True
final_img_size: 64
offset: 0
heatmap_image_size: ${norm_args.heatmap_image_height}
@@ -0,0 +1,29 @@
coco18tococo17_args:
transfer_to_coco17: False
padkeypoints_args:
pad_method: knn
use_conf: True
norm_args:
pose_format: coco
use_conf: ${padkeypoints_args.use_conf}
heatmap_image_height: 128
heatmap_generator_args:
sigma: 1.5
use_score: ${padkeypoints_args.use_conf}
img_h: ${norm_args.heatmap_image_height}
img_w: ${norm_args.heatmap_image_height}
with_limb: null
with_kp: null
sigma_limb: 1.5
sigma_joint: 8.0
align_args:
align: True
final_img_size: 64
offset: 0
heatmap_image_size: ${norm_args.heatmap_image_height}
scope: sequence
@@ -0,0 +1,111 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-geomfix
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
data_in_use:
- true
- false
num_workers: 1
remove_no_gallery: false
test_dataset_name: Scoliosis1K
test_seq_subset_size: 128
test_seq_subset_seed: 118
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 2000
save_name: ScoNet_skeleton_118_sigma15_joint8_geomfix_proxy_1gpu
eval_func: evaluate_scoliosis
sampler:
batch_shuffle: false
batch_size: 1
sample_type: all_ordered
type: InferenceSampler
frames_all_limit: 720
metric: euc
transform:
- type: BaseSilTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.001
solver: AdamW
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 1000
- 1500
- 1800
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_BN: false
with_test: true
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
auto_resume_latest: true
resume_every_iter: 500
resume_keep: 3
eval_iter: 500
save_iter: 2000
save_name: ScoNet_skeleton_118_sigma15_joint8_geomfix_proxy_1gpu
sync_BN: true
total_iter: 2000
sampler:
batch_shuffle: true
batch_size:
- 8
- 8
frames_num_fixed: 30
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseSilTransform
@@ -0,0 +1,107 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign-limb4
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
num_workers: 1
remove_no_gallery: false
cache: false
test_dataset_name: Scoliosis1K
data_in_use:
- true
- false
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 20000
save_name: ScoNet_skeleton_118_sigma15_joint8_limb4_adamw_2gpu_bs12x8
eval_func: evaluate_scoliosis
sampler:
type: InferenceSampler
batch_size: 1
sample_type: all_ordered
batch_shuffle: false
frames_all_limit: 720
transform:
- type: BaseSilCuttingTransform
metric: euc
cross_view_gallery: false
loss_cfg:
- loss_term_weight: 1.0
margin: 0.3
type: TripletLoss
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
solver: AdamW
lr: 0.001
weight_decay: 0.0005
scheduler_cfg:
scheduler: MultiStepLR
milestones:
- 10000
- 15000
gamma: 0.1
trainer_cfg:
  enable_float16: true
  save_iter: 20000
  sync_BN: true
  restore_ckpt_strict: true
  restore_hint: 0
  save_name: ScoNet_skeleton_118_sigma15_joint8_limb4_adamw_2gpu_bs12x8
  with_test: false
  log_iter: 100
  fix_BN: false
  find_unused_parameters: false
  # Total training iterations; was missing here although the trainer loop reads
  # engine_cfg['total_iter'] to stop. Matches save_iter and the sibling config's value.
  total_iter: 20000
  sampler:
    type: TripletSampler
    batch_shuffle: true
    batch_size:
      - 12
      - 8
    frames_num_fixed: 30
    # Moved under sampler: sample_type belongs to the sampler config block,
    # not at trainer_cfg level (compare the sibling config for this dataset).
    sample_type: fixed_unordered
  transform:
    - type: BaseSilCuttingTransform
  optim_reset: false
  scheduler_reset: false
  resume_every_iter: 1000
  resume_keep: 3
@@ -0,0 +1,107 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign-limb4
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
data_in_use:
- true
- false
num_workers: 1
remove_no_gallery: false
test_dataset_name: Scoliosis1K
evaluator_cfg:
  enable_float16: true
  restore_ckpt_strict: true
  restore_hint: 20000
  save_name: ScoNet_skeleton_118_sigma15_joint8_limb4_adamw_2gpu_bs12x8
  eval_func: evaluate_scoliosis
  sampler:
    # Sampler type was omitted; every sibling evaluator config in this commit
    # uses InferenceSampler for all_ordered evaluation.
    type: InferenceSampler
    batch_shuffle: false
    batch_size: 2
    sample_type: all_ordered
    frames_all_limit: 720
  metric: euc
  transform:
    - type: BaseSilCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.001
solver: AdamW
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 10000
- 14000
- 18000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_BN: false
with_test: false
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
auto_resume_latest: true
resume_every_iter: 500
resume_keep: 3
save_iter: 20000
save_name: ScoNet_skeleton_118_sigma15_joint8_limb4_adamw_2gpu_bs12x8
sync_BN: true
total_iter: 20000
sampler:
batch_shuffle: true
batch_size:
- 12
- 8
frames_num_fixed: 30
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseSilCuttingTransform
@@ -0,0 +1,108 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
data_in_use:
- true
- false
num_workers: 1
remove_no_gallery: false
test_dataset_name: Scoliosis1K
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 20000
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_1gpu_bs8x8
eval_func: evaluate_scoliosis
sampler:
batch_shuffle: false
batch_size: 1
sample_type: all_ordered
type: InferenceSampler
frames_all_limit: 720
metric: euc
transform:
- type: BaseSilTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.001
solver: AdamW
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 10000
- 14000
- 18000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_BN: false
with_test: false
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
auto_resume_latest: true
resume_every_iter: 1000
resume_keep: 3
save_iter: 20000
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_1gpu_bs8x8
sync_BN: true
total_iter: 20000
sampler:
batch_shuffle: true
batch_size:
- 8
- 8
frames_num_fixed: 30
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseSilTransform
@@ -0,0 +1,111 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
data_in_use:
- true
- false
num_workers: 1
remove_no_gallery: false
test_dataset_name: Scoliosis1K
test_seq_subset_size: 128
test_seq_subset_seed: 118
evaluator_cfg:
enable_float16: true
restore_ckpt_strict: true
restore_hint: 2000
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_proxy_1gpu
eval_func: evaluate_scoliosis
sampler:
batch_shuffle: false
batch_size: 1
sample_type: all_ordered
type: InferenceSampler
frames_all_limit: 720
metric: euc
transform:
- type: BaseSilTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.001
solver: AdamW
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 1000
- 1500
- 1800
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_BN: false
with_test: true
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
auto_resume_latest: true
resume_every_iter: 500
resume_keep: 3
eval_iter: 500
save_iter: 2000
save_name: ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_proxy_1gpu
sync_BN: true
total_iter: 2000
sampler:
batch_shuffle: true
batch_size:
- 8
- 8
frames_num_fixed: 30
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseSilTransform
@@ -0,0 +1,108 @@
data_cfg:
dataset_name: Scoliosis1K
dataset_root: /mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-seqalign
dataset_partition: ./datasets/Scoliosis1K/Scoliosis1K_118.json
data_in_use:
- true
- false
num_workers: 1
remove_no_gallery: false
test_dataset_name: Scoliosis1K
evaluator_cfg:
  enable_float16: true
  restore_ckpt_strict: true
  restore_hint: 20000
  save_name: ScoNet_skeleton_118_sigma15_joint8_seqalign_2gpu_bs12x8
  eval_func: evaluate_scoliosis
  sampler:
    # Sampler type was omitted; every sibling evaluator config in this commit
    # uses InferenceSampler for all_ordered evaluation.
    type: InferenceSampler
    batch_shuffle: false
    batch_size: 2
    sample_type: all_ordered
    frames_all_limit: 720
  metric: euc
  transform:
    - type: BaseSilCuttingTransform
loss_cfg:
- loss_term_weight: 1.0
margin: 0.2
type: TripletLoss
log_prefix: triplet
- loss_term_weight: 1.0
scale: 16
type: CrossEntropyLoss
log_prefix: softmax
log_accuracy: true
model_cfg:
model: ScoNet
backbone_cfg:
type: ResNet9
block: BasicBlock
in_channel: 2
channels:
- 64
- 128
- 256
- 512
layers:
- 1
- 1
- 1
- 1
strides:
- 1
- 2
- 2
- 1
maxpool: false
SeparateFCs:
in_channels: 512
out_channels: 256
parts_num: 16
SeparateBNNecks:
class_num: 3
in_channels: 256
parts_num: 16
bin_num:
- 16
optimizer_cfg:
lr: 0.1
momentum: 0.9
solver: SGD
weight_decay: 0.0005
scheduler_cfg:
gamma: 0.1
milestones:
- 10000
- 14000
- 18000
scheduler: MultiStepLR
trainer_cfg:
enable_float16: true
fix_BN: false
with_test: false
log_iter: 100
restore_ckpt_strict: true
restore_hint: 0
auto_resume_latest: true
resume_every_iter: 1000
resume_keep: 3
save_iter: 20000
save_name: ScoNet_skeleton_118_sigma15_joint8_seqalign_2gpu_bs12x8
sync_BN: true
total_iter: 20000
sampler:
batch_shuffle: true
batch_size:
- 12
- 8
frames_num_fixed: 30
sample_type: fixed_unordered
type: TripletSampler
transform:
- type: BaseSilCuttingTransform
+2
View File
@@ -167,6 +167,8 @@ Current confirmed findings from local debugging:
* a larger heatmap sigma can materially blur away the articulated structure; `sigma=8` was much broader than the silhouette geometry, while smaller sigma values recovered more structure
* an earlier bug aligned the limb and joint channels separately; that made the two channels of `0_heatmap.pkl` slightly misregistered
* the heatmap path is now patched so limb and joint channels share one alignment crop
* the heatmap aligner now also supports `align_args.scope: sequence`, which applies one shared crop box to the whole sequence instead of recomputing it frame by frame
* the heatmap config can also rebalance the two channels after alignment with `channel_gain_limb` / `channel_gain_joint`; this keeps the crop geometry fixed while changing limb-vs-joint strength
Remaining caution:
+128 -22
View File
@@ -517,7 +517,15 @@ class GatherTransform(object):
"""
Gather the different transforms.
"""
def __init__(self, base_transform, transform_bone, transform_joint, align_transform=None):
def __init__(
self,
base_transform,
transform_bone,
transform_joint,
align_transform=None,
limb_gain: float = 1.0,
joint_gain: float = 1.0,
) -> None:
"""
base_transform: Some common transform, e.g., COCO18toCOCO17, PadKeypoints, CenterAndScale
@@ -528,6 +536,22 @@ class GatherTransform(object):
self.transform_bone = transform_bone
self.transform_joint = transform_joint
self.align_transform = align_transform
self.limb_gain = limb_gain
self.joint_gain = joint_gain
def _apply_channel_gains(self, heatmap: np.ndarray) -> np.ndarray:
if self.limb_gain == 1.0 and self.joint_gain == 1.0:
return heatmap
original_dtype = heatmap.dtype
scaled = heatmap.astype(np.float32, copy=True)
scaled[:, 0] *= self.limb_gain
scaled[:, 1] *= self.joint_gain
scaled = np.clip(scaled, 0.0, 255.0)
if np.issubdtype(original_dtype, np.integer):
return scaled.astype(original_dtype)
return scaled.astype(original_dtype)
def __call__(self, pose_data):
x = self.base_transform(pose_data)
@@ -536,38 +560,109 @@ class GatherTransform(object):
heatmap = np.concatenate([heatmap_bone, heatmap_joint], axis=1)
if self.align_transform is not None:
heatmap = self.align_transform(heatmap)
return heatmap
return self._apply_channel_gains(heatmap)
AlignmentScope = Literal["frame", "sequence"]
AlignmentCropMode = Literal["square_center", "bbox_pad"]
class HeatmapAlignment():
def __init__(self, align=True, final_img_size=64, offset=0, heatmap_image_size=128) -> None:
def __init__(
self,
align: bool = True,
final_img_size: int = 64,
offset: int = 0,
heatmap_image_size: int = 128,
scope: AlignmentScope = "frame",
crop_mode: AlignmentCropMode = "square_center",
preserve_aspect_ratio: bool = False,
) -> None:
self.align = align
self.final_img_size = final_img_size
self.offset = offset
self.heatmap_image_size = heatmap_image_size
self.scope = scope
self.crop_mode = crop_mode
self.preserve_aspect_ratio = preserve_aspect_ratio
def _compute_crop_bounds(
self,
heatmap: np.ndarray,
) -> tuple[int, int, int, int] | None:
support_map = heatmap.max(axis=0)
y_sum = support_map.sum(axis=1)
x_sum = support_map.sum(axis=0)
nonzero_rows = np.flatnonzero(y_sum != 0)
nonzero_cols = np.flatnonzero(x_sum != 0)
if nonzero_rows.size == 0:
return None
if nonzero_cols.size == 0:
return None
y_top = max(int(nonzero_rows[0]) - self.offset, 0)
y_btm = min(int(nonzero_rows[-1]) + self.offset, self.heatmap_image_size - 1)
if self.crop_mode == "bbox_pad":
x_left = max(int(nonzero_cols[0]) - self.offset, 0)
x_right = min(int(nonzero_cols[-1]) + self.offset + 1, self.heatmap_image_size)
return y_top, y_btm, x_left, x_right
height = y_btm - y_top + 1
x_center = self.heatmap_image_size // 2
x_left = max(x_center - (height // 2), 0)
x_right = min(x_center + (height // 2) + 1, self.heatmap_image_size)
return y_top, y_btm, x_left, x_right
def _resize_and_pad(self, cropped_heatmap: np.ndarray) -> np.ndarray:
_, src_h, src_w = cropped_heatmap.shape
if src_h <= 0 or src_w <= 0:
return np.zeros(
(cropped_heatmap.shape[0], self.final_img_size, self.final_img_size),
dtype=np.float32,
)
scale = float(self.final_img_size) / float(max(src_h, src_w))
resized_h = max(1, int(round(src_h * scale)))
resized_w = max(1, int(round(src_w * scale)))
resized = np.stack([
cv2.resize(channel, (resized_w, resized_h), interpolation=cv2.INTER_AREA)
for channel in cropped_heatmap
], axis=0)
canvas = np.zeros(
(cropped_heatmap.shape[0], self.final_img_size, self.final_img_size),
dtype=np.float32,
)
y_offset = (self.final_img_size - resized_h) // 2
x_offset = (self.final_img_size - resized_w) // 2
canvas[:, y_offset:y_offset + resized_h, x_offset:x_offset + resized_w] = resized
return canvas
def _crop_and_resize(
self,
heatmap: np.ndarray,
crop_bounds: tuple[int, int, int, int] | None,
) -> np.ndarray:
raw_heatmap = heatmap
if crop_bounds is not None:
y_top, y_btm, x_left, x_right = crop_bounds
raw_heatmap = raw_heatmap[:, y_top:y_btm + 1, x_left:x_right]
if self.preserve_aspect_ratio:
return self._resize_and_pad(raw_heatmap)
return np.stack([
cv2.resize(channel, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA)
for channel in raw_heatmap
], axis=0)
def center_crop(self, heatmap):
"""
Input: [C, heatmap_image_size, heatmap_image_size]
Output: [C, final_img_size, final_img_size]
"""
raw_heatmap = heatmap
if self.align:
support_map = raw_heatmap.max(axis=0)
y_sum = support_map.sum(axis=1)
nonzero_rows = np.flatnonzero(y_sum != 0)
if nonzero_rows.size != 0:
y_top = max(int(nonzero_rows[0]) - self.offset, 0)
y_btm = min(int(nonzero_rows[-1]) + self.offset, self.heatmap_image_size - 1)
height = y_btm - y_top + 1
x_center = self.heatmap_image_size // 2
x_left = max(x_center - (height // 2), 0)
x_right = min(x_center + (height // 2) + 1, self.heatmap_image_size)
raw_heatmap = raw_heatmap[:, y_top:y_btm + 1, x_left:x_right]
resized = np.stack([
cv2.resize(channel, (self.final_img_size, self.final_img_size), interpolation=cv2.INTER_AREA)
for channel in raw_heatmap
], axis=0)
return resized # [C, final_img_size, final_img_size]
crop_bounds = self._compute_crop_bounds(heatmap) if self.align else None
return self._crop_and_resize(heatmap, crop_bounds) # [C, final_img_size, final_img_size]
def __call__(self, heatmap_imgs):
"""
@@ -576,7 +671,14 @@ class HeatmapAlignment():
"""
original_dtype = heatmap_imgs.dtype
heatmap_imgs = heatmap_imgs.astype(np.float32) / 255.0
heatmap_imgs = np.array([self.center_crop(heatmap_img) for heatmap_img in heatmap_imgs], dtype=np.float32)
if self.align and self.scope == "sequence":
sequence_crop_bounds = self._compute_crop_bounds(heatmap_imgs.max(axis=0))
heatmap_imgs = np.array(
[self._crop_and_resize(heatmap_img, sequence_crop_bounds) for heatmap_img in heatmap_imgs],
dtype=np.float32,
)
else:
heatmap_imgs = np.array([self.center_crop(heatmap_img) for heatmap_img in heatmap_imgs], dtype=np.float32)
heatmap_imgs = heatmap_imgs * 255.0
if np.issubdtype(original_dtype, np.integer):
return np.clip(heatmap_imgs, 0.0, 255.0).astype(original_dtype)
@@ -591,6 +693,8 @@ def GenerateHeatmapTransform(
reduction: Literal["upstream", "max", "sum"] = "upstream",
sigma_limb: float | None = None,
sigma_joint: float | None = None,
channel_gain_limb: float | None = None,
channel_gain_joint: float | None = None,
):
base_transform = T.Compose([
@@ -636,6 +740,8 @@ def GenerateHeatmapTransform(
transform_bone,
transform_joint,
HeatmapAlignment(**align_args),
limb_gain=1.0 if channel_gain_limb is None else channel_gain_limb,
joint_gain=1.0 if channel_gain_joint is None else channel_gain_joint,
) # [T, 2, H, W]
])
+2
View File
@@ -203,6 +203,8 @@ def main() -> None:
reduction=cast(HeatmapReduction, args.heatmap_reduction),
sigma_limb=optional_cfg_float(heatmap_cfg, "sigma_limb"),
sigma_joint=optional_cfg_float(heatmap_cfg, "sigma_joint"),
channel_gain_limb=optional_cfg_float(heatmap_cfg, "channel_gain_limb"),
channel_gain_joint=optional_cfg_float(heatmap_cfg, "channel_gain_joint"),
)
pose_paths = iter_pose_paths(args.pose_data_path)
+42
View File
@@ -0,0 +1,42 @@
# Scoliosis Training Change Log
This file is the single run-by-run changelog for Scoliosis1K training and evaluation in this repo.
Use it for:
- what changed between runs
- which dataset/config/checkpoint was used
- what the resulting metrics were
- whether a run is still in progress
## Conventions
- Add one entry before launching a new training run.
- Update the same entry after training/eval completes.
- Record only the delta from the previous relevant run, not a full config dump.
- For skeleton-map control runs, use plain-text `ScoNet-MT-ske` naming in the notes even though the code class is `ScoNet`.
## Runs
| Date | Run | Model | Dataset | Main change vs previous relevant run | Status | Eval result |
| :--- | :--- | :--- | :--- | :--- | :--- | :--- |
| 2026-03-07 | `DRF` | DRF | `Scoliosis1K-drf-pkl-118` | First OpenGait DRF integration on paper `1:1:8` split using shared OpenGait skeleton/PAV path | complete | `58.08 Acc / 78.80 Prec / 60.22 Rec / 56.99 F1` |
| 2026-03-08 | `DRF_paper` | DRF | `Scoliosis1K-drf-pkl-118-paper` | More paper-literal preprocessing: summed/sparser heatmap path, dataset-level PAV normalization, body-prior path refinement | complete | `51.67 Acc / 72.37 Prec / 56.22 Rec / 50.92 F1` |
| 2026-03-08 | `ScoNet_skeleton_118` | ScoNet-MT-ske control | `Scoliosis1K-drf-pkl-118-paper` | Plain skeleton-map baseline on the paper-literal export to isolate DRF vs skeleton-path failure | complete | `38.85 Acc / 61.23 Prec / 46.75 Rec / 35.96 F1` |
| 2026-03-08 | `ScoNet_skeleton_118_sigma8` | ScoNet-MT-ske control | `Scoliosis1K_sigma_8.0/pkl` | Reused upstream/default sigma-8 heatmap export instead of the DRF paper-literal export | complete | `36.45 Acc / 69.17 Prec / 43.82 Rec / 32.78 F1` |
| 2026-03-08 | `ScoNet_skeleton_118_sigma15_bs12x8` | ScoNet-MT-ske control | `Scoliosis1K-drf-pkl-118-sigma15` | Lowered skeleton-map sigma from `8.0` to `1.5` to tighten the pose rasterization | complete | `46.33 Acc / 68.09 Prec / 51.92 Rec / 44.69 F1` |
| 2026-03-09 | `ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8` | ScoNet-MT-ske control | `Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign` | Fixed limb/joint channel misalignment, used mixed sigma `limb=1.5 / joint=8.0`, kept SGD | complete | `50.47 Acc / 69.31 Prec / 54.58 Rec / 48.63 F1` |
| 2026-03-09 | `ScoNet_skeleton_118_sigma15_joint8_limb4_adamw_2gpu_bs12x8` | ScoNet-MT-ske control | `Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign-limb4` | Rebalanced channel intensity with `limb_gain=4.0`; switched optimizer from `SGD` to `AdamW` | complete | `48.60 Acc / 65.97 Prec / 53.19 Rec / 46.41 F1` |
| 2026-03-09 | `ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_1gpu_bs8x8` | ScoNet-MT-ske control | `Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign` | Switched runtime transform from `BaseSilCuttingTransform` to `BaseSilTransform` (`no-cut`), kept `AdamW`, reduced batch size to `8x8` due to 5070 Ti OOM at `12x8` | training | no eval yet |
| 2026-03-09 | `ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_proxy_1gpu` | ScoNet-MT-ske proxy | `Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign` | Fast proxy route: `no-cut`, `AdamW`, `8x8`, `total_iter=2000`, `eval_iter=500`, `test_seq_subset_size=128` | training | no eval yet |
## Current best skeleton baseline
Current best `ScoNet-MT-ske`-style result:
- `ScoNet_skeleton_118_sigma15_joint8_sharedalign_2gpu_bs12x8`
- `50.47 Acc / 69.31 Prec / 54.58 Rec / 48.63 F1`
## Notes
- `ckpt/ScoNet-20000-better.pt` is intentionally not listed here because it is a silhouette checkpoint, not a skeleton-map run.
- `DRF` runs are included because they are part of the same reproduction/debugging loop, but this log should stay focused on train/eval changes, not broader code refactors.
- The long `ScoNet_skeleton_118_sigma15_joint8_sharedalign_nocut_adamw_1gpu_bs8x8` run was intentionally interrupted and superseded by the shorter proxy run once fast-iteration support was added.
+55 -7
View File
@@ -1,10 +1,15 @@
import os
import pickle
import os.path as osp
import torch.utils.data as tordata
import json
import os
import os.path as osp
import pickle
import random
from typing import TypeVar
import torch.utils.data as tordata
from opengait.utils import get_msg_mgr
T = TypeVar("T")
class DataSet(tordata.Dataset):
def __init__(self, data_cfg, training):
@@ -66,6 +71,33 @@ class DataSet(tordata.Dataset):
for idx in range(len(self)):
self.__getitem__(idx)
@staticmethod
def _sample_items(
items: list[T],
subset_size: int | None,
subset_seed: int,
subset_name: str,
msg_mgr=None,
) -> list[T]:
if subset_size is None:
return items
if subset_size <= 0:
raise ValueError(f"{subset_name} must be positive, got {subset_size}")
if subset_size >= len(items):
return items
sampled_items = random.Random(subset_seed).sample(items, subset_size)
sampled_items.sort()
if msg_mgr is not None:
msg_mgr.log_info(
"Using %s subset: %d / %d items (seed=%d)",
subset_name,
len(sampled_items),
len(items),
subset_seed,
)
return sampled_items
def __dataset_parser(self, data_config, training):
dataset_root = data_config['dataset_root']
try:
@@ -80,9 +112,16 @@ class DataSet(tordata.Dataset):
label_list = os.listdir(dataset_root)
train_set = [label for label in train_set if label in label_list]
test_set = [label for label in test_set if label in label_list]
msg_mgr = get_msg_mgr()
train_set = self._sample_items(
train_set,
data_config.get("train_pid_subset_size"),
int(data_config.get("train_pid_subset_seed", 0)),
"train pid",
msg_mgr,
)
miss_pids = [label for label in label_list if label not in (
train_set + test_set)]
msg_mgr = get_msg_mgr()
def log_pid_list(pid_list):
if len(pid_list) >= 3:
@@ -121,5 +160,14 @@ class DataSet(tordata.Dataset):
'Find no .pkl file in %s-%s-%s.' % (lab, typ, vie))
return seqs_info_list
self.seqs_info = get_seqs_info_list(
train_set) if training else get_seqs_info_list(test_set)
if training:
self.seqs_info = get_seqs_info_list(train_set)
else:
self.seqs_info = get_seqs_info_list(test_set)
self.seqs_info = self._sample_items(
self.seqs_info,
data_config.get("test_seq_subset_size"),
int(data_config.get("test_seq_subset_seed", 0)),
"test sequence",
msg_mgr,
)
+22 -13
View File
@@ -553,22 +553,31 @@ class BaseModel(MetaModel, nn.Module):
resume_every_iter = int(model.engine_cfg.get('resume_every_iter', 0))
if resume_every_iter > 0 and model.iteration % resume_every_iter == 0:
model.save_resume_ckpt(model.iteration)
if model.iteration % model.engine_cfg['save_iter'] == 0:
save_iter = int(model.engine_cfg['save_iter'])
eval_iter = int(model.engine_cfg.get('eval_iter', 0))
should_save = save_iter > 0 and model.iteration % save_iter == 0
should_eval = False
if model.engine_cfg['with_test']:
if eval_iter > 0:
should_eval = model.iteration % eval_iter == 0
else:
should_eval = should_save
if should_save:
# save the checkpoint
model.save_ckpt(model.iteration)
# run test if with_test = true
if model.engine_cfg['with_test']:
model.msg_mgr.log_info("Running test...")
model.eval()
result_dict = BaseModel.run_test(model)
model.train()
if model.cfgs['trainer_cfg']['fix_BN']:
model.fix_BN()
if result_dict:
model.msg_mgr.write_to_tensorboard(result_dict)
model.msg_mgr.write_to_wandb(result_dict)
model.msg_mgr.reset_time()
if should_eval:
model.msg_mgr.log_info("Running test...")
model.eval()
result_dict = BaseModel.run_test(model)
model.train()
if model.cfgs['trainer_cfg']['fix_BN']:
model.fix_BN()
if result_dict:
model.msg_mgr.write_to_tensorboard(result_dict)
model.msg_mgr.write_to_wandb(result_dict)
model.msg_mgr.reset_time()
if model.iteration >= model.engine_cfg['total_iter']:
break
@@ -8,6 +8,7 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
import click
import numpy as np
from jaxtyping import Float
from numpy.typing import NDArray
@@ -18,12 +19,12 @@ if str(REPO_ROOT) not in sys.path:
from datasets import pretreatment_scoliosis_drf as drf_prep
POSE_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-pose-pkl")
HEATMAP_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign")
PARTITION_PATH = REPO_ROOT / "datasets/Scoliosis1K/Scoliosis1K_118.json"
HEATMAP_CFG_PATH = REPO_ROOT / "configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml"
REPORT_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.md"
JSON_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.json"
DEFAULT_POSE_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-pose-pkl")
DEFAULT_HEATMAP_ROOT = Path("/mnt/public/data/Scoliosis1K/Scoliosis1K-drf-pkl-118-sigma15-joint8-sharedalign")
DEFAULT_PARTITION_PATH = REPO_ROOT / "datasets/Scoliosis1K/Scoliosis1K_118.json"
DEFAULT_HEATMAP_CFG_PATH = REPO_ROOT / "configs/drf/pretreatment_heatmap_drf_sigma15_joint8.yaml"
DEFAULT_REPORT_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.md"
DEFAULT_JSON_PATH = REPO_ROOT / "research/scoliosis_dataset_analysis_118_sharedalign.json"
EPS = 1e-6
THRESHOLD = 13.0
@@ -53,8 +54,19 @@ class RunningStats:
return self.total / max(self.count, 1)
def load_partition_ids() -> tuple[set[str], set[str]]:
with PARTITION_PATH.open("r", encoding="utf-8") as handle:
@dataclass(frozen=True)
class AnalysisArgs:
pose_root: Path
heatmap_root: Path
partition_path: Path
heatmap_cfg_path: Path
report_path: Path
json_path: Path
report_title: str
def load_partition_ids(partition_path: Path) -> tuple[set[str], set[str]]:
with partition_path.open("r", encoding="utf-8") as handle:
partition = json.load(handle)
return set(partition["TRAIN_SET"]), set(partition["TEST_SET"])
@@ -64,12 +76,12 @@ def sequence_key_from_path(path: Path) -> SequenceKey:
return SequenceKey(pid=parts[-4], label=parts[-3], seq=parts[-2])
def iter_pose_paths() -> list[Path]:
return sorted(POSE_ROOT.glob("*/*/*/*.pkl"))
def iter_pose_paths(pose_root: Path) -> list[Path]:
return sorted(pose_root.glob("*/*/*/*.pkl"))
def iter_heatmap_paths() -> list[Path]:
return sorted(HEATMAP_ROOT.glob("*/*/*/0_heatmap.pkl"))
def iter_heatmap_paths(heatmap_root: Path) -> list[Path]:
return sorted(heatmap_root.glob("*/*/*/0_heatmap.pkl"))
def read_pickle(path: Path) -> object:
@@ -214,10 +226,10 @@ def evaluate_predictions(
}
def analyze() -> dict[str, object]:
train_ids, test_ids = load_partition_ids()
def analyze(args: AnalysisArgs) -> dict[str, object]:
train_ids, test_ids = load_partition_ids(args.partition_path)
heatmap_cfg = drf_prep.load_heatmap_cfg(str(HEATMAP_CFG_PATH))
heatmap_cfg = drf_prep.load_heatmap_cfg(str(args.heatmap_cfg_path))
pose_transform = drf_prep.build_pose_transform(heatmap_cfg)
split_label_counts: dict[str, dict[str, int]] = {
@@ -233,7 +245,7 @@ def analyze() -> dict[str, object]:
"test": defaultdict(RunningStats),
}
for pose_path in iter_pose_paths():
for pose_path in iter_pose_paths(args.pose_root):
key = sequence_key_from_path(pose_path)
split = "train" if key.pid in train_ids else "test"
split_label_counts[split][key.label] += 1
@@ -250,7 +262,7 @@ def analyze() -> dict[str, object]:
labels_test: list[int] = []
pav_means: dict[str, list[float]] = defaultdict(list)
for heatmap_path in iter_heatmap_paths():
for heatmap_path in iter_heatmap_paths(args.heatmap_root):
key = sequence_key_from_path(heatmap_path)
split = "train" if key.pid in train_ids else "test"
heatmap = np.asarray(read_pickle(heatmap_path), dtype=np.float32)
@@ -284,6 +296,11 @@ def analyze() -> dict[str, object]:
pav_classifier = evaluate_predictions(y_test, y_pred, num_classes=3)
results: dict[str, object] = {
"report_title": args.report_title,
"pose_root": str(args.pose_root),
"heatmap_root": str(args.heatmap_root),
"partition_path": str(args.partition_path),
"heatmap_cfg_path": str(args.heatmap_cfg_path),
"split_label_counts": split_label_counts,
"pose_confidence_mean": {
split: {label: stats.mean for label, stats in per_label.items()}
@@ -310,6 +327,11 @@ def analyze() -> dict[str, object]:
def format_report(results: dict[str, object]) -> str:
report_title = str(results["report_title"])
pose_root = str(results["pose_root"])
heatmap_root = str(results["heatmap_root"])
partition_path = str(results["partition_path"])
heatmap_cfg_path = str(results["heatmap_cfg_path"])
split_counts = results["split_label_counts"]
pose_conf = results["pose_confidence_mean"]
pose_valid = results["pose_valid_ratio_mean"]
@@ -332,7 +354,13 @@ def format_report(results: dict[str, object]) -> str:
height_mean, height_p95 = heat_stat("height_mean")
active_fraction_mean, active_fraction_p95 = heat_stat("active_fraction_mean")
return f"""# Scoliosis1K Dataset Analysis (1:1:8, shared-align skeleton maps)
return f"""# {report_title}
Inputs:
- pose root: `{pose_root}`
- heatmap root: `{heatmap_root}`
- partition: `{partition_path}`
- heatmap cfg: `{heatmap_cfg_path}`
## Split
@@ -377,7 +405,7 @@ Train-on-train / test-on-test linear softmax probe over sequence-level PAV:
- macro recall: {pav_probe["macro_recall"]:.2f}%
- macro F1: {pav_probe["macro_f1"]:.2f}%
## Shared-align heatmap geometry
## Heatmap geometry
Combined support bbox stats over all sequences:
- width mean / p95: {width_mean:.2f} / {width_p95:.2f}
@@ -402,19 +430,79 @@ Estimated intensity mass in the columns removed by `BaseSilCuttingTransform`:
- The raw pose data does not look broken. Confidence and valid-joint ratios are high and similar across classes.
- The sequence-level PAV still carries useful label signal, so the dataset is not devoid of scoliosis information.
- Shared alignment removed the old limb-vs-joint registration bug; residual channel-center mismatch is now small.
- The limb/joint alignment fix removed the old registration bug; residual channel-center mismatch is now small.
- The remaining suspicious area is the visual branch: the skeleton map still has frame-to-frame bbox jitter, and the support bbox is almost full-height (`~61.5 / 64`) and fairly dense (`~36%` active pixels), which may be washing out subtle asymmetry cues.
- `BaseSilCuttingTransform` does not appear to be the main failure source for this shared-align export; the measured mass in the removed side margins is near zero.
- `BaseSilCuttingTransform` does not appear to be the main failure source for this export; the measured mass in the removed side margins is near zero.
- The dataset itself looks usable; the bigger issue still appears to be how the current skeleton-map preprocessing/runtime path presents that data to ScoNet.
"""
def main() -> None:
    """Run the full dataset analysis and write the markdown + JSON reports."""
    summary = analyze()
    report_text = format_report(summary)
    REPORT_PATH.write_text(report_text, encoding="utf-8")
    serialized = json.dumps(summary, indent=2, sort_keys=True)
    JSON_PATH.write_text(serialized, encoding="utf-8")
    for destination in (REPORT_PATH, JSON_PATH):
        print(f"Wrote {destination}")
@click.command()
@click.option(
    "--pose-root",
    type=click.Path(path_type=Path, file_okay=False),
    default=DEFAULT_POSE_ROOT,
    show_default=True,
)
@click.option(
    "--heatmap-root",
    type=click.Path(path_type=Path, file_okay=False),
    default=DEFAULT_HEATMAP_ROOT,
    show_default=True,
)
@click.option(
    "--partition-path",
    type=click.Path(path_type=Path, dir_okay=False),
    default=DEFAULT_PARTITION_PATH,
    show_default=True,
)
@click.option(
    "--heatmap-cfg-path",
    type=click.Path(path_type=Path, dir_okay=False),
    default=DEFAULT_HEATMAP_CFG_PATH,
    show_default=True,
)
@click.option(
    "--report-path",
    type=click.Path(path_type=Path, dir_okay=False),
    default=DEFAULT_REPORT_PATH,
    show_default=True,
)
@click.option(
    "--json-path",
    type=click.Path(path_type=Path, dir_okay=False),
    default=DEFAULT_JSON_PATH,
    show_default=True,
)
@click.option(
    "--report-title",
    type=str,
    default="Scoliosis1K Dataset Analysis (1:1:8, shared-align skeleton maps)",
    show_default=True,
)
def main(
    pose_root: Path,
    heatmap_root: Path,
    partition_path: Path,
    heatmap_cfg_path: Path,
    report_path: Path,
    json_path: Path,
    report_title: str,
) -> None:
    # CLI entry point: bundle the parsed options into AnalysisArgs, run the
    # analysis once, then emit the markdown report and the raw JSON results.
    # NOTE: no docstring on purpose — click would surface it as --help text
    # and that would change the command's observable output.
    analysis_args = AnalysisArgs(
        pose_root=pose_root,
        heatmap_root=heatmap_root,
        partition_path=partition_path,
        heatmap_cfg_path=heatmap_cfg_path,
        report_path=report_path,
        json_path=json_path,
        report_title=report_title,
    )
    summary = analyze(analysis_args)
    analysis_args.report_path.write_text(format_report(summary), encoding="utf-8")
    serialized = json.dumps(summary, indent=2, sort_keys=True)
    analysis_args.json_path.write_text(serialized, encoding="utf-8")
    for destination in (analysis_args.report_path, analysis_args.json_path):
        print(f"Wrote {destination}")
if __name__ == "__main__":