diff --git a/datasets/Scoliosis1K/README.md b/datasets/Scoliosis1K/README.md index 5494a9d..737f9ef 100644 --- a/datasets/Scoliosis1K/README.md +++ b/datasets/Scoliosis1K/README.md @@ -69,6 +69,33 @@ python -m torch.distributed.launch --nproc_per_node=4 \ opengait/main.py --cfgs configs/sconet/sconet_scoliosis1k.yaml --phase test --log_to_file ``` +### Fixed-pool ratio comparison + +If you want to compare `1:1:2` against `1:1:8` without changing the evaluation +pool, do not compare `Scoliosis1K_112.json` against `Scoliosis1K_118.json` +directly. Those two files differ substantially in train/test membership. + +For a cleaner same-pool comparison, use: + +* `datasets/Scoliosis1K/Scoliosis1K_118.json` + * original `1:1:8` split +* `datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json` + * same `TEST_SET` as `118` + * same positive/neutral `TRAIN_SET` ids as `118` + * downsampled `TRAIN_SET` negatives to `148`, giving train counts + `74 positive / 74 neutral / 148 negative` + +The helper used to generate that derived partition is: + +```bash +uv run python scripts/build_scoliosis_fixedpool_partition.py \ + --base-partition datasets/Scoliosis1K/Scoliosis1K_118.json \ + --dataset-root /mnt/public/data/Scoliosis1K/Scoliosis1K-sil-pkl \ + --negative-multiplier 2 \ + --output-path datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json \ + --seed 118 +``` + ### Modality sanity check The silhouette and skeleton-map pipelines are different experiments and should not be mixed when you interpret results. diff --git a/datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json b/datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json new file mode 100644 index 0000000..e561fea --- /dev/null +++ b/datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json @@ -0,0 +1,1051 @@ +{ + "TRAIN_SET": [ + "00010", + "00030", + "00036", + "00039", + "00041", + "00054", + "00057", + "00058", + "00068", + "00069", + "00085", + "00090", + "00107", + "00124", + "00134", + "00153", + "00158", + "00173", + "00174", + "00178", + "00185", + "00193", + "00196", + "00202", + "00204", + "00213", + "00219", + "00221", + "00230", + "00233", + "00244", + "00248", + "00268", + "00274", + "00275", + "00276", + "00277", + "00280", + "00283", + "00296", + "00310", + "00312", + "00313", + "00314", + "00319", + "00332", + "00348", + "00352", + "00356", + "00363", + "00372", + "00375", + "00376", + "00381", + "00384", + "00399", + "00400", + "00412", + "00414", + "00418", + "00420", + "00422", + "00427", + "00428", + "00429", + "00433", + "00443", + "00453", + "00455", + "00458", + "00461", + "00464", + "00480", + "00486", + "01295", + "01300", + "01301", + "01302", + "01303", + "01305", + "01308", + "01310", + "01312", + "01314", + "01315", + "01317", + "01325", + "01330", + "01341", + "01345", + "01354", + "01356", + "01358", + "01362", + "01365", + "01368", + "01369", + "01370", + "01372", + "01377", + "01378", + "01380", + "01382", + "01389", + "01390", + "01392", + "01395", + "01396", + "01397", + "01399", + "01404", + "01406", + "01409", + "01410", + "01411", + "01412", + "01413", + "01414", + "01416", + "01422", + "01427", + "01430", + "01431", + "01432", + "01436", + "01437", + "01438", + "01441", + "01442", + "01443", + "01449", + "01450", + "01453", + "01457", + "01463", + "01465", + "01468", + "01470", + "01472", + "01473", + "01474", + "01477", + "01482", + "01483", + "01488", + "01490", + "01491", + "01492", + "00497", + "00501", + "00505", + "00510", + "00518", + "00524", + "00535", + "00539", + "00554", + "00556", + "00557", + "00558", + "00567", + "00569", + "00576", + "00579", + "00584", + "00589", + "00594", + "00596", + "00597", + "00598", + "00605", + "00609", + "00621", + "00626", + "00629", + "00648", + "00650", + "00659", + "00662", + "00665", + "00667", + "00675", + "00684", + "00685", + "00687", + "00690", + "00694", + "00696", + "00697", + "00699", + "00702", + "00705", + "00712", + "00713", + "00717", + "00722", + "00724", + "00725", + "00737", + "00754", + "00761", + "00763", + "00764", + "00770", + "00780", + "00797", + "00799", + "00800", + "00803", + "00805", + "00807", + "00809", + "00845", + "00852", + "00853", + "00854", + "00858", + "00860", + "00862", + "00869", + "00879", + "00880", + "00887", + "00888", + "00893", + "00897", + "00905", + "00913", + "00916", + "00928", + "00933", + "00946", + "00949", + "00960", + "00963", + "00964", + "00971", + "00976", + "00981", + "00988", + "00989", + "01002", + "01007", + "01008", + "01012", + "01013", + "01015", + "01026", + "01040", + "01043", + "01044", + "01045", + "01052", + "01056", + "01062", + "01065", + "01067", + "01070", + "01078", + "01079", + "01081", + "01088", + "01098", + "01107", + "01118", + "01119", + "01120", + "01126", + "01128", + "01135", + "01139", + "01143", + "01144", + "01147", + "01155", + "01158", + "01160", + "01173", + "01190", + "01198", + "01201", + "01202", + "01209", + "01214", + "01215", + "01228", + "01241", + "01253", + "01263", + "01264", + "01266", + "01270", + "01273", + "01281", + "01282", + "01285" + ], + "TEST_SET": [ + "01466", + "01321", + "00490", + "01187", + "01293", + "00358", + "00645", + "00093", + "01336", + "00370", + "00328", + "00606", + "01418", + "01426", + "00457", + "01080", + "00271", + "00014", + "00272", + "00563", + "00602", + "01323", + "00465", + "00199", + "00142", + "00828", + "01060", + "00123", + "00456", + "00638", + "00369", + "00831", + "00269", + "00331", + "01280", + "00577", + "00397", + "00040", + "01033", + "00618", + "00625", + "00413", + "00033", + "00566", + "00122", + "00978", + "01375", + "01189", + "00466", + "01311", + "01030", + "01232", + "00114", + "01136", + "00943", + "01186", + "01298", + "00232", + "00362", + "01434", + "00528", + "01133", + "00866", + "01481", + "00170", + "00885", + "00073", + "00249", + "00125", + "00008", + "00901", + "01019", + "00367", + "00315", + "01309", + "00286", + "00474", + "00297", + "00094", + "01337", + "01141", + "01271", + "01480", + "00168", + "01348", + "00317", + "00653", + "00104", + "00459", + "00214", + "00206", + "01294", + "00430", + "01335", + "00181", + "01326", + "01391", + "00637", + "00050", + "00115", + "01433", + "00245", + "00715", + "00353", + "01343", + "00364", + "01115", + "00500", + "00477", + "01207", + "01467", + "00522", + "01037", + "00154", + "00293", + "00591", + "00401", + "01489", + "01479", + "01439", + "01464", + "00211", + "00207", + "00470", + "01460", + "01063", + "00469", + "00938", + "00568", + "00012", + "00305", + "00394", + "00383", + "00136", + "00285", + "00389", + "00270", + "01448", + "00450", + "00599", + "00471", + "01364", + "00338", + "01374", + "01327", + "00210", + "00478", + "01180", + "00035", + "01021", + "00043", + "00074", + "00388", + "00436", + "01444", + "01347", + "01353", + "00883", + "01383", + "01366", + "00259", + "00159", + "00042", + "00449", + "00224", + "00673", + "00079", + "00463", + "00020", + "01384", + "01329", + "00536", + "00278", + "00385", + "00817", + "00264", + "00438", + "00723", + "00682", + "00220", + "00452", + "00410", + "00056", + "01381", + "01023", + "00146", + "01307", + "00917", + "00445", + "00475", + "00736", + "00406", + "00116", + "00941", + "01085", + "00071", + "00656", + "00849", + "00533", + "00333", + "00131", + "00679", + "00779", + "01451", + "00482", + "01367", + "01025", + "00161", + "00007", + "01233", + "00492", + "00087", + "00896", + "00250", + "00891", + "00409", + "00437", + "01476", + "00467", + "01172", + "01289", + "00396", + "01333", + "01456", + "00308", + "01051", + "00148", + "00649", + "01355", + "00454", + "00434", + "00326", + "00393", + "00216", + "01010", + "00026", + "01393", + "00176", + "01415", + "00120", + "01140", + "00403", + "00145", + "00337", + "01265", + "00243", + "00366", + "00966", + "00786", + "00177", + "00102", + "01017", + "00239", + "00045", + "00425", + "01332", + "00374", + "00448", + "01331", + "00683", + "00347", + "00953", + "00959", + "00015", + "01429", + "00212", + "01357", + "01306", + "00016", + "01485", + "00081", + "00065", + "01328", + "00688", + "00900", + "01082", + "00408", + "00377", + "01194", + "00137", + "00439", + "00473", + "00778", + "01403", + "00138", + "00758", + "00302", + "01459", + "00671", + "01484", + "01221", + "00262", + "00180", + "00218", + "00937", + "01182", + "00088", + "00387", + "00257", + "00581", + "00491", + "00747", + "00037", + "01324", + "00005", + "00225", + "00155", + "00411", + "00119", + "00060", + "00291", + "00950", + "01246", + "00351", + "00149", + "00444", + "00435", + "00263", + "01421", + "00441", + "00672", + "00378", + "00121", + "00339", + "00281", + "00344", + "00479", + "00022", + "01009", + "00151", + "01296", + "01316", + "00327", + "00141", + "00915", + "00237", + "00282", + "00342", + "00279", + "00402", + "00325", + "00864", + "00548", + "00067", + "00290", + "01176", + "00172", + "00424", + "00261", + "01447", + "00910", + "00192", + "01454", + "01320", + "00182", + "01053", + "00003", + "00256", + "00368", + "00655", + "01036", + "00140", + "00267", + "00923", + "00945", + "01216", + "00186", + "01487", + "00098", + "00273", + "00059", + "00190", + "01462", + "00255", + "00476", + "00095", + "00670", + "00801", + "00657", + "00238", + "01419", + "00398", + "00203", + "00163", + "00341", + "01222", + "01083", + "00802", + "00106", + "00189", + "01408", + "00850", + "00335", + "00200", + "01230", + "01361", + "00077", + "00365", + "00101", + "00957", + "00001", + "00634", + "01192", + "00083", + "01417", + "01123", + "00046", + "01359", + "00716", + "01344", + "00511", + "00468", + "00379", + "00109", + "00147", + "00360", + "01191", + "00289", + "00006", + "00359", + "00105", + "00049", + "01475", + "00496", + "01424", + "00171", + "01376", + "00851", + "00062", + "00380", + "01407", + "01363", + "00226", + "00306", + "00299", + "00152", + "00295", + "00488", + "00110", + "01340", + "00820", + "00954", + "00018", + "00251", + "00072", + "01313", + "00936", + "01299", + "00303", + "00240", + "01243", + "00052", + "01322", + "01339", + "00487", + "01446", + "00485", + "00169", + "00547", + "00794", + "00055", + "01259", + "00808", + "01290", + "00681", + "00451", + "00242", + "00231", + "01458", + "00150", + "01388", + "01108", + "00127", + "00525", + "00126", + "00442", + "00113", + "00762", + "00354", + "00288", + "01039", + "00301", + "00855", + "00118", + "00951", + "00877", + "00139", + "01297", + "00197", + "00096", + "01452", + "01035", + "00084", + "00549", + "00419", + "00600", + "00162", + "00755", + "01153", + "00373", + "00179", + "00947", + "00355", + "01373", + "00426", + "00983", + "00205", + "00390", + "00330", + "01152", + "00075", + "00870", + "00329", + "00714", + "01401", + "00063", + "00782", + "01360", + "00044", + "00790", + "01193", + "00562", + "00322", + "00287", + "00222", + "01349", + "00307", + "01455", + "00184", + "00617", + "00051", + "01240", + "00097", + "01387", + "01398", + "00132", + "00031", + "00316", + "01250", + "00346", + "01428", + "00914", + "00294", + "01203", + "01435", + "00357", + "01425", + "00392", + "00195", + "00432", + "01138", + "00011", + "00258", + "00091", + "00070", + "00028", + "00709", + "00318", + "00382", + "01400", + "00080", + "00129", + "01352", + "01318", + "00198", + "00167", + "00252", + "00922", + "00415", + "00309", + "00405", + "00489", + "00194", + "00421", + "01420", + "01351", + "00229", + "00386", + "00183", + "00334", + "00053", + "00133", + "01334", + "01461", + "00635", + "01234", + "00156", + "00292", + "00350", + "00160", + "01169", + "00234", + "00336", + "00002", + "00517", + "00795", + "00404", + "00955", + "00208", + "00300", + "00025", + "00027", + "00349", + "00023", + "00934", + "00130", + "00343", + "00164", + "00311", + "01100", + "01178", + "00236", + "00925", + "00089", + "00201", + "01486", + "01249", + "01478", + "00304", + "01171", + "01225", + "00188", + "00241", + "01195", + "01445", + "00024", + "00320", + "00324", + "00066", + "01342", + "00038", + "00416", + "00521", + "01385", + "00082", + "00644", + "00099", + "00481", + "01440", + "00506", + "00235", + "00143", + "00013", + "00017", + "00078", + "01338", + "00228", + "00092", + "00166", + "00447", + "00086", + "00395", + "00032", + "00907", + "00987", + "01159", + "00061", + "01350", + "00423", + "00371", + "00340", + "01145", + "01042", + "01076", + "00227", + "01402", + "00157", + "00021", + "00345", + "00323", + "01319", + "00698", + "01059", + "00175", + "00034", + "01346", + "01371", + "00878", + "00004", + "00128", + "00165", + "00407", + "01116", + "00321", + "01471", + "00431", + "00361", + "00260", + "00700", + "00215", + "00117", + "00217", + "00144", + "01386", + "00135", + "00460", + "00472", + "00187", + "00253", + "01423", + "00973", + "01379", + "01231", + "00391", + "01113", + "00047", + "00029", + "01000", + "00775", + "00000", + "00284", + "01073", + "00111", + "00265", + "00009", + "01162", + "01394", + "00540", + "00100", + "00076", + "01469", + "00932", + "00064", + "00462", + "00446", + "00774", + "00440", + "01405", + "00209", + "00191", + "00108", + "00559", + "00103", + "00484", + "00298", + "00783", + "00246", + "00483", + "00223", + "00647", + "00417", + "00019", + "00756", + "00048", + "00247", + "00112", + "00254", + "00266", + "01304" + ] +} diff --git a/docs/drf_author_checkpoint_compat.md b/docs/drf_author_checkpoint_compat.md index d0c5189..d4a142e 100644 --- a/docs/drf_author_checkpoint_compat.md +++ b/docs/drf_author_checkpoint_compat.md @@ -212,6 +212,13 @@ One practical caveat on `1:1:2` vs `1:1:8` comparisons in this repo: - so local `112` vs `118` results should not be overinterpreted as a pure class-balance ablation unless the train/test pool is explicitly held fixed +To support a clean same-pool comparison, the repo now also includes: +- `datasets/Scoliosis1K/Scoliosis1K_118_fixedpool_train112.json` + +That partition keeps the full `118` `TEST_SET` unchanged and keeps the same +positive/neutral `TRAIN_SET` ids as `118`, but downsamples `TRAIN_SET` negatives +to `148` so the train ratio becomes `74 / 74 / 148` (`1:1:2`). + The strongest recovered result: - `80.24 / 76.73 / 76.40 / 76.56` diff --git a/scripts/build_scoliosis_fixedpool_partition.py b/scripts/build_scoliosis_fixedpool_partition.py new file mode 100644 index 0000000..11b06c4 --- /dev/null +++ b/scripts/build_scoliosis_fixedpool_partition.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import json +import random +from collections import Counter +from pathlib import Path +from typing import TypedDict, cast + +import click + + +class Partition(TypedDict): + TRAIN_SET: list[str] + TEST_SET: list[str] + + +def infer_pid_label(dataset_root: Path, pid: str) -> str: + pid_root = dataset_root / pid + if not pid_root.exists(): + raise FileNotFoundError(f"PID root not found under dataset root: {pid_root}") + label_dirs = sorted([entry.name.lower() for entry in pid_root.iterdir() if entry.is_dir()]) + if len(label_dirs) != 1: + raise ValueError(f"Expected exactly one class dir for pid {pid}, got {label_dirs}") + label = label_dirs[0] + if label not in {"positive", "neutral", "negative"}: + raise ValueError(f"Unexpected label directory for pid {pid}: {label}") + return label + + +@click.command() +@click.option( + "--base-partition", + type=click.Path(path_type=Path, exists=True, dir_okay=False), + required=True, + help="Path to the source partition JSON, e.g. datasets/Scoliosis1K/Scoliosis1K_118.json", +) +@click.option( + "--dataset-root", + type=click.Path(path_type=Path, exists=True, file_okay=False), + required=True, + help="Dataset root used to infer each pid class label, e.g. /mnt/public/data/Scoliosis1K/Scoliosis1K-sil-pkl", +) +@click.option( + "--negative-multiplier", + type=int, + required=True, + help="Target negative count as a multiple of the positive/neutral count, e.g. 2 for 1:1:2", +) +@click.option( + "--output-path", + type=click.Path(path_type=Path, dir_okay=False), + required=True, + help="Path to write the derived partition JSON.", +) +@click.option( + "--seed", + type=int, + default=118, + show_default=True, + help="Random seed used when downsampling negatives.", +) +def main( + base_partition: Path, + dataset_root: Path, + negative_multiplier: int, + output_path: Path, + seed: int, +) -> None: + with base_partition.open("r", encoding="utf-8") as handle: + partition = cast(Partition, json.load(handle)) + + train_ids = list(partition["TRAIN_SET"]) + test_ids = list(partition["TEST_SET"]) + + train_by_label: dict[str, list[str]] = {"positive": [], "neutral": [], "negative": []} + for pid in train_ids: + label = infer_pid_label(dataset_root, pid) + train_by_label[label].append(pid) + + pos_count = len(train_by_label["positive"]) + neu_count = len(train_by_label["neutral"]) + neg_count = len(train_by_label["negative"]) + if pos_count != neu_count: + raise ValueError( + "This helper assumes equal positive/neutral train counts so that only " + + f"negative downsampling changes the ratio. Got positive={pos_count}, neutral={neu_count}." + ) + + target_negative_count = negative_multiplier * pos_count + if target_negative_count > neg_count: + raise ValueError( + f"Requested {target_negative_count} negatives but only {neg_count} are available " + + f"in base partition {base_partition}." + ) + + rng = random.Random(seed) + sampled_negatives = sorted(rng.sample(train_by_label["negative"], target_negative_count)) + derived_train = ( + sorted(train_by_label["positive"]) + + sorted(train_by_label["neutral"]) + + sampled_negatives + ) + + derived_partition = { + "TRAIN_SET": derived_train, + "TEST_SET": test_ids, + } + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("w", encoding="utf-8") as handle: + json.dump(derived_partition, handle, indent=2) + _ = handle.write("\n") + + train_counts = Counter(infer_pid_label(dataset_root, pid) for pid in derived_train) + test_counts = Counter(infer_pid_label(dataset_root, pid) for pid in test_ids) + click.echo(f"wrote {output_path}") + click.echo(f"train_counts={dict(train_counts)}") + click.echo(f"test_counts={dict(test_counts)}") + + +if __name__ == "__main__": + main()