Reconstruct the directory structure

This commit is contained in:
darkliang
2022-04-12 13:44:07 +08:00
parent 213b3a658f
commit 28f50410a5
32 changed files with 162 additions and 45 deletions
+129
View File
@@ -0,0 +1,129 @@
{
"TRAIN_SET": [
"001",
"002",
"003",
"004",
"006",
"007",
"008",
"009",
"010",
"011",
"012",
"013",
"014",
"015",
"016",
"017",
"018",
"019",
"020",
"021",
"022",
"023",
"024",
"025",
"026",
"027",
"028",
"029",
"030",
"031",
"032",
"033",
"034",
"035",
"036",
"037",
"038",
"039",
"040",
"041",
"042",
"043",
"044",
"045",
"046",
"047",
"048",
"049",
"050",
"051",
"052",
"053",
"054",
"055",
"056",
"057",
"058",
"059",
"060",
"061",
"062",
"063",
"064",
"065",
"066",
"067",
"068",
"069",
"070",
"071",
"072",
"073",
"074"
],
"TEST_SET": [
"075",
"076",
"077",
"078",
"079",
"080",
"081",
"082",
"083",
"084",
"085",
"086",
"087",
"088",
"089",
"090",
"091",
"092",
"093",
"094",
"095",
"096",
"097",
"098",
"099",
"100",
"101",
"102",
"103",
"104",
"105",
"106",
"107",
"108",
"109",
"110",
"111",
"112",
"113",
"114",
"115",
"116",
"117",
"118",
"119",
"120",
"121",
"122",
"123",
"124"
]
}
+130
View File
@@ -0,0 +1,130 @@
{
"TRAIN_SET": [
"001",
"002",
"003",
"004",
"005",
"006",
"007",
"008",
"009",
"010",
"011",
"012",
"013",
"014",
"015",
"016",
"017",
"018",
"019",
"020",
"021",
"022",
"023",
"024",
"025",
"026",
"027",
"028",
"029",
"030",
"031",
"032",
"033",
"034",
"035",
"036",
"037",
"038",
"039",
"040",
"041",
"042",
"043",
"044",
"045",
"046",
"047",
"048",
"049",
"050",
"051",
"052",
"053",
"054",
"055",
"056",
"057",
"058",
"059",
"060",
"061",
"062",
"063",
"064",
"065",
"066",
"067",
"068",
"069",
"070",
"071",
"072",
"073",
"074"
],
"TEST_SET": [
"075",
"076",
"077",
"078",
"079",
"080",
"081",
"082",
"083",
"084",
"085",
"086",
"087",
"088",
"089",
"090",
"091",
"092",
"093",
"094",
"095",
"096",
"097",
"098",
"099",
"100",
"101",
"102",
"103",
"104",
"105",
"106",
"107",
"108",
"109",
"110",
"111",
"112",
"113",
"114",
"115",
"116",
"117",
"118",
"119",
"120",
"121",
"122",
"123",
"124"
]
}
+27
View File
@@ -0,0 +1,27 @@
# CASIA-B
Download URL: http://www.cbsr.ia.ac.cn/GaitDatasetB-silh.zip
- Original
```
CASIA-B
001 (subject)
bg-01 (type)
000 (view)
001-bg-01-000-001.png (frame)
001-bg-01-000-002.png (frame)
......
......
......
......
```
- Run `python misc/pretreatment.py --input_path CASIA-B --output_path CASIA-B-pkl`
- Processed
```
CASIA-B-pkl
001 (subject)
bg-01 (type)
000 (view)
000.pkl (contains all frames)
......
......
......
```
File diff suppressed because one or more lines are too long
+77
View File
@@ -0,0 +1,77 @@
# GREW Tutorial
<!-- ![](http://hid2022.iapr-tc4.org/wp-content/uploads/sites/7/2022/03/%E5%9B%BE%E7%89%871-2.png) -->
This is for [GREW-Benchmark](https://github.com/GREW-Benchmark/GREW-Benchmark). We report our result of 48% using the baseline model. In order for participants to better start the first step, we provide a tutorial on how to use OpenGait for GREW.
## Preprocess the dataset
Download the raw dataset from the [official link](https://www.grew-benchmark.org/download.html). You will get three compressed files, i.e. `train.zip`, `test.zip` and `distractor.zip`.
Step 1: Unzip train and test:
```shell
unzip -P password train.zip (password is the obtained password)
tar -xzvf train.tgz
cd train
ls *.tgz | xargs -n1 tar xzvf
```
```shell
unzip -P password test.zip (password is the obtained password)
tar -xzvf test.tgz
cd test & cd gallery
ls *.tgz | xargs -n1 tar xzvf
cd .. & cd probe
ls *.tgz | xargs -n1 tar xzvf
```
After unpacking these compressed files, run this command:
Step 2: To rearrange the GREW dataset directory into the id-type-view structure, run
```
python datasets/GREW/rearrange_GREW.py --input_path Path_of_GREW-raw --output_path Path_of_GREW-rearranged
```
Step 3: To transform the images into pickle files, run
```
python datasets/pretreatment.py --input_path Path_of_GREW-rearranged --output_path Path_of_GREW-pkl
```
Then you will see the structure like:
- Processed
```
GREW-pkl
├── 00001train (subject in training set)
├── 00
├── 4XPn5Z28
├── 4XPn5Z28.pkl
├──5TXe8svE
├── 5TXe8svE.pkl
......
├── 00001 (subject in testing set)
├── 01
├── 79XJefi8
├── 79XJefi8.pkl
├── 02
├── t16VLaQf
├── t16VLaQf.pkl
├── probe
├── etaGVnWf
├── etaGVnWf.pkl
├── eT1EXpgZ
├── eT1EXpgZ.pkl
...
...
```
## Train the dataset
Modify the `dataset_root` in `./config/baseline/baseline_GREW.yaml`, and then run this command:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./config/baseline/baseline_GREW.yaml --phase train
```
## Get the submission file
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./config/baseline/baseline_GREW.yaml --phase test
```
The result will be generated in your working directory; you must rename and compress it according to the requirements before submitting.
## Evaluation locally
The original GREW protocol treats both seq_01 and seq_02 as gallery, and no ground truth is provided for the probe, so it is necessary to upload the submission file to the GREW competition site. To evaluate locally instead, we separate the test set as follows: seq_01 as gallery and seq_02 as probe. Then, if you modify `eval_func` in `./config/baseline/baseline_GREW.yaml` to `identification_real_scene`, you can obtain results locally, as in the OUMVLP setting.
+89
View File
@@ -0,0 +1,89 @@
import argparse
import os
import shutil
from pathlib import Path
from tqdm import tqdm
TOTAL_Test = 24000
TOTAL_Train = 20000
def rearrange_train(train_path: Path, output_path: Path) -> None:
progress = tqdm(total=TOTAL_Train)
for sid in train_path.iterdir():
if not sid.is_dir():
continue
for sub_seq in sid.iterdir():
if not sub_seq.is_dir():
continue
for subfile in os.listdir(sub_seq):
src = os.path.join(train_path, sid.name, sub_seq.name)
dst = os.path.join(output_path, sid.name+'train', '00', sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
progress.update(1)
def rearrange_test(test_path: Path, output_path: Path) -> None:
# for gallery
gallery = Path(os.path.join(test_path, 'gallery'))
probe = Path(os.path.join(test_path, 'probe'))
progress = tqdm(total=TOTAL_Test)
for sid in gallery.iterdir():
if not sid.is_dir():
continue
cnt = 1
for sub_seq in sid.iterdir():
if not sub_seq.is_dir():
continue
for subfile in sorted(os.listdir(sub_seq)):
src = os.path.join(gallery, sid.name, sub_seq.name)
dst = os.path.join(output_path, sid.name, '%02d'%cnt, sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
cnt += 1
progress.update(1)
# for probe
for sub_seq in probe.iterdir():
if not sub_seq.is_dir():
continue
for subfile in os.listdir(sub_seq):
src = os.path.join(probe, sub_seq.name)
dst = os.path.join(output_path, 'probe', '03', sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
progress.update(1)
def rearrange_GREW(input_path: Path, output_path: Path) -> None:
os.makedirs(output_path, exist_ok=True)
for folder in input_path.iterdir():
if not folder.is_dir():
continue
print(f'Rearranging {folder}')
if folder.name == 'train':
rearrange_train(folder,output_path)
if folder.name == 'test':
rearrange_test(folder, output_path)
if folder.name == 'distractor':
pass
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GREW rearrange tool')
parser.add_argument('-i', '--input_path', required=True, type=str,
help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='GREW_rearranged', type=str,
help='Root path for output.')
args = parser.parse_args()
input_path = Path(args.input_path).resolve()
output_path = Path(args.output_path).resolve()
rearrange_GREW(input_path, output_path)
File diff suppressed because it is too large Load Diff
+26
View File
@@ -0,0 +1,26 @@
# HID Tutorial
![](http://hid2022.iapr-tc4.org/wp-content/uploads/sites/7/2022/03/%E5%9B%BE%E7%89%871-2.png)
This is the official support for competition of [Human Identification at a Distance (HID)](http://hid2022.iapr-tc4.org/). We report our result of 68.7% using the baseline model and 80.0% with re-ranking. In order for participants to better start the first step, we provide a tutorial on how to use OpenGait for HID.
## Preprocess the dataset
Download the raw dataset from the [official link](http://hid2022.iapr-tc4.org/). You will get three compressed files, i.e. `train.tar`, `HID2022_test_gallery.zip` and `HID2022_test_probe.zip`.
After unpacking these three files, run this command:
```shell
python misc/HID/pretreatment_HID.py --input_train_path="train" --input_gallery_path="HID2022_test_gallery" --input_probe_path="HID2022_test_probe" --output_path="HID-128-pkl"
```
## Train the dataset
Modify the `dataset_root` in `./misc/HID/baseline_hid.yaml`, and then run this command:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./misc/HID/baseline_hid.yaml --phase train
```
You can also download the [trained model](https://github.com/ShiqiYu/OpenGait/releases/download/v1.1/pretrained_hid_model.zip) and place it in `output` after unzipping.
## Get the submission file
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./misc/HID/baseline_hid.yaml --phase test
```
The result will be generated in your working directory.
## Submit the result
Follow the steps in the [official submission guide](https://codalab.lisn.upsaclay.fr/competitions/2542#participate): you need to rename the file to `submission.csv` and compress it to a zip file. Finally, you can upload the zip file to the [official submission link](https://codalab.lisn.upsaclay.fr/competitions/2542#participate-submit_results).
+123
View File
@@ -0,0 +1,123 @@
import os
import cv2
import numpy as np
import argparse
import pickle
from tqdm import tqdm
parser = argparse.ArgumentParser(description='Test')
parser.add_argument('--input_train_path', default='', type=str,
help='Root path of train.')
parser.add_argument('--input_gallery_path', default='', type=str,
help='Root path of gallery.')
parser.add_argument('--input_probe_path', default='', type=str,
help='Root path of probe.')
parser.add_argument('--output_path', default='', type=str,
help='Root path for output.')
opt = parser.parse_args()
OUTPUT_PATH = opt.output_path
print('Pretreatment Start.\n'
'Input train path: {}\n'
'Input gallery path: {}\n'
'Input probe path: {}\n'
'Output path: {}\n'.format(
opt.input_train_path, opt.input_gallery_path, opt.input_probe_path, OUTPUT_PATH))
INPUT_PATH = opt.input_train_path
print("Walk the input train path")
id_list = os.listdir(INPUT_PATH)
id_list.sort()
for _id in tqdm(id_list):
seq_type = os.listdir(os.path.join(INPUT_PATH, _id))
seq_type.sort()
for _seq_type in seq_type:
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _id, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _id, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
print("Walk the input gallery path")
INPUT_PATH = opt.input_gallery_path
id_list = os.listdir(INPUT_PATH)
id_list.sort()
for _id in tqdm(id_list):
seq_type = os.listdir(os.path.join(INPUT_PATH, _id))
seq_type.sort()
for _seq_type in seq_type:
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _id, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _id, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
print("Finish {}".format(_id))
print("Walk the input probe path")
INPUT_PATH = opt.input_probe_path
seq_type = os.listdir(INPUT_PATH)
seq_type.sort()
_id = "probe"
for _seq_type in tqdm(seq_type):
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
File diff suppressed because it is too large Load Diff
+67
View File
@@ -0,0 +1,67 @@
# OUMVLP
Step1: Download URL: http://www.am.sanken.osaka-u.ac.jp/BiometricDB/GaitMVLP.html
Step2: Unzip the dataset with the command below; you will then get a directory structure like the following:
```
python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_path Path_of_OUMVLP-raw --password Given_Password
```
- Original
```
OUMVLP-raw
Silhouette_000-00 (view-sequence)
00001 (subject)
0001.png (frame)
0002.png (frame)
......
00002
0001.png (frame)
0002.png (frame)
......
......
Silhouette_000-01
00001
0001.png (frame)
0002.png (frame)
......
00002
0001.png (frame)
0002.png (frame)
......
......
Silhouette_015-00
......
Silhouette_015-01
......
......
```
Step 3: To rearrange the OUMVLP dataset directory into the id-type-view structure, run
```
python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-rearranged
```
Step 4: To transform the images into pickle files, run
```
python datasets/pretreatment.py --input_path Path_of_OUMVLP-rearranged --output_path Path_of_OUMVLP-pkl
```
- Processed
```
OUMVLP-pkl
00001 (subject)
00 (sequence)
000 (view)
000.pkl (contains all frames)
015 (view)
015.pkl (contains all frames)
...
01 (sequence)
000 (view)
000.pkl (contains all frames)
015 (view)
015.pkl (contains all frames)
......
00002 (subject)
......
......
```
+44
View File
@@ -0,0 +1,44 @@
import argparse
import os
from pathlib import Path
import py7zr
from tqdm import tqdm
def extractall(base_path: Path, output_path: Path, passwords) -> None:
"""Extract all archives in base_path to output_path.
Args:
base_path (Path): Path to the directory containing the archives.
output_path (Path): Path to the directory to extract the archives to.
"""
os.makedirs(output_path, exist_ok=True)
for file_path in tqdm(list(base_path.rglob('Silhouette_*.7z'))):
if output_path.joinpath(file_path.stem).exists():
continue
with py7zr.SevenZipFile(file_path, password=passwords) as archive:
total_items = len(
[f for f in archive.getnames() if f.endswith('.png')]
)
archive.extractall(output_path)
extracted_files = len(
list(output_path.joinpath(file_path.stem).rglob('*.png')))
assert extracted_files == total_items, f'{extracted_files} != {total_items}'
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OUMVLP extractor')
parser.add_argument('-b', '--input_path', type=str,
required=True, help='Base path to OUMVLP .7z files')
parser.add_argument('-o', '--output_path', type=str,
required=True, help='Output path for extracted files')
parser.add_argument('-p', '--password', type=str,
required=True, help='password for extracted files')
args = parser.parse_args()
extractall(Path(args.input_path), Path(args.output_path), args.password)
+47
View File
@@ -0,0 +1,47 @@
import argparse
import os
import shutil
from pathlib import Path
from tqdm import tqdm
TOTAL_SUBJECTS = 10307
def sanitize(name: str) -> (str, str):
return name.split('_')[1].split('-')
def rearrange(input_path: Path, output_path: Path) -> None:
os.makedirs(output_path, exist_ok=True)
for folder in input_path.iterdir():
print(f'Rearranging {folder}')
view, seq = sanitize(folder.name)
progress = tqdm(total=TOTAL_SUBJECTS)
for sid in folder.iterdir():
src = os.path.join(input_path, f'Silhouette_{view}-{seq}', sid.name)
dst = os.path.join(output_path, sid.name, seq, view)
os.makedirs(dst, exist_ok=True)
for subfile in os.listdir(src):
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
# else:
# os.remove(os.path.join(src, subfile))
progress.update(1)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OUMVLP rearrange tool')
parser.add_argument('-i', '--input_path', required=True, type=str,
help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='OUMVLP_rearranged', type=str,
help='Root path for output.')
args = parser.parse_args()
input_path = Path(args.input_path).resolve()
output_path = Path(args.output_path).resolve()
rearrange(input_path, output_path)
+24
View File
@@ -0,0 +1,24 @@
# Datasets
OpenGait officially supports a few gait datasets. In order to use them, you need to download them and use the code provided here to pre-process them to the format required by OpenGait.
## Pre-process
In general, we read the original image provided by the dataset and save a sequence as a pickle file to speed up the training IO.
The expected dataset structure is as follows:
```
DATASET_ROOT/
001 (subject)/
bg-01 (type)/
000 (view)/
000.pkl (contains all frames)
......
......
......
```
The specific preprocessing steps are described inside each dataset folder.
## Split dataset
For each dataset, we split the dataset into training and testing sets. The training set is used to train the model, and the testing set is used to evaluate the model.
You can use the partition file in [dataset folder](CASIA-B/CASIA-B.json) directly, or you can create yours. Remember to set your path to the partition file in [config/*.yaml](../config/).
+150
View File
@@ -0,0 +1,150 @@
# This source is based on https://github.com/AbnerHqC/GaitSet/blob/master/pretreatment.py
import argparse
import logging
import multiprocessing as mp
import os
import pickle
from collections import defaultdict
from functools import partial
from pathlib import Path
from typing import Tuple
import cv2
import numpy as np
from tqdm import tqdm
def imgs2pickle(img_groups: Tuple, output_path: Path, img_size: int = 64, verbose: bool = False, dataset='CASIAB') -> None:
"""Reads a group of images and saves the data in pickle format.
Args:
img_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
verbose (bool, optional): Display debug info. Defaults to False.
"""
sinfo = img_groups[0]
img_paths = img_groups[1]
to_pickle = []
for img_file in sorted(img_paths):
if verbose:
logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {img_file}')
img = cv2.imread(str(img_file), cv2.IMREAD_GRAYSCALE)
if dataset == 'GREW':
to_pickle.append(img.astype('uint8'))
continue
if img.sum() <= 10000:
if verbose:
logging.debug(f'Image sum: {img.sum()}')
logging.warning(f'{img_file} has no data.')
continue
# Get the upper and lower points
y_sum = img.sum(axis=1)
y_top = (y_sum != 0).argmax(axis=0)
y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0)
img = img[y_top: y_btm + 1, :]
# As the height of a person is larger than the width,
# use the height to calculate resize ratio.
ratio = img.shape[1] / img.shape[0]
img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)
# Get the median of the x-axis and take it as the person's x-center.
x_csum = img.sum(axis=0).cumsum()
x_center = None
for idx, csum in enumerate(x_csum):
if csum > img.sum() / 2:
x_center = idx
break
if not x_center:
logging.warning(f'{img_file} has no center.')
continue
# Get the left and right points
half_width = img_size // 2
left = x_center - half_width
right = x_center + half_width
if left <= 0 or right >= img.shape[1]:
left += half_width
right += half_width
_ = np.zeros((img.shape[0], half_width))
img = np.concatenate([_, img, _], axis=1)
to_pickle.append(img[:, left: right].astype('uint8'))
if to_pickle:
to_pickle = np.asarray(to_pickle)
dst_path = os.path.join(output_path, *sinfo)
# print(img_paths[0].as_posix().split('/'),img_paths[0].as_posix().split('/')[-5])
# dst_path = os.path.join(output_path, img_paths[0].as_posix().split('/')[-5], *sinfo) if dataset == 'GREW' else dst
os.makedirs(dst_path, exist_ok=True)
pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
if verbose:
logging.debug(f'Saving {pkl_path}...')
pickle.dump(to_pickle, open(pkl_path, 'wb'))
logging.info(f'Saved {len(to_pickle)} valid frames to {pkl_path}.')
if len(to_pickle) < 5:
logging.warning(f'{sinfo} has less than 5 valid data.')
def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: int = 4, verbose: bool = False, dataset: str = 'CASIAB') -> None:
"""Reads a dataset and saves the data in pickle format.
Args:
input_path (Path): Dataset root path.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
workers (int, optional): Number of thread workers. Defaults to 4.
verbose (bool, optional): Display debug info. Defaults to False.
"""
img_groups = defaultdict(list)
logging.info(f'Listing {input_path}')
total_files = 0
for img_path in input_path.rglob('*.png'):
if 'gei.png' in img_path.as_posix():
continue
if verbose:
logging.debug(f'Adding {img_path}')
*_, sid, seq, view, _ = img_path.as_posix().split('/')
img_groups[(sid, seq, view)].append(img_path)
total_files += 1
logging.info(f'Total files listed: {total_files}')
progress = tqdm(total=len(img_groups), desc='Pretreating', unit='folder')
with mp.Pool(workers) as pool:
logging.info(f'Start pretreating {input_path}')
for _ in pool.imap_unordered(partial(imgs2pickle, output_path=output_path, img_size=img_size, verbose=verbose, dataset=dataset), img_groups.items()):
progress.update(1)
logging.info('Done')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
parser.add_argument('-l', '--log_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
parser.add_argument('-n', '--n_workers', default=4, type=int, help='Number of thread workers. Default: 4')
parser.add_argument('-r', '--img_size', default=64, type=int, help='Image resizing size. Default 64')
parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
logging.info('Verbose mode is on.')
for k, v in args.__dict__.items():
logging.debug(f'{k}: {v}')
pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)