support CASIA-E dataset

This commit is contained in:
darkliang
2023-04-09 17:15:42 +08:00
parent 786aded8af
commit e69fb6f439
8 changed files with 1449 additions and 9 deletions
File diff suppressed because it is too large Load Diff
+43
View File
@@ -0,0 +1,43 @@
# CASIA-E
Application URL: https://www.scidb.cn/en/detail?dataSetId=57be0e918db743279baf44a38d013a06
- Original
```
test615-1014.zip
train001-500.zip
val501-614.zip
```
- Run `python datasets/CASIA-E/extractor.py --input_path CASIA-E/ --output_path CASIA-E-processed/ -n 8 -s 64`. \
  `-n` is the number of worker processes; `-s` is the target (square) image size.
- Processed
```
CASIA-E-processed
forTrain # raw images
001 (subject)
H (height)
scene1 (scene)
bg (walking condition)
000 (view)
1 (sequence number)
xxx.jpg (images)
......
......
......
......
......
......
......
opengait # pickle file
001 (subject)
H-scene1-bg-1 (type)
000 (view)
000.pkl (contains all frames)
......
......
......
```
## Evaluation
Compared with the settings in the original paper, we use only 200 subjects for training; the remaining subjects form the test set, and the gallery/probe division is more practical and more challenging.
For specific experimental settings, please refer to configs/gaitbase/gaitbase_casiae.yaml.
For the specific division of the probe and gallery, please refer to opengait/evaluation/evaluator.py.
+98
View File
@@ -0,0 +1,98 @@
import argparse
import os
from pathlib import Path
import tqdm
import cv2
import tarfile
import zipfile
from functools import partial
import numpy as np
import pickle
import multiprocessing as mp
def make_pkl_for_one_person(id_, output_path, img_size=64):
    """Extract one subject's tar archive and convert its sequences to pickles.

    Args:
        id_ (str): Directory entry name; only "<subject>.tar" files are
            processed, everything else is skipped.
        output_path (str | Path): Root directory that contains the tar file
            and the "forTrain"/"opengait" sub-directories.
        img_size (int): Target (square) frame size; frames are resized when
            their shape differs.
    """
    # Only process existing "<subject>.tar" archives.
    if id_.split(".")[-1] != "tar" or not os.path.exists(os.path.join(output_path, id_)):
        return
    # tarfile.open auto-detects the archive format; close it before deleting.
    with tarfile.open(os.path.join(output_path, id_)) as f:
        f.extractall(output_path)
    os.remove(os.path.join(output_path, id_))
    id_path = id_.split(".")[0]
    input_path = os.path.join(output_path, "forTrain", id_path)
    base_pkl_path = os.path.join(output_path, "opengait", id_path)
    if not os.path.isdir(input_path):
        # BUG FIX: previously fell through and crashed on os.listdir below.
        print("Path not found: "+input_path)
        return
    # Expected layout: subject/height/scene/condition/view/sequence/*.jpg
    for height in sorted(os.listdir(input_path)):
        height_path = os.path.join(input_path, height)
        for scene in sorted(os.listdir(height_path)):
            scene_path = os.path.join(height_path, scene)
            for type_ in sorted(os.listdir(scene_path)):
                type_path = os.path.join(scene_path, type_)
                for view in sorted(os.listdir(type_path)):
                    view_path = os.path.join(type_path, view)
                    for num in sorted(os.listdir(view_path)):
                        num_path = os.path.join(view_path, num)
                        imgs = []
                        for file_ in sorted(os.listdir(num_path)):
                            img = cv2.imread(os.path.join(
                                num_path, file_), cv2.IMREAD_GRAYSCALE)
                            if img is None:
                                # Skip unreadable/corrupt frames instead of
                                # crashing on img.shape.
                                print("Unreadable image: " +
                                      os.path.join(num_path, file_))
                                continue
                            # Check both dimensions, not just the height.
                            if img.shape[0] != img_size or img.shape[1] != img_size:
                                img = cv2.resize(
                                    img, dsize=(img_size, img_size))
                            imgs.append(img)
                        # Sequences with fewer than 6 frames are discarded.
                        if len(imgs) > 5:
                            pkl_path = os.path.join(
                                base_pkl_path, f"{height}-{scene}-{type_}-{num}", view)
                            os.makedirs(pkl_path, exist_ok=True)
                            with open(os.path.join(pkl_path, f"{view}.pkl"), "wb") as pkl_f:
                                pickle.dump(np.asarray(imgs), pkl_f)
                        else:
                            print("Not enough images: "+num_path)
def extractall(base_path: Path, output_path: Path, workers=1, img_size=64) -> None:
    """Extract all CASIA-E archives in base_path and convert them to pickles.

    Args:
        base_path (Path): Directory containing the three CASIA-E zip files.
        output_path (Path): Directory to extract into; pickle files end up
            under ``output_path/opengait``.
        workers (int): Number of parallel worker processes.
        img_size (int): Target (square) frame size for the stored images.
    """
    os.makedirs(output_path, exist_ok=True)
    # The dataset ships as three fixed-name zip files (train/val/test splits).
    for label, zip_name in (("train", 'train001-500.zip'),
                            ("validation", 'val501-614.zip'),
                            ("test", 'test615-1014.zip')):
        print(f"Unzipping {label} set...")
        # Context manager closes the archive handle (previously leaked).
        with zipfile.ZipFile(os.path.join(base_path, zip_name)) as z:
            z.extractall(output_path)
    print("Extracting tar file...")
    # exist_ok=True lets the script be re-run after a partial failure.
    os.makedirs(os.path.join(output_path, "forTrain"), exist_ok=True)
    os.makedirs(os.path.join(output_path, "opengait"), exist_ok=True)
    # Each listed entry is (at most) one subject's tar archive; non-tar
    # entries are skipped inside make_pkl_for_one_person.
    ids = os.listdir(output_path)
    progress = tqdm.tqdm(total=len(ids), desc='Pretreating', unit='id')
    with mp.Pool(workers) as pool:
        for _ in pool.imap_unordered(
                partial(make_pkl_for_one_person,
                        output_path=output_path, img_size=img_size),
                ids):
            progress.update(1)
    progress.close()
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the full pipeline.
    arg_parser = argparse.ArgumentParser(description='CASIA-E extractor')
    arg_parser.add_argument(
        '-b', '--input_path', type=str, required=True,
        help='Base path to CASIA-E zip files')
    arg_parser.add_argument(
        '-o', '--output_path', type=str, required=True,
        help='Output path for extracted files. The pickle files are generated in ${output_path}/opengait/')
    arg_parser.add_argument(
        '-s', '--img_size', type=int, default=64,
        help='Image resizing size. Default 64')
    arg_parser.add_argument(
        '-n', '--num_workers', type=int, default=1,
        help='Number of workers')
    opts = arg_parser.parse_args()
    extractall(Path(opts.input_path), Path(opts.output_path),
               opts.num_workers, opts.img_size)