Reconstruct the directory structure

2022-04-12 13:44:07 +08:00
parent 213b3a658f
commit 28f50410a5
32 changed files with 162 additions and 45 deletions
@@ -0,0 +1,67 @@
+# OUMVLP
+Step1: Download the dataset from http://www.am.sanken.osaka-u.ac.jp/BiometricDB/GaitMVLP.html
+
+Step2: Unzip the dataset by running the command below. You will get a directory structure like the one shown under "Original":
+```
+python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_path Path_of_OUMVLP-raw --password Given_Password
+```  
+
+- Original
+    ```
+    OUMVLP-raw
+        Silhouette_000-00 (view-sequence)
+            00001 (subject)
+                0001.png (frame)
+                0002.png (frame)
+                ......
+            00002
+                0001.png (frame)
+                0002.png (frame)
+                ......
+            ......
+        Silhouette_000-01
+            00001
+                0001.png (frame)
+                0002.png (frame)
+                ......
+            00002
+                0001.png (frame)
+                0002.png (frame)
+                ......
+            ......
+        Silhouette_015-00
+            ......
+        Silhouette_015-01
+            ......
+        ......
+    ```
+Step3: Rearrange the directory of the OUMVLP dataset into an id-type-view structure by running:
+```
+python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-rearranged
+```  
+
+Step4: Transform the images into pickle files by running:
+```
+python datasets/pretreatment.py --input_path Path_of_OUMVLP-rearranged --output_path Path_of_OUMVLP-pkl
+```
+
+- Processed
+    ```
+    OUMVLP-pkl
+        00001 (subject)
+            00 (sequence)
+                000 (view)
+                    000.pkl (contains all frames)
+                015 (view)
+                    015.pkl (contains all frames)
+                ...
+            01 (sequence)
+                000 (view)
+                    000.pkl (contains all frames)
+                015 (view)
+                    015.pkl (contains all frames)
+                ......
+        00002 (subject)
+            ......
+        ......
+    ```
@@ -0,0 +1,44 @@
+import argparse
+import os
+from pathlib import Path
+
+import py7zr
+from tqdm import tqdm
+
+
+def extractall(base_path: Path, output_path: Path, passwords) -> None:
+    """Extract every ``Silhouette_*.7z`` archive under base_path to output_path.
+
+    An archive is skipped when output_path already contains an entry named
+    after the archive's stem. After each extraction the number of ``.png``
+    files found under that entry is compared with the number of ``.png``
+    names listed in the archive.
+
+    Args:
+        base_path (Path): Path to the directory containing the archives
+            (searched recursively).
+        output_path (Path): Path to the directory to extract the archives to.
+            Created if it does not exist.
+        passwords: Password passed to ``py7zr.SevenZipFile`` for each archive.
+
+    Raises:
+        RuntimeError: If the number of extracted ``.png`` files differs from
+            the number listed in the archive.
+    """
+
+    os.makedirs(output_path, exist_ok=True)
+    # Sorted so archives are processed in a reproducible order.
+    archives = sorted(base_path.rglob('Silhouette_*.7z'))
+    for file_path in tqdm(archives):
+        # Assumes each archive unpacks into a folder named after its stem.
+        target_dir = output_path.joinpath(file_path.stem)
+        # NOTE: a folder left behind by an interrupted extraction is skipped
+        # as well; delete it manually to force a re-extraction.
+        if target_dir.exists():
+            continue
+        with py7zr.SevenZipFile(file_path, password=passwords) as archive:
+            total_items = sum(
+                1 for name in archive.getnames() if name.endswith('.png'))
+            archive.extractall(output_path)
+
+        extracted_files = sum(1 for _ in target_dir.rglob('*.png'))
+
+        # Explicit raise instead of assert: asserts are stripped under -O.
+        if extracted_files != total_items:
+            raise RuntimeError(
+                f'{file_path.name}: {extracted_files} != {total_items}')
+
+
+if __name__ == '__main__':
+    # Command-line entry point: read the three required options and run
+    # the extraction.
+    cli = argparse.ArgumentParser(description='OUMVLP extractor')
+    cli.add_argument(
+        '-b', '--input_path',
+        type=str, required=True,
+        help='Base path to OUMVLP .7z files',
+    )
+    cli.add_argument(
+        '-o', '--output_path',
+        type=str, required=True,
+        help='Output path for extracted files',
+    )
+    cli.add_argument(
+        '-p', '--password',
+        type=str, required=True,
+        help='password for extracted files',
+    )
+
+    options = cli.parse_args()
+
+    source_dir = Path(options.input_path)
+    target_dir = Path(options.output_path)
+    extractall(source_dir, target_dir, options.password)
@@ -0,0 +1,47 @@
+import argparse
+import os
+import shutil
+from pathlib import Path
+
+from tqdm import tqdm
+
+
+# Total handed to the tqdm progress bar for each Silhouette_<view>-<seq>
+# folder; presumably the number of subjects in the full OUMVLP release
+# (TODO confirm).
+TOTAL_SUBJECTS = 10307
+
+
+def sanitize(name: str) -> list:
+    """Split a raw folder name into its view and sequence parts.
+
+    The text between the first and second underscore of name is split on
+    '-', so 'Silhouette_000-00' yields ['000', '00'].
+
+    Args:
+        name (str): Folder name of the form 'Silhouette_<view>-<seq>'.
+
+    Returns:
+        list: The '-'-separated parts as strings; [view, seq] for a
+            well-formed name.
+
+    Raises:
+        IndexError: If name contains no underscore.
+    """
+    return name.split('_')[1].split('-')
+
+
+def rearrange(input_path: Path, output_path: Path) -> None:
+    """Symlink raw OUMVLP frames into a subject/sequence/view layout.
+
+    For every ``Silhouette_<view>-<seq>/<subject>/<frame>.png`` under
+    input_path, a symlink ``<subject>/<seq>/<view>/<frame>.png`` is created
+    under output_path. Names already present in a destination folder are
+    left untouched, so the function can be re-run on a partial output.
+
+    Args:
+        input_path (Path): Root of the raw dataset, containing the
+            ``Silhouette_<view>-<seq>`` folders.
+        output_path (Path): Root of the rearranged dataset. Created if it
+            does not exist.
+    """
+    os.makedirs(output_path, exist_ok=True)
+
+    for folder in input_path.iterdir():
+        # Stray files next to the view folders cannot be iterated; skip them.
+        if not folder.is_dir():
+            continue
+        print(f'Rearranging {folder}')
+        view, seq = sanitize(folder.name)
+        # The context manager closes the bar before the next folder starts.
+        with tqdm(total=TOTAL_SUBJECTS) as progress:
+            for sid in folder.iterdir():
+                if not sid.is_dir():
+                    continue
+                src = os.path.join(
+                    input_path, f'Silhouette_{view}-{seq}', sid.name)
+                dst = os.path.join(output_path, sid.name, seq, view)
+                os.makedirs(dst, exist_ok=True)
+                # Read the destination once instead of once per frame.
+                existing = set(os.listdir(dst))
+                for subfile in os.listdir(src):
+                    if subfile.endswith('.png') and subfile not in existing:
+                        os.symlink(os.path.join(src, subfile),
+                                   os.path.join(dst, subfile))
+                progress.update(1)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: resolve both roots to absolute paths and
+    # run the rearrangement.
+    cli = argparse.ArgumentParser(description='OUMVLP rearrange tool')
+    cli.add_argument(
+        '-i', '--input_path',
+        type=str, required=True,
+        help='Root path of raw dataset.',
+    )
+    cli.add_argument(
+        '-o', '--output_path',
+        type=str, default='OUMVLP_rearranged',
+        help='Root path for output.',
+    )
+
+    options = cli.parse_args()
+
+    input_path = Path(options.input_path).resolve()
+    output_path = Path(options.output_path).resolve()
+    rearrange(input_path, output_path)