# OpenGait/datasets/CASIA-E/extractor.py

import argparse
import os
from pathlib import Path
import tqdm
import cv2
import tarfile
import zipfile
from functools import partial
import numpy as np
import pickle
import multiprocessing as mp


def _iter_sequences(person_dir):
    """Yield ``(height, scene, type_, view, num, num_path)`` per sequence.

    Walks the five nested directory levels below ``person_dir`` (called
    height / scene / type / view / num in this file) in sorted order at
    every level, so the traversal order is deterministic.
    """
    for height in sorted(os.listdir(person_dir)):
        height_path = os.path.join(person_dir, height)
        for scene in sorted(os.listdir(height_path)):
            scene_path = os.path.join(height_path, scene)
            for type_ in sorted(os.listdir(scene_path)):
                type_path = os.path.join(scene_path, type_)
                for view in sorted(os.listdir(type_path)):
                    view_path = os.path.join(type_path, view)
                    for num in sorted(os.listdir(view_path)):
                        yield (height, scene, type_, view, num,
                               os.path.join(view_path, num))


def _load_sequence(num_path, img_size):
    """Load all frames in ``num_path`` as grayscale square images.

    Frames are read in sorted file-name order and resized to
    ``img_size`` x ``img_size`` when either dimension differs. Files that
    OpenCV cannot decode are reported and skipped.
    """
    imgs = []
    for file_ in sorted(os.listdir(num_path)):
        img_path = os.path.join(num_path, file_)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None instead of raising on a bad file.
            print("Unreadable image skipped: " + img_path)
            continue
        # Check both dimensions: comparing only the height would let
        # non-square frames through and break np.asarray stacking later.
        if img.shape[:2] != (img_size, img_size):
            img = cv2.resize(img, dsize=(img_size, img_size))
        imgs.append(img)
    return imgs


def make_pkl_for_one_person(id_, output_path, img_size=64):
    """Untar one subject archive and convert its sequences to pickles.

    The archive ``<output_path>/<id_>`` is extracted into ``output_path``
    and then deleted. The frames are expected under
    ``<output_path>/forTrain/<id>/``; every sequence with more than five
    readable frames is stacked into a numpy array and written to
    ``<output_path>/opengait/<id>/<height>-<scene>-<type>-<num>/<view>/<view>.pkl``.

    Args:
        id_: File name of the archive inside ``output_path``. Entries whose
            extension is not ``tar`` (or that do not exist) are ignored.
        output_path: Directory holding the archive; also the extraction and
            output root.
        img_size: Side length, in pixels, of the square output frames.

    Returns:
        None. Returns early, after printing a message, when the extracted
        archive did not produce the expected ``forTrain/<id>`` directory.
    """
    archive_path = os.path.join(output_path, id_)
    if id_.split(".")[-1] != "tar" or not os.path.exists(archive_path):
        return
    with tarfile.open(archive_path) as f:
        f.extractall(output_path)
    # The archive is no longer needed once its contents are on disk.
    os.remove(archive_path)

    id_path = id_.split(".")[0]
    input_path = os.path.join(output_path, "forTrain", id_path)
    base_pkl_path = os.path.join(output_path, "opengait", id_path)
    if not os.path.isdir(input_path):
        print("Path not found: "+input_path)
        # Nothing to convert; continuing would crash in os.listdir and
        # take the whole worker pool down with it.
        return

    for height, scene, type_, view, num, num_path in _iter_sequences(input_path):
        imgs = _load_sequence(num_path, img_size)
        if len(imgs) > 5:
            pkl_path = os.path.join(
                base_pkl_path, f"{height}-{scene}-{type_}-{num}", view)
            os.makedirs(pkl_path, exist_ok=True)
            # Use a context manager so the handle is flushed and closed.
            with open(os.path.join(pkl_path, f"{view}.pkl"), "wb") as pkl_file:
                pickle.dump(np.asarray(imgs), pkl_file)
        else:
            print("No enough imgs: "+num_path)


def extractall(base_path: Path, output_path: Path, workers=1, img_size=64) -> None:
    """Extract all archives in base_path to output_path.

    Unzips the three split archives (``train001-500.zip``,
    ``val501-614.zip``, ``test615-1014.zip``) into ``output_path``, then
    hands every entry of ``output_path`` to ``make_pkl_for_one_person`` in
    a process pool, advancing a progress bar as each entry completes.

    Args:
        base_path (Path): Path to the directory containing the archives.
        output_path (Path): Path to the directory to extract the archives to.
        workers (int): Number of worker processes in the pool.
        img_size (int): Image size forwarded to ``make_pkl_for_one_person``.
    """

    os.makedirs(output_path, exist_ok=True)
    split_archives = (
        ("train", "train001-500.zip"),
        ("validation", "val501-614.zip"),
        ("test", "test615-1014.zip"),
    )
    for split_name, archive_name in split_archives:
        print(f"Unzipping {split_name} set...")
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(os.path.join(base_path, archive_name)) as z:
            z.extractall(output_path)

    print("Extracting tar file...")
    # exist_ok=True so a re-run over an existing output directory does not
    # abort with FileExistsError.
    os.makedirs(os.path.join(output_path, "forTrain"), exist_ok=True)
    os.makedirs(os.path.join(output_path, "opengait"), exist_ok=True)
    ids = os.listdir(output_path)

    worker = partial(make_pkl_for_one_person,
                     output_path=output_path, img_size=img_size)
    with tqdm.tqdm(total=len(ids), desc='Pretreating', unit='id') as progress:
        with mp.Pool(workers) as pool:
            # Completion order is irrelevant; results are only counted.
            for _ in pool.imap_unordered(worker, ids):
                progress.update(1)


if __name__ == '__main__':
    # Command-line entry point: collect the options and run the extraction.
    cli = argparse.ArgumentParser(description='CASIA-E extractor')

    # Location of the downloaded zip archives.
    cli.add_argument(
        '-b', '--input_path',
        type=str,
        required=True,
        help='Base path to CASIA-E zip files',
    )
    # Root directory for the extracted frames and generated pickles.
    cli.add_argument(
        '-o', '--output_path',
        type=str,
        required=True,
        help='Output path for extracted files. The pickle files are generated in ${output_path}/opengait/',
    )
    # Side length of the square output frames.
    cli.add_argument(
        '-s', '--img_size',
        type=int,
        default=64,
        help='Image resizing size. Default 64',
    )
    # Size of the multiprocessing pool.
    cli.add_argument(
        '-n', '--num_workers',
        type=int,
        default=1,
        help='Number of workers',
    )

    opts = cli.parse_args()
    source_dir = Path(opts.input_path)
    target_dir = Path(opts.output_path)
    extractall(source_dir, target_dir, opts.num_workers, opts.img_size)