Reconstruct the directory structure

This commit is contained in:
darkliang
2022-04-12 13:44:07 +08:00
parent 213b3a658f
commit 28f50410a5
32 changed files with 162 additions and 45 deletions
+129
View File
@@ -0,0 +1,129 @@
{
"TRAIN_SET": [
"001",
"002",
"003",
"004",
"006",
"007",
"008",
"009",
"010",
"011",
"012",
"013",
"014",
"015",
"016",
"017",
"018",
"019",
"020",
"021",
"022",
"023",
"024",
"025",
"026",
"027",
"028",
"029",
"030",
"031",
"032",
"033",
"034",
"035",
"036",
"037",
"038",
"039",
"040",
"041",
"042",
"043",
"044",
"045",
"046",
"047",
"048",
"049",
"050",
"051",
"052",
"053",
"054",
"055",
"056",
"057",
"058",
"059",
"060",
"061",
"062",
"063",
"064",
"065",
"066",
"067",
"068",
"069",
"070",
"071",
"072",
"073",
"074"
],
"TEST_SET": [
"075",
"076",
"077",
"078",
"079",
"080",
"081",
"082",
"083",
"084",
"085",
"086",
"087",
"088",
"089",
"090",
"091",
"092",
"093",
"094",
"095",
"096",
"097",
"098",
"099",
"100",
"101",
"102",
"103",
"104",
"105",
"106",
"107",
"108",
"109",
"110",
"111",
"112",
"113",
"114",
"115",
"116",
"117",
"118",
"119",
"120",
"121",
"122",
"123",
"124"
]
}
+130
View File
@@ -0,0 +1,130 @@
{
"TRAIN_SET": [
"001",
"002",
"003",
"004",
"005",
"006",
"007",
"008",
"009",
"010",
"011",
"012",
"013",
"014",
"015",
"016",
"017",
"018",
"019",
"020",
"021",
"022",
"023",
"024",
"025",
"026",
"027",
"028",
"029",
"030",
"031",
"032",
"033",
"034",
"035",
"036",
"037",
"038",
"039",
"040",
"041",
"042",
"043",
"044",
"045",
"046",
"047",
"048",
"049",
"050",
"051",
"052",
"053",
"054",
"055",
"056",
"057",
"058",
"059",
"060",
"061",
"062",
"063",
"064",
"065",
"066",
"067",
"068",
"069",
"070",
"071",
"072",
"073",
"074"
],
"TEST_SET": [
"075",
"076",
"077",
"078",
"079",
"080",
"081",
"082",
"083",
"084",
"085",
"086",
"087",
"088",
"089",
"090",
"091",
"092",
"093",
"094",
"095",
"096",
"097",
"098",
"099",
"100",
"101",
"102",
"103",
"104",
"105",
"106",
"107",
"108",
"109",
"110",
"111",
"112",
"113",
"114",
"115",
"116",
"117",
"118",
"119",
"120",
"121",
"122",
"123",
"124"
]
}
+27
View File
@@ -0,0 +1,27 @@
# CASIA-B
Download URL: http://www.cbsr.ia.ac.cn/GaitDatasetB-silh.zip
- Original
```
CASIA-B
001 (subject)
bg-01 (type)
000 (view)
001-bg-01-000-001.png (frame)
001-bg-01-000-002.png (frame)
......
......
......
......
```
- Run `python misc/pretreatment.py --input_path CASIA-B --output_path CASIA-B-pkl`
- Processed
```
CASIA-B-pkl
001 (subject)
bg-01 (type)
000 (view)
000.pkl (contains all frames)
......
......
......
```
File diff suppressed because one or more lines are too long
+77
View File
@@ -0,0 +1,77 @@
# GREW Tutorial
<!-- ![](http://hid2022.iapr-tc4.org/wp-content/uploads/sites/7/2022/03/%E5%9B%BE%E7%89%871-2.png) -->
This is for [GREW-Benchmark](https://github.com/GREW-Benchmark/GREW-Benchmark). We report our result of 48% using the baseline model. In order for participants to better start the first step, we provide a tutorial on how to use OpenGait for GREW.
## Preprocess the dataset
Download the raw dataset from the [official link](https://www.grew-benchmark.org/download.html). You will get three compressed files, i.e. `train.zip`, `test.zip` and `distractor.zip`.
Step 1: Unzip train and test:
```shell
unzip -P password train.zip (password is the obtained password)
tar -xzvf train.tgz
cd train
ls *.tgz | xargs -n1 tar xzvf
```
```shell
unzip -P password test.zip (password is the obtained password)
tar -xzvf test.tgz
cd test & cd gallery
ls *.tgz | xargs -n1 tar xzvf
cd .. & cd probe
ls *.tgz | xargs -n1 tar xzvf
```
After unpacking these compressed files, run this command:
Step 2: To rearrange the GREW dataset directory into the id-type-view structure, run
```
python datasets/GREW/rearrange_GREW.py --input_path Path_of_GREW-raw --output_path Path_of_GREW-rearranged
```
Step 3: To transform the images into pickle files, run
```
python datasets/pretreatment.py --input_path Path_of_GREW-rearranged --output_path Path_of_GREW-pkl
```
Then you will see the structure like:
- Processed
```
GREW-pkl
├── 00001train (subject in training set)
├── 00
├── 4XPn5Z28
├── 4XPn5Z28.pkl
├──5TXe8svE
├── 5TXe8svE.pkl
......
├── 00001 (subject in testing set)
├── 01
├── 79XJefi8
├── 79XJefi8.pkl
├── 02
├── t16VLaQf
├── t16VLaQf.pkl
├── probe
├── etaGVnWf
├── etaGVnWf.pkl
├── eT1EXpgZ
├── eT1EXpgZ.pkl
...
...
```
## Train the dataset
Modify the `dataset_root` in `./config/baseline/baseline_GREW.yaml`, and then run this command:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./config/baseline/baseline_GREW.yaml --phase train
```
## Get the submission file
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./config/baseline/baseline_GREW.yaml --phase test
```
The result will be generated in your working directory; you must rename and compress it according to the requirements before submitting.
## Evaluation locally
The original GREW protocol treats both seq_01 and seq_02 as gallery, and no ground truth is provided for the probe, so it is necessary to upload the submission file to the GREW competition site. To evaluate locally instead, we separate the test set as follows: seq_01 as gallery and seq_02 as probe. Then, if you modify `eval_func` in `./config/baseline/baseline_GREW.yaml` to `identification_real_scene`, you can obtain results locally, as in the OUMVLP setting.
+89
View File
@@ -0,0 +1,89 @@
import argparse
import os
import shutil
from pathlib import Path
from tqdm import tqdm
TOTAL_Test = 24000
TOTAL_Train = 20000
def rearrange_train(train_path: Path, output_path: Path) -> None:
progress = tqdm(total=TOTAL_Train)
for sid in train_path.iterdir():
if not sid.is_dir():
continue
for sub_seq in sid.iterdir():
if not sub_seq.is_dir():
continue
for subfile in os.listdir(sub_seq):
src = os.path.join(train_path, sid.name, sub_seq.name)
dst = os.path.join(output_path, sid.name+'train', '00', sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
progress.update(1)
def rearrange_test(test_path: Path, output_path: Path) -> None:
# for gallery
gallery = Path(os.path.join(test_path, 'gallery'))
probe = Path(os.path.join(test_path, 'probe'))
progress = tqdm(total=TOTAL_Test)
for sid in gallery.iterdir():
if not sid.is_dir():
continue
cnt = 1
for sub_seq in sid.iterdir():
if not sub_seq.is_dir():
continue
for subfile in sorted(os.listdir(sub_seq)):
src = os.path.join(gallery, sid.name, sub_seq.name)
dst = os.path.join(output_path, sid.name, '%02d'%cnt, sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
cnt += 1
progress.update(1)
# for probe
for sub_seq in probe.iterdir():
if not sub_seq.is_dir():
continue
for subfile in os.listdir(sub_seq):
src = os.path.join(probe, sub_seq.name)
dst = os.path.join(output_path, 'probe', '03', sub_seq.name)
os.makedirs(dst,exist_ok=True)
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
progress.update(1)
def rearrange_GREW(input_path: Path, output_path: Path) -> None:
os.makedirs(output_path, exist_ok=True)
for folder in input_path.iterdir():
if not folder.is_dir():
continue
print(f'Rearranging {folder}')
if folder.name == 'train':
rearrange_train(folder,output_path)
if folder.name == 'test':
rearrange_test(folder, output_path)
if folder.name == 'distractor':
pass
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GREW rearrange tool')
parser.add_argument('-i', '--input_path', required=True, type=str,
help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='GREW_rearranged', type=str,
help='Root path for output.')
args = parser.parse_args()
input_path = Path(args.input_path).resolve()
output_path = Path(args.output_path).resolve()
rearrange_GREW(input_path, output_path)
File diff suppressed because it is too large Load Diff
+26
View File
@@ -0,0 +1,26 @@
# HID Tutorial
![](http://hid2022.iapr-tc4.org/wp-content/uploads/sites/7/2022/03/%E5%9B%BE%E7%89%871-2.png)
This is the official support for competition of [Human Identification at a Distance (HID)](http://hid2022.iapr-tc4.org/). We report our result of 68.7% using the baseline model and 80.0% with re-ranking. In order for participants to better start the first step, we provide a tutorial on how to use OpenGait for HID.
## Preprocess the dataset
Download the raw dataset from the [official link](http://hid2022.iapr-tc4.org/). You will get three compressed files, i.e. `train.tar`, `HID2022_test_gallery.zip` and `HID2022_test_probe.zip`.
After unpacking these three files, run this command:
```shell
python misc/HID/pretreatment_HID.py --input_train_path="train" --input_gallery_path="HID2022_test_gallery" --input_probe_path="HID2022_test_probe" --output_path="HID-128-pkl"
```
## Train the dataset
Modify the `dataset_root` in `./misc/HID/baseline_hid.yaml`, and then run this command:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./misc/HID/baseline_hid.yaml --phase train
```
You can also download the [trained model](https://github.com/ShiqiYu/OpenGait/releases/download/v1.1/pretrained_hid_model.zip) and place it in `output` after unzipping.
## Get the submission file
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.launch --nproc_per_node=4 opengait/main.py --cfgs ./misc/HID/baseline_hid.yaml --phase test
```
The result will be generated in your working directory.
## Submit the result
Follow the steps in the [official submission guide](https://codalab.lisn.upsaclay.fr/competitions/2542#participate): you need to rename the file to `submission.csv` and compress it to a zip file. Finally, you can upload the zip file to the [official submission link](https://codalab.lisn.upsaclay.fr/competitions/2542#participate-submit_results).
+123
View File
@@ -0,0 +1,123 @@
import os
import cv2
import numpy as np
import argparse
import pickle
from tqdm import tqdm
parser = argparse.ArgumentParser(description='Test')
parser.add_argument('--input_train_path', default='', type=str,
help='Root path of train.')
parser.add_argument('--input_gallery_path', default='', type=str,
help='Root path of gallery.')
parser.add_argument('--input_probe_path', default='', type=str,
help='Root path of probe.')
parser.add_argument('--output_path', default='', type=str,
help='Root path for output.')
opt = parser.parse_args()
OUTPUT_PATH = opt.output_path
print('Pretreatment Start.\n'
'Input train path: {}\n'
'Input gallery path: {}\n'
'Input probe path: {}\n'
'Output path: {}\n'.format(
opt.input_train_path, opt.input_gallery_path, opt.input_probe_path, OUTPUT_PATH))
INPUT_PATH = opt.input_train_path
print("Walk the input train path")
id_list = os.listdir(INPUT_PATH)
id_list.sort()
for _id in tqdm(id_list):
seq_type = os.listdir(os.path.join(INPUT_PATH, _id))
seq_type.sort()
for _seq_type in seq_type:
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _id, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _id, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
print("Walk the input gallery path")
INPUT_PATH = opt.input_gallery_path
id_list = os.listdir(INPUT_PATH)
id_list.sort()
for _id in tqdm(id_list):
seq_type = os.listdir(os.path.join(INPUT_PATH, _id))
seq_type.sort()
for _seq_type in seq_type:
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _id, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _id, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
print("Finish {}".format(_id))
print("Walk the input probe path")
INPUT_PATH = opt.input_probe_path
seq_type = os.listdir(INPUT_PATH)
seq_type.sort()
_id = "probe"
for _seq_type in tqdm(seq_type):
out_dir = os.path.join(OUTPUT_PATH, _id, _seq_type, "default")
count_frame = 0
all_imgs = []
frame_list = sorted(os.listdir(
os.path.join(INPUT_PATH, _seq_type)))
for _frame_name in frame_list:
frame_path = os.path.join(
INPUT_PATH, _seq_type, _frame_name)
img = cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE)
if img is not None:
# Save the img
all_imgs.append(img)
count_frame += 1
all_imgs = np.asarray(all_imgs)
if count_frame > 0:
os.makedirs(out_dir, exist_ok=True)
all_imgs_pkl = os.path.join(out_dir, '{}.pkl'.format(_seq_type))
pickle.dump(all_imgs, open(all_imgs_pkl, 'wb'))
# Warn if the sequence contains less than 5 frames
if count_frame < 5:
print('Seq:{}-{}, less than 5 valid data.'.format(_id, _seq_type))
File diff suppressed because it is too large Load Diff
+67
View File
@@ -0,0 +1,67 @@
# OUMVLP
Step1: Download URL: http://www.am.sanken.osaka-u.ac.jp/BiometricDB/GaitMVLP.html
Step2: Unzip the dataset with the command below; you will then get a directory structure like the following:
```
python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_path Path_of_OUMVLP-raw --password Given_Password
```
- Original
```
OUMVLP-raw
Silhouette_000-00 (view-sequence)
00001 (subject)
0001.png (frame)
0002.png (frame)
......
00002
0001.png (frame)
0002.png (frame)
......
......
Silhouette_000-01
00001
0001.png (frame)
0002.png (frame)
......
00002
0001.png (frame)
0002.png (frame)
......
......
Silhouette_015-00
......
Silhouette_015-01
......
......
```
Step 3: To rearrange the OUMVLP dataset directory into the id-type-view structure, run
```
python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-rearranged
```
Step 4: To transform the images into pickle files, run
```
python datasets/pretreatment.py --input_path Path_of_OUMVLP-rearranged --output_path Path_of_OUMVLP-pkl
```
- Processed
```
OUMVLP-pkl
00001 (subject)
00 (sequence)
000 (view)
000.pkl (contains all frames)
015 (view)
015.pkl (contains all frames)
...
01 (sequence)
000 (view)
000.pkl (contains all frames)
015 (view)
015.pkl (contains all frames)
......
00002 (subject)
......
......
```
+44
View File
@@ -0,0 +1,44 @@
import argparse
import os
from pathlib import Path
import py7zr
from tqdm import tqdm
def extractall(base_path: Path, output_path: Path, passwords) -> None:
"""Extract all archives in base_path to output_path.
Args:
base_path (Path): Path to the directory containing the archives.
output_path (Path): Path to the directory to extract the archives to.
"""
os.makedirs(output_path, exist_ok=True)
for file_path in tqdm(list(base_path.rglob('Silhouette_*.7z'))):
if output_path.joinpath(file_path.stem).exists():
continue
with py7zr.SevenZipFile(file_path, password=passwords) as archive:
total_items = len(
[f for f in archive.getnames() if f.endswith('.png')]
)
archive.extractall(output_path)
extracted_files = len(
list(output_path.joinpath(file_path.stem).rglob('*.png')))
assert extracted_files == total_items, f'{extracted_files} != {total_items}'
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OUMVLP extractor')
parser.add_argument('-b', '--input_path', type=str,
required=True, help='Base path to OUMVLP .7z files')
parser.add_argument('-o', '--output_path', type=str,
required=True, help='Output path for extracted files')
parser.add_argument('-p', '--password', type=str,
required=True, help='password for extracted files')
args = parser.parse_args()
extractall(Path(args.input_path), Path(args.output_path), args.password)
+47
View File
@@ -0,0 +1,47 @@
import argparse
import os
import shutil
from pathlib import Path
from tqdm import tqdm
TOTAL_SUBJECTS = 10307
def sanitize(name: str) -> (str, str):
return name.split('_')[1].split('-')
def rearrange(input_path: Path, output_path: Path) -> None:
os.makedirs(output_path, exist_ok=True)
for folder in input_path.iterdir():
print(f'Rearranging {folder}')
view, seq = sanitize(folder.name)
progress = tqdm(total=TOTAL_SUBJECTS)
for sid in folder.iterdir():
src = os.path.join(input_path, f'Silhouette_{view}-{seq}', sid.name)
dst = os.path.join(output_path, sid.name, seq, view)
os.makedirs(dst, exist_ok=True)
for subfile in os.listdir(src):
if subfile not in os.listdir(dst) and subfile.endswith('.png'):
os.symlink(os.path.join(src, subfile),
os.path.join(dst, subfile))
# else:
# os.remove(os.path.join(src, subfile))
progress.update(1)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OUMVLP rearrange tool')
parser.add_argument('-i', '--input_path', required=True, type=str,
help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='OUMVLP_rearranged', type=str,
help='Root path for output.')
args = parser.parse_args()
input_path = Path(args.input_path).resolve()
output_path = Path(args.output_path).resolve()
rearrange(input_path, output_path)
+24
View File
@@ -0,0 +1,24 @@
# Datasets
OpenGait officially supports a few gait datasets. In order to use them, you need to download them and use the code provided here to pre-process them to the format required by OpenGait.
## Pre-process
In general, we read the original image provided by the dataset and save a sequence as a pickle file to speed up the training IO.
The expected dataset structure is as follows:
```
DATASET_ROOT/
001 (subject)/
bg-01 (type)/
000 (view)/
000.pkl (contains all frames)
......
......
......
```
The specific preprocessing steps are described inside each dataset folder.
## Split dataset
For each dataset, we split the dataset into training and testing sets. The training set is used to train the model, and the testing set is used to evaluate the model.
You can use the partition file in [dataset folder](CASIA-B/CASIA-B.json) directly, or you can create yours. Remember to set your path to the partition file in [config/*.yaml](../config/).
+150
View File
@@ -0,0 +1,150 @@
# This source is based on https://github.com/AbnerHqC/GaitSet/blob/master/pretreatment.py
import argparse
import logging
import multiprocessing as mp
import os
import pickle
from collections import defaultdict
from functools import partial
from pathlib import Path
from typing import Tuple
import cv2
import numpy as np
from tqdm import tqdm
def imgs2pickle(img_groups: Tuple, output_path: Path, img_size: int = 64, verbose: bool = False, dataset='CASIAB') -> None:
"""Reads a group of images and saves the data in pickle format.
Args:
img_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
verbose (bool, optional): Display debug info. Defaults to False.
"""
sinfo = img_groups[0]
img_paths = img_groups[1]
to_pickle = []
for img_file in sorted(img_paths):
if verbose:
logging.debug(f'Reading sid {sinfo[0]}, seq {sinfo[1]}, view {sinfo[2]} from {img_file}')
img = cv2.imread(str(img_file), cv2.IMREAD_GRAYSCALE)
if dataset == 'GREW':
to_pickle.append(img.astype('uint8'))
continue
if img.sum() <= 10000:
if verbose:
logging.debug(f'Image sum: {img.sum()}')
logging.warning(f'{img_file} has no data.')
continue
# Get the upper and lower points
y_sum = img.sum(axis=1)
y_top = (y_sum != 0).argmax(axis=0)
y_btm = (y_sum != 0).cumsum(axis=0).argmax(axis=0)
img = img[y_top: y_btm + 1, :]
# As the height of a person is larger than the width,
# use the height to calculate resize ratio.
ratio = img.shape[1] / img.shape[0]
img = cv2.resize(img, (int(img_size * ratio), img_size), interpolation=cv2.INTER_CUBIC)
# Get the median of the x-axis and take it as the person's x-center.
x_csum = img.sum(axis=0).cumsum()
x_center = None
for idx, csum in enumerate(x_csum):
if csum > img.sum() / 2:
x_center = idx
break
if not x_center:
logging.warning(f'{img_file} has no center.')
continue
# Get the left and right points
half_width = img_size // 2
left = x_center - half_width
right = x_center + half_width
if left <= 0 or right >= img.shape[1]:
left += half_width
right += half_width
_ = np.zeros((img.shape[0], half_width))
img = np.concatenate([_, img, _], axis=1)
to_pickle.append(img[:, left: right].astype('uint8'))
if to_pickle:
to_pickle = np.asarray(to_pickle)
dst_path = os.path.join(output_path, *sinfo)
# print(img_paths[0].as_posix().split('/'),img_paths[0].as_posix().split('/')[-5])
# dst_path = os.path.join(output_path, img_paths[0].as_posix().split('/')[-5], *sinfo) if dataset == 'GREW' else dst
os.makedirs(dst_path, exist_ok=True)
pkl_path = os.path.join(dst_path, f'{sinfo[2]}.pkl')
if verbose:
logging.debug(f'Saving {pkl_path}...')
pickle.dump(to_pickle, open(pkl_path, 'wb'))
logging.info(f'Saved {len(to_pickle)} valid frames to {pkl_path}.')
if len(to_pickle) < 5:
logging.warning(f'{sinfo} has less than 5 valid data.')
def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: int = 4, verbose: bool = False, dataset: str = 'CASIAB') -> None:
"""Reads a dataset and saves the data in pickle format.
Args:
input_path (Path): Dataset root path.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
workers (int, optional): Number of thread workers. Defaults to 4.
verbose (bool, optional): Display debug info. Defaults to False.
"""
img_groups = defaultdict(list)
logging.info(f'Listing {input_path}')
total_files = 0
for img_path in input_path.rglob('*.png'):
if 'gei.png' in img_path.as_posix():
continue
if verbose:
logging.debug(f'Adding {img_path}')
*_, sid, seq, view, _ = img_path.as_posix().split('/')
img_groups[(sid, seq, view)].append(img_path)
total_files += 1
logging.info(f'Total files listed: {total_files}')
progress = tqdm(total=len(img_groups), desc='Pretreating', unit='folder')
with mp.Pool(workers) as pool:
logging.info(f'Start pretreating {input_path}')
for _ in pool.imap_unordered(partial(imgs2pickle, output_path=output_path, img_size=img_size, verbose=verbose, dataset=dataset), img_groups.items()):
progress.update(1)
logging.info('Done')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OpenGait dataset pretreatment module.')
parser.add_argument('-i', '--input_path', default='', type=str, help='Root path of raw dataset.')
parser.add_argument('-o', '--output_path', default='', type=str, help='Output path of pickled dataset.')
parser.add_argument('-l', '--log_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
parser.add_argument('-n', '--n_workers', default=4, type=int, help='Number of thread workers. Default: 4')
parser.add_argument('-r', '--img_size', default=64, type=int, help='Image resizing size. Default 64')
parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
logging.info('Verbose mode is on.')
for k, v in args.__dict__.items():
logging.debug(f'{k}: {v}')
pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)