Merge pull request #280 from Ahzyuan/fix_oumvlp_pose_extract

fix: OUMVLP-Pose pkl data temporal discontinuity
This commit is contained in:
Chengwei Ye
2025-09-15 16:50:00 +08:00
committed by GitHub
3 changed files with 288 additions and 15 deletions
+29 -4
View File
@@ -37,7 +37,7 @@ python datasets/OUMVLP/extractor.py --input_path Path_of_OUMVLP-base --output_pa
```
Step3-1 : To rearrange directory of OUMVLP dataset(for silhouette), turning to id-type-view structure, Run
```
python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-rearranged
python datasets/OUMVLP/rearrange_OUMVLP.py --input_path Path_of_OUMVLP-raw --output_path Path_of_OUMVLP-silu-rearranged
```
Step3-2 : To rearrange directory of OUMVLP dataset(for pose), turning to id-type-view structure, Run
```
@@ -46,12 +46,37 @@ python datasets/OUMVLP/rearrange_OUMVLP_pose.py --input_path Path_of_OUMVLP-pose
Step4-1: Transforming images to pickle file, run
```
python datasets/pretreatment.py --input_path Path_of_OUMVLP-rearranged --output_path Path_of_OUMVLP-pkl
python datasets/pretreatment.py --input_path Path_of_OUMVLP-silu-rearranged --output_path Path_of_OUMVLP-pkl
```
Step4-2: Transforming pose txts to pickle file, run
> [!IMPORTANT]
> Before extracting pose pkls, **you need to possess the pose selection index files** ([Why](https://github.com/ShiqiYu/OpenGait/pull/280)). Here are two ways to get it:
> 1. `Approach 1`: Directly download it:
> - Open [Download Link](https://drive.google.com/drive/folders/1gkXdrVtNuGbU5wd8lWoPfAo_qYpokm52?usp=sharing), choose `AlphaPose` or `OpenPose` version
> - Find a suitable location to unzip it, like `<somewhere>/OUMVLP/Pose/match_idx`.
> - Move the zip file into the `match_idx` dir and unzip it there.
> - You will finally get the index root: `<somewhere>/OUMVLP/Pose/match_idx/AlphaPose`
> *(Here we take `AlphaPose` version as an example, this path is what we call `Path_of_OUMVLP-pose-index` below)*
>
> 2. `Approach 2`: Run the following command to generate it by yourself (**rearranged silhouette dataset is needed**):
>
> ```bash
> python datasets/OUMVLP/pose_index_extractor.py \
> -p Path_of_OUMVLP-pose-rearranged \
> -s Path_of_OUMVLP-silu-rearranged \
> -o Path_of_OUMVLP-pose-index
> ```
```bash
python datasets/pretreatment.py \
--input_path Path_of_OUMVLP-pose-rearranged \
--output_path Path_of_OUMVLP-pose-pkl \
--pose \
--dataset OUMVLP \
--oumvlp_index_dir Path_of_OUMVLP-pose-index
```
python datasets/pretreatment.py --input_path Path_of_GREW-pose-rearranged --output_path Path_of_GREW-pose-pkl --pose --dataset GREW
```
gernerate the 17 Number of Pose Points Format from 18 Number of Pose Points
```
python datasets/OUMVLP/rearrange_OUMVLP_pose.py --input_path Path_of_OUMVLP-pose18 --output_path Path_of_OUMVLP-pose17
+195
View File
@@ -0,0 +1,195 @@
import re
import os
import json
import logging
import argparse
import pickle as pk
from typing import Tuple
from pathlib import Path
from functools import partial
from collections import defaultdict
import cv2
import numpy as np
from tqdm import tqdm
import multiprocessing as mp
"""
This script tries to match all the potential poses detected in a frame with the silhouette of the same frame in OUMVLP dataset,
and selects the pose that best matches the silhouette as the final pose for that frame, save its index in a pickle file which
is used when extracting pose pkls.
More info please refer to https://github.com/ShiqiYu/OpenGait/pull/280
"""
def pose_silu_match_score(pose: np.ndarray, silu: np.ndarray) -> float:
"""
Calculate the matching score between a 2D pose and a silhouette image using the sum of all joints' pixel intensity.
Args:
pose (np.ndarray): 2D pose, shape (n_joints, 3)
silu (np.ndarray): silhouette image, shape (H, W, 3)
Returns:
float: matching score
"""
pose_coord = pose[:,:2].astype(np.int32)
H, W, *_ = silu.shape
valid_joints = (pose_coord[:, 1] >=0) & (pose_coord[:, 1] < H) & \
(pose_coord[:, 0] >=0) & (pose_coord[:, 0] < W)
if np.sum(valid_joints) == len(pose_coord):
# only calculate score for points that are inside the silu img
# use the sum of all joints' pixel intensity as the score
return np.sum(silu[pose_coord[:, 1], pose_coord[:, 0]])
else:
# if pose coord is out of bound, return -inf
return -np.inf
def perseq_pipeline(txt_groups: Tuple, rearrange_silu_root: Path, output_path: Path, verbose: bool = False) -> None:
"""
Generate and save the pose selection index pickle file for a given sequence.
Args:
txt_groups (Tuple): Tuple of (sid, seq, view) and list of pose json paths.
rearrange_silu_root (Path): Root dir of rearranged silu dataset.
output_path (Path): Output path.
verbose (bool, optional): Display debug info. Defaults to False.
"""
# resolve seq info
sinfo = txt_groups[0]
txt_paths = txt_groups[1]
pick_idx = dict()
# prepare output dir & resume last work
dst_path = os.path.join(output_path, *sinfo)
os.makedirs(dst_path, exist_ok=True)
pkl_path = os.path.join(dst_path, 'pose_selection_idx.pkl')
if os.path.exists(pkl_path):
logging.debug(f'Pose index file {pkl_path} already exists, skipping...')
return
# extract
for txt_file in sorted(txt_paths):
# get the frame index (digit str before extension) of current frame
try:
frame_idx = re.findall(r'(\d+).json', os.path.basename(txt_file))[0]
except IndexError:
# adapt to different name format for json files in ID 00001
frame_idx = re.findall(r'\d{4}', os.path.basename(txt_file))[0]
with open(txt_file) as f:
jsondata = json.load(f)
person_num = len(jsondata['people'])
# if no person or 1 person detected in this frame
# we don't need to do the matching, just use the first or skip this frame when extracting pose pkl
# see datasets/pretreatment.py#Line: 167~168 and Line: 173
if person_num <= 1:
continue
# multiple people detected in this frame
else:
# load the reference silu image
img_name = f'{frame_idx}.png'
img_path = os.path.join(rearrange_silu_root, *sinfo, img_name)
if not os.path.exists(img_path):
logging.warning(
f'Pose reference silu({img_path}) of seq({'-'.join(sinfo)}) not exists, the matching for frame {frame_idx} is skipped. '
+ 'This means that the first person in the frame will be used as the pose data, and this may cause performance degradation.'
)
continue
silu_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
# determine which pose has the highest matching score
person_poses = [np.array(p["pose_keypoints_2d"]).reshape(-1,3) for p in jsondata['people']]
max_score_idx = np.argmax([pose_silu_match_score(p, silu_img) for p in person_poses])
# use the pose with the highest matching score to be the pkl data
pick_idx[frame_idx] = max_score_idx
# dump the index dict
if verbose:
logging.debug(f'Saving {pkl_path}... ')
with open(pkl_path, 'wb') as f:
pk.dump(pick_idx, f)
logging.debug(f'Saved {len(pick_idx)} indexs to {pkl_path}.')
def main(rearrange_pose_root: Path, rearrange_silu_root: Path, output_path: Path, workers: int = 4, verbose: bool = False) -> None:
"""Reads a dataset and saves the data in pickle format.
Args:
rearrange_pose_root (Path): Root path of the rearranged oumvlp pose dataset.
rearrange_silu_root (Path): Root path of the rearranged oumvlp silu dataset.
output_path (Path): The selection index output path. The final structure is: output_path/sid/seq/view/pose_selection_idx.pkl
workers (int, optional): Number of thread workers. Defaults to 4.
verbose (bool, optional): Display debug info. Defaults to False.
"""
txt_groups = defaultdict(list)
logging.info(f'Listing {rearrange_pose_root}')
total_files = 0
for json_path in rearrange_pose_root.rglob('*.json'):
if verbose:
logging.debug(f'Adding {json_path}')
*_, sid, seq, view, _ = json_path.as_posix().split(os.path.sep)
txt_groups[(sid, seq, view)].append(json_path)
total_files += 1
logging.info(f'Total files listed: {total_files}')
progress = tqdm(total=len(txt_groups), desc='Extracting Matching Pose Index', unit='seq')
with mp.Pool(workers) as pool:
logging.info(f'Start extracting pose indexes for {rearrange_pose_root}')
for _ in pool.imap_unordered(
partial(perseq_pipeline, rearrange_silu_root=rearrange_silu_root, output_path=output_path, verbose=verbose),
txt_groups.items()
):
progress.update(1)
logging.info('Done')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='OUMVLP pose selection index extraction module.')
parser.add_argument('-p', '--rearrange_pose_root', required=True, type=str, help='Root path of the rearranged oumvlp pose dataset.')
parser.add_argument('-s', '--rearrange_silu_root', required=True, type=str, help='Root path of the rearranged oumvlp silu dataset.')
parser.add_argument('-o', '--output_path', required=True, type=str, help='Output path of pickled dataset.')
parser.add_argument('-l', '--log_file', default='./pretreatment.log', type=str, help='Log file path. Default: ./pretreatment.log')
parser.add_argument('-n', '--n_workers', default=4, type=int, help='Number of thread workers. Default: 4')
parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
args = parser.parse_args()
# logging and verbose mode
logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
logging.info('Verbose mode is on.')
for k, v in args.__dict__.items():
logging.debug(f'{k}: {v}')
# arguments validation
args.rearrange_pose_root = os.path.abspath(args.rearrange_pose_root)
assert os.path.exists(args.rearrange_pose_root), f"The specified oumvlp pose root({args.rearrange_pose_root}) does not exist."
args.rearrange_silu_root = os.path.abspath(args.rearrange_silu_root)
assert os.path.exists(args.rearrange_silu_root), f"The specified oumvlp silu root({args.rearrange_silu_root}) does not exist."
args.output_path = os.path.abspath(args.output_path)
os.makedirs(args.output_path, exist_ok=True)
# run
main(
rearrange_pose_root=Path(args.rearrange_pose_root),
rearrange_silu_root=Path(args.rearrange_silu_root),
output_path=Path(args.output_path),
workers=args.n_workers,
verbose=args.verbose
)
+64 -11
View File
@@ -3,6 +3,7 @@ import argparse
import logging
import multiprocessing as mp
import os
import re
import pickle
from collections import defaultdict
from functools import partial
@@ -127,31 +128,59 @@ def pretreat(input_path: Path, output_path: Path, img_size: int = 64, workers: i
progress.update(1)
logging.info('Done')
def txts2pickle(txt_groups: Tuple, output_path: Path, verbose: bool = False, dataset='CASIAB') -> None:
def txts2pickle(txt_groups: Tuple, output_path: Path, verbose: bool = False, dataset='CASIAB', **kwargs) -> None:
"""
Reads a group of images and saves the data in pickle format.
Args:
img_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
txt_groups (Tuple): Tuple of (sid, seq, view) and list of image paths.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
verbose (bool, optional): Display debug info. Defaults to False.
dataset (str, optional): Dataset name. Defaults to 'CASIAB'.
kwargs (dict, optional): Additional arguments. It receives 'oumvlp_index_dir' when dataset is 'OUMVLP'.
"""
sinfo = txt_groups[0]
txt_paths = txt_groups[1]
to_pickle = []
if dataset == 'OUMVLP':
# load pose selection index
idx_file = os.path.join(kwargs['oumvlp_index_dir'], *sinfo, 'pose_selection_idx.pkl')
try:
with open(idx_file, 'rb') as f:
frame_wise_idx = pickle.load(f) # dict, structure is {txt_file_name(str): selected_pose_idx(int)}
except FileNotFoundError:
logging.warning(
f'No pose selection index found for sequence: {sinfo}, will use the first detected pose for each frame. '
+ 'This may cause performance degradation, see https://github.com/ShiqiYu/OpenGait/pull/280 for more details. '
+ 'You can avoid this warning by re-get the index files following Step4-2 in datasets/OUMVLP/README.md.'
)
frame_wise_idx = dict()
# apply selection index for each frame in current sequence
for txt_file in sorted(txt_paths):
try:
with open(txt_file) as f:
jsondata = json.load(f)
# no person detected in this frame
if len(jsondata['people'])==0:
continue
data = np.array(jsondata["people"][0]["pose_keypoints_2d"]).reshape(-1,3)
# get the frame index (digit str before extension) of current frame
try:
frame_idx = re.findall(r'(\d+).json', os.path.basename(txt_file))[0]
except IndexError:
# adapt to different name format for json files in ID 00001
frame_idx = re.findall(r'\d{4}', os.path.basename(txt_file))[0]
# use the first person if no index file or less than one pose in current frame
pose_idx = frame_wise_idx.get(frame_idx, 0)
data = np.array(jsondata["people"][pose_idx]["pose_keypoints_2d"]).reshape(-1,3)
to_pickle.append(data)
except:
print(txt_file)
print(f"Fail to extract pkl for frame({txt_file}), seq({sinfo}).")
else:
for txt_file in sorted(txt_paths):
if verbose:
@@ -173,16 +202,16 @@ def txts2pickle(txt_groups: Tuple, output_path: Path, verbose: bool = False, dat
logging.warning(f'{sinfo} has less than 5 valid data.')
def pretreat_pose(input_path: Path, output_path: Path, workers: int = 4, verbose: bool = False, dataset='CASIAB') -> None:
def pretreat_pose(input_path: Path, output_path: Path, workers: int = 4, verbose: bool = False, dataset='CASIAB', **kwargs) -> None:
"""Reads a dataset and saves the data in pickle format.
Args:
input_path (Path): Dataset root path.
output_path (Path): Output path.
img_size (int, optional): Image resizing size. Defaults to 64.
workers (int, optional): Number of thread workers. Defaults to 4.
verbose (bool, optional): Display debug info. Defaults to False.
dataset (str, optional): Dataset name. Defaults to 'CASIAB'.
kwargs (dict, optional): Additional arguments. It receives 'oumvlp_index_dir' when dataset is 'OUMVLP'.
"""
txt_groups = defaultdict(list)
logging.info(f'Listing {input_path}')
@@ -208,7 +237,10 @@ def pretreat_pose(input_path: Path, output_path: Path, workers: int = 4, verbose
with mp.Pool(workers) as pool:
logging.info(f'Start pretreating {input_path}')
for _ in pool.imap_unordered(partial(txts2pickle, output_path=output_path, verbose=verbose, dataset=args.dataset), txt_groups.items()):
for _ in pool.imap_unordered(
partial(txts2pickle, output_path=output_path, verbose=verbose, dataset=args.dataset, **kwargs),
txt_groups.items()
):
progress.update(1)
logging.info('Done')
@@ -224,6 +256,9 @@ if __name__ == '__main__':
parser.add_argument('-d', '--dataset', default='CASIAB', type=str, help='Dataset for pretreatment.')
parser.add_argument('-v', '--verbose', default=False, action='store_true', help='Display debug info.')
parser.add_argument('-p', '--pose', default=False, action='store_true', help='Processing pose.')
parser.add_argument('-oid', '--oumvlp_index_dir', default='', type=str,
help='Path of the directory containing all index files for extracting oumvlp pose pkl, which is necessary to promise the temporal consistency of extracted pose sequence. '
+ 'Note: this argument is only used when extracting oumvlp pose pkl, more info please refer to Step4-2 in datasets/OUMVLP/README.md. ')
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, filename=args.log_file, filemode='w', format='[%(asctime)s - %(levelname)s]: %(message)s')
@@ -234,6 +269,24 @@ if __name__ == '__main__':
for k, v in args.__dict__.items():
logging.debug(f'{k}: {v}')
if args.pose:
pretreat_pose(input_path=Path(args.input_path), output_path=Path(args.output_path), workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)
if args.dataset.lower() == "oumvlp":
assert args.oumvlp_index_dir, (
"When extracting the oumvlp pose pkl, please specify the path of the directory containing all index files using the `--oumvlp_index_dir` argument."
+ "If you don't know what it is, please refer to Step4-2 in datasets/OUMVLP/README.md for more details."
)
args.oumvlp_index_dir = os.path.abspath(args.oumvlp_index_dir)
assert os.path.exists(args.oumvlp_index_dir), f"The specified oumvlp index files' directory({args.oumvlp_index_dir}) does not exist."
logging.info(f'Using the oumvlp index files in {args.oumvlp_index_dir}')
pretreat_pose(
input_path=Path(args.input_path),
output_path=Path(args.output_path),
workers=args.n_workers,
verbose=args.verbose,
dataset=args.dataset,
oumvlp_index_dir=args.oumvlp_index_dir
)
else:
pretreat(input_path=Path(args.input_path), output_path=Path(args.output_path), img_size=args.img_size, workers=args.n_workers, verbose=args.verbose, dataset=args.dataset)