Refine DRF preprocessing and body-prior pipeline

This commit is contained in:
2026-03-08 04:04:15 +08:00
parent fddbf6eeda
commit bbb41e8dd9
10 changed files with 448 additions and 53 deletions
+54 -12
View File
@@ -118,8 +118,8 @@ class GeneratePoseTarget:
ed_x = min(tmp_ed_x + 1, img_w)
st_y = max(tmp_st_y, 0)
ed_y = min(tmp_ed_y + 1, img_h)
x = np.arange(st_x, ed_x, 1, np.float32)
y = np.arange(st_y, ed_y, 1, np.float32)
x = np.arange(st_x, ed_x, dtype=np.float32)
y = np.arange(st_y, ed_y, dtype=np.float32)
# skip if the keypoint is not inside the heatmap coordinate system
if not (len(x) and len(y)):
@@ -166,8 +166,8 @@ class GeneratePoseTarget:
min_y = max(tmp_min_y, 0)
max_y = min(tmp_max_y + 1, img_h)
x = np.arange(min_x, max_x, 1, np.float32)
y = np.arange(min_y, max_y, 1, np.float32)
x = np.arange(min_x, max_x, dtype=np.float32)
y = np.arange(min_y, max_y, dtype=np.float32)
if not (len(x) and len(y)):
continue
@@ -324,9 +324,37 @@ class HeatmapToImage:
heatmaps = [cv2.resize(x, (neww, newh)) for x in heatmaps]
return np.ascontiguousarray(np.mean(np.array(heatmaps), axis=-1, keepdims=True).transpose(0,3,1,2))
class HeatmapReducer:
    """Reduce stacked joint/limb heatmaps to a single grayscale channel.

    Two reduction modes are supported:

    - ``"max"``: per-pixel maximum over the channel axis, clipped to
      ``[0, 1]`` and scaled to ``[0, 255]`` as ``uint8``.
    - ``"sum"``: per-pixel sum over the channel axis, scaled by 255 and
      returned as ``float32`` (not clipped, so values may exceed 255).
    """

    def __init__(self, reduction: str = "max") -> None:
        """Store the reduction mode.

        Parameters:
        - reduction (str): Either "max" or "sum".

        Raises:
        - ValueError: If ``reduction`` is not one of the supported modes.
        """
        if reduction not in {"max", "sum"}:
            raise ValueError(f"Unsupported heatmap reduction: {reduction}")
        self.reduction = reduction

    def __call__(self, heatmaps: np.ndarray) -> np.ndarray:
        """
        heatmaps: (T, C, H, W)
        return: (T, 1, H, W) -- uint8 for "max", float32 for "sum"
        """
        if self.reduction == "max":
            # `initial=0` makes an empty channel axis (C == 0) reduce to an
            # all-zero frame instead of raising, matching what the "sum"
            # branch already does. It cannot change non-empty results because
            # the maximum is clipped to a lower bound of 0 right below.
            reduced = np.max(heatmaps, axis=1, keepdims=True, initial=0)
            reduced = np.clip(reduced, 0.0, 1.0)
            # The cast truncates the fractional part (e.g. 127.5 -> 127).
            return (reduced * 255).astype(np.uint8)

        reduced = np.sum(heatmaps, axis=1, keepdims=True)
        return (reduced * 255.0).astype(np.float32)
class CenterAndScaleNormalizer:
def __init__(self, pose_format="coco", use_conf=True, heatmap_image_height=128) -> None:
def __init__(
self,
pose_format="coco",
use_conf=True,
heatmap_image_height=128,
target_body_height=None,
) -> None:
"""
Parameters:
- pose_format (str): Specifies the format of the keypoints.
@@ -334,10 +362,13 @@ class CenterAndScaleNormalizer:
The supported formats are "coco" or "openpose-x" where 'x' can be either 18 or 25, indicating the number of keypoints used by the OpenPose model.
- use_conf (bool): Indicates whether confidence scores are used.
- heatmap_image_height (int): Sets the height (in pixels) of the heatmap images used for normalization.
- target_body_height (float | None): Optional normalized body height. When omitted,
preserve the historical SkeletonGait scaling heuristic.
"""
self.pose_format = pose_format
self.use_conf = use_conf
self.heatmap_image_height = heatmap_image_height
self.target_body_height = target_body_height
def __call__(self, data):
"""
@@ -369,7 +400,13 @@ class CenterAndScaleNormalizer:
# Scale-normalization
y_max = np.max(pose_seq[:, :, 1], axis=-1) # [t]
y_min = np.min(pose_seq[:, :, 1], axis=-1) # [t]
pose_seq *= ((self.heatmap_image_height // 1.5) / (y_max - y_min)[:, np.newaxis, np.newaxis]) # [t, v, 2]
target_body_height = (
float(self.target_body_height)
if self.target_body_height is not None
else float(self.heatmap_image_height // 1.5)
)
body_height = np.maximum(y_max - y_min, 1e-6)
pose_seq *= (target_body_height / body_height)[:, np.newaxis, np.newaxis] # [t, v, 2]
pose_seq += self.heatmap_image_height // 2
@@ -523,16 +560,21 @@ class HeatmapAlignment():
heatmap_imgs: (T, 1, raw_size, raw_size)
return (T, 1, final_img_size, final_img_size)
"""
heatmap_imgs = heatmap_imgs / 255.
heatmap_imgs = np.array([self.center_crop(heatmap_img) for heatmap_img in heatmap_imgs])
return (heatmap_imgs * 255).astype('uint8')
original_dtype = heatmap_imgs.dtype
heatmap_imgs = heatmap_imgs.astype(np.float32) / 255.0
heatmap_imgs = np.array([self.center_crop(heatmap_img) for heatmap_img in heatmap_imgs], dtype=np.float32)
heatmap_imgs = heatmap_imgs * 255.0
if np.issubdtype(original_dtype, np.integer):
return np.clip(heatmap_imgs, 0.0, 255.0).astype(original_dtype)
return heatmap_imgs.astype(original_dtype)
def GenerateHeatmapTransform(
coco18tococo17_args,
padkeypoints_args,
norm_args,
heatmap_generator_args,
align_args
align_args,
reduction="max",
):
base_transform = T.Compose([
@@ -545,7 +587,7 @@ def GenerateHeatmapTransform(
heatmap_generator_args["with_kp"] = False
transform_bone = T.Compose([
GeneratePoseTarget(**heatmap_generator_args),
HeatmapToImage(),
HeatmapReducer(reduction=reduction),
HeatmapAlignment(**align_args)
])
@@ -553,7 +595,7 @@ def GenerateHeatmapTransform(
heatmap_generator_args["with_kp"] = True
transform_joint = T.Compose([
GeneratePoseTarget(**heatmap_generator_args),
HeatmapToImage(),
HeatmapReducer(reduction=reduction),
HeatmapAlignment(**align_args)
])