update gaitedge

2022-07-19 14:14:48 +08:00
parent 13894439a4
commit 4b681fb9bd
7 changed files with 31 additions and 22 deletions
@@ -3,13 +3,17 @@
 This [paper](https://arxiv.org/abs/2203.03972) has been accepted by ECCV 2022.

 ## Abstract
-Gait is one of the most promising biometrics to identify individuals at a long distance. Although most previous methods have focused on recognizing the silhouettes, several end-to-end methods that extract gait features directly from RGB images perform better. However, we argue that these end-to-end methods inevitably suffer from the gait-unrelated noises, i.e., low-level texture and colorful information. Experimentally, we design both the cross-domain evaluation and visualization to stand for this view. In this work, we propose a novel end-to-end framework named GaitEdge which can effectively block gait-unrelated information and release end-to-end training potential. Specifically, GaitEdge synthesizes the output of the pedestrian segmentation network and then feeds it to the subsequent recognition network, where the synthetic silhouettes consist of trainable edges of bodies and fixed interiors to limit the information that the recognition network receives. Besides, GaitAlign for aligning silhouettes is embedded into the GaitEdge without loss of differentiability. Experimental results on CASIA-B and our newly built TTG-200 indicate that GaitEdge significantly outperforms the previous methods and provides a more practical end-to-end paradigm for blocking RGB noises effectively.
+Gait is one of the most promising biometrics to identify individuals at a long distance. Although most previous methods have focused on recognizing the silhouettes, several end-to-end methods that extract gait features directly from RGB images perform better. However, we demonstrate that these end-to-end methods may inevitably suffer from the gait-irrelevant noises, i.e., low-level texture and colorful information. Experimentally, we design the **cross-domain** evaluation to support this view. In this work, we propose a novel end-to-end framework named **GaitEdge** which can effectively block gait-irrelevant information and release end-to-end training potential. Specifically, GaitEdge synthesizes the output of the pedestrian segmentation network and then feeds it to the subsequent recognition network, where the synthetic silhouettes consist of trainable edges of bodies and fixed interiors to limit the information that the recognition network receives. Besides, **GaitAlign** for aligning silhouettes is embedded into the GaitEdge without losing differentiability. Experimental results on CASIA-B and our newly built TTG-200 indicate that GaitEdge significantly outperforms the previous methods and provides a more practical end-to-end paradigm.

 ![img](../../assets/gaitedge.png)
-
+## CASIA-B*
+Since the silhouettes of CASIA-B were obtained by outdated background subtraction, they contain considerable noise caused by the background and the subjects' clothes. Hence, we re-annotate the
+silhouettes of CASIA-B and denote the re-annotated dataset as CASIA-B*. Refer to [here](../../datasets/CASIA-B*/README.md) for more details.
 ## Performance
 |    Model   |  NM  |  BG  |  CL  | TTG-200 (cross-domain) |                  Configuration                 |
 |:----------:|:----:|:----:|:----:|:----------------------:|:----------------------------------------------:|
 |   GaitGL   | 94.0 | 89.6 | 81.0 |          53.2          |      [phase1_rec.yaml](./phase1_rec.yaml)      |
 | GaitGL-E2E | 99.1 | 98.2 | 89.1 |          45.6          |      [phase2_e2e.yaml](./phase2_e2e.yaml)      |
 |  GaitEdge  | 98.0 | 96.3 | 88.0 |          53.9          | [phase2_gaitedge.yaml](./phase2_gaitedge.yaml) |
+
+***The results here are higher than those in the paper because we use a different optimization strategy; this does not affect the conclusions of the paper.***
@@ -1,6 +1,6 @@
 # Note  : *** the batch_size should be equal to the gpus number at the test phase!!! ***
 data_cfg:
-  dataset_name: CASIA-B_new
+  dataset_name: CASIA-B*
  dataset_root: your_path
  dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json
  num_workers: 1
@@ -1,6 +1,6 @@
 # Note  : *** the batch_size should be equal to the gpus number at the test phase!!! ***
 data_cfg:
-  dataset_name: CASIA-B_new
+  dataset_name: CASIA-B*
  dataset_root: your_path
  dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json
  num_workers: 1
@@ -1,5 +1,5 @@
 data_cfg:
-  dataset_name: CASIA-B_new
+  dataset_name: CASIA-B*
  dataset_root: your_path
  dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json
  num_workers: 1
@@ -1,5 +1,5 @@
 data_cfg:
-  dataset_name: CASIA-B_new
+  dataset_name: CASIA-B*
  dataset_root: your_path
  dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json
  num_workers: 1
@@ -4,7 +4,7 @@ import torch.optim as optim

 from ..base_model import BaseModel
 from .gaitgl import GaitGL
-from ..modules import SilhouetteCropAndResize
+from ..modules import GaitAlign
 from torchvision.transforms import Resize
 from utils import get_valid_args, get_attr_from, is_list_or_tuple
 import os.path as osp
@@ -46,10 +46,12 @@ class GaitEdge(GaitGL):
        super(GaitEdge, self).build_network(model_cfg["GaitGL"])
        self.Backbone = self.get_backbone(model_cfg['Segmentation'])
        self.align = model_cfg['align']
-        self.CROP = SilhouetteCropAndResize()
+        self.gait_align = GaitAlign()
        self.resize = Resize((64, 44))
        self.is_edge = model_cfg['edge']
        self.seg_lr = model_cfg['seg_lr']
+        self.kernel = torch.ones(
+            (model_cfg['kernel_size'], model_cfg['kernel_size'])).cuda()

    def finetune_parameters(self):
        fine_tune_params = list()
@@ -88,14 +90,22 @@ class GaitEdge(GaitGL):
                "Error type for -Restore_Hint-, supported: int or string.")
        self._load_ckpt(save_name)

+    def preprocess(self, sils):
+
+        dilated_mask = (morph.dilation(sils, self.kernel).detach()
+                        ) > 0.5  # Dilation
+        eroded_mask = (morph.erosion(sils, self.kernel).detach()
+                       ) > 0.5   # Erosion
+        edge_mask = dilated_mask ^ eroded_mask
+        return edge_mask, eroded_mask
+
    def forward(self, inputs):
        ipts, labs, _, _, seqL = inputs

        ratios = ipts[0]
        rgbs = ipts[1]
        sils = ipts[2]
-        # if len(sils.size()) == 4:
-        #     sils = sils.unsqueeze(2)
+
        n, s, c, h, w = rgbs.size()
        rgbs = rgbs.view(n*s, c, h, w)
        sils = sils.view(n*s, 1, h, w)
@@ -103,24 +113,19 @@ class GaitEdge(GaitGL):
        logits = torch.sigmoid(logis)
        mask = torch.round(logits).float()
        if self.is_edge:
-            kernel_1 = torch.ones((3, 3)).cuda()
-            kernel_2 = torch.ones((3, 3)).cuda()
-
-            dilated_mask = (morph.dilation(sils, kernel_1).detach()
-                            ) > 0.5  # Dilation
-            eroded_mask = (morph.erosion(sils, kernel_2).detach()
-                           ) > 0.5   # Dilation
-            edge_mask = dilated_mask ^ eroded_mask
+            edge_mask, eroded_mask = self.preprocess(sils)

+            # Gait Synthesis
            new_logits = edge_mask*logits+eroded_mask*sils
+
            if self.align:
-                cropped_logits = self.CROP(
+                cropped_logits = self.gait_align(
                    new_logits, sils, ratios)
            else:
                cropped_logits = self.resize(new_logits)
        else:
            if self.align:
-                cropped_logits = self.CROP(
+                cropped_logits = self.gait_align(
                    logits, mask, ratios)
            else:
                cropped_logits = self.resize(logits)
@@ -183,9 +183,9 @@ class BasicConv3d(nn.Module):
        return outs


-class SilhouetteCropAndResize(nn.Module):
+class GaitAlign(nn.Module):
    def __init__(self, H=64, W=44, eps=1, **kwargs):
-        super(SilhouetteCropAndResize, self).__init__()
+        super(GaitAlign, self).__init__()
        self.H, self.W, self.eps = H, W, eps
        self.Pad = nn.ZeroPad2d((int(self.W / 2), int(self.W / 2), 0, 0))
        self.RoiPool = RoIAlign((self.H, self.W), 1, sampling_ratio=-1)