OpenGait release(pre-beta version).

2021-10-18 14:30:21 +08:00
commit 57ee4a448e
41 changed files with 3772 additions and 0 deletions
@@ -0,0 +1,151 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..base_model import BaseModel
+from ..modules import SeparateFCs, BasicConv3d, PackSequenceWrapper
+
+
+class GLConv(nn.Module):
+    def __init__(self, in_channels, out_channels, halving, fm_sign=False, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False, **kwargs):
+        super(GLConv, self).__init__()
+        self.halving = halving
+        self.fm_sign = fm_sign
+        self.global_conv3d = BasicConv3d(
+            in_channels, out_channels, kernel_size, stride, padding, bias, **kwargs)
+        self.local_conv3d = BasicConv3d(
+            in_channels, out_channels, kernel_size, stride, padding, bias, **kwargs)
+
+    def forward(self, x):
+        '''
+            x: [n, c, s, h, w]
+        '''
+        gob_feat = self.global_conv3d(x)
+        if self.halving == 0:
+            lcl_feat = self.local_conv3d(x)
+        else:
+            h = x.size(3)
+            split_size = int(h // 2**self.halving)
+            lcl_feat = x.split(split_size, 3)
+            lcl_feat = torch.cat([self.local_conv3d(_) for _ in lcl_feat], 3)
+
+        if not self.fm_sign:
+            feat = F.leaky_relu(gob_feat) + F.leaky_relu(lcl_feat)
+        else:
+            feat = F.leaky_relu(torch.cat([gob_feat, lcl_feat], dim=3))
+        return feat
+
+
+class GeMHPP(nn.Module):
+    def __init__(self, bin_num=[64], p=6.5, eps=1.0e-6):
+        super(GeMHPP, self).__init__()
+        self.bin_num = bin_num
+        self.p = nn.Parameter(
+            torch.ones(1)*p)
+        self.eps = eps
+
+    def gem(self, ipts):
+        return F.avg_pool2d(ipts.clamp(min=self.eps).pow(self.p), (1, ipts.size(-1))).pow(1. / self.p)
+
+    def forward(self, x):
+        """
+            x  : [n, c, h, w]
+            ret: [n, c, p] 
+        """
+        n, c = x.size()[:2]
+        features = []
+        for b in self.bin_num:
+            z = x.view(n, c, b, -1)
+            z = self.gem(z).squeeze(-1)
+            features.append(z)
+        return torch.cat(features, -1)
+
+
+class GaitGL(BaseModel):
+    """
+        GaitGL: Gait Recognition via Effective Global-Local Feature Representation and Local Temporal Aggregation
+        Arxiv : https://arxiv.org/pdf/2011.01461.pdf
+    """
+
+    def __init__(self, *args, **kargs):
+        super(GaitGL, self).__init__(*args, **kargs)
+
+    def build_network(self, model_cfg):
+        in_c = model_cfg['channels']
+        class_num = model_cfg['class_num']
+
+        # For CASIA-B
+        self.conv3d = nn.Sequential(
+            BasicConv3d(1, in_c[0], kernel_size=(3, 3, 3),
+                        stride=(1, 1, 1), padding=(1, 1, 1)),
+            nn.LeakyReLU(inplace=True)
+        )
+        self.LTA = nn.Sequential(
+            BasicConv3d(in_c[0], in_c[0], kernel_size=(
+                3, 1, 1), stride=(3, 1, 1), padding=(0, 0, 0)),
+            nn.LeakyReLU(inplace=True)
+        )
+
+        self.GLConvA0 = GLConv(in_c[0], in_c[1], halving=3, fm_sign=False, kernel_size=(
+            3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
+        self.MaxPool0 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
+
+        self.GLConvA1 = GLConv(in_c[1], in_c[2], halving=3, fm_sign=False, kernel_size=(
+            3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
+        self.GLConvB2 = GLConv(in_c[2], in_c[2], halving=3, fm_sign=True,  kernel_size=(
+            3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
+
+        self.Head0 = SeparateFCs(64, in_c[2], in_c[2])
+        self.Bn = nn.BatchNorm1d(in_c[2])
+        self.Head1 = SeparateFCs(64, in_c[2], class_num)
+
+        self.TP = PackSequenceWrapper(torch.max)
+        self.HPP = GeMHPP()
+
+    def forward(self, inputs):
+        ipts, labs, _, _, seqL = inputs
+        seqL = None if not self.training else seqL
+
+        sils = ipts[0].unsqueeze(1)
+        del ipts
+        n, _, s, h, w = sils.size()
+        if s < 3:
+            repeat = 3 if s == 1 else 2
+            sils = sils.repeat(1, 1, repeat, 1, 1)
+
+        outs = self.conv3d(sils)
+        outs = self.LTA(outs)
+
+        outs = self.GLConvA0(outs)
+        outs = self.MaxPool0(outs)
+
+        outs = self.GLConvA1(outs)
+        outs = self.GLConvB2(outs)  # [n, c, s, h, w]
+
+        outs = self.TP(outs, dim=2, seq_dim=2, seqL=seqL)[0]  # [n, c, h, w]
+        outs = self.HPP(outs)  # [n, c, p]
+        outs = outs.permute(2, 0, 1).contiguous()  # [p, n, c]
+
+        gait = self.Head0(outs)  # [p, n, c]
+        gait = gait.permute(1, 2, 0).contiguous()  # [n, c, p]
+        bnft = self.Bn(gait)  # [n, c, p]
+        logi = self.Head1(bnft.permute(2, 0, 1).contiguous())  # [p, n, c]
+
+        gait = gait.permute(0, 2, 1).contiguous()  # [n, p, c]
+        bnft = bnft.permute(0, 2, 1).contiguous()  # [n, p, c]
+        logi = logi.permute(1, 0, 2).contiguous()  # [n, p, c]
+
+        n, _, s, h, w = sils.size()
+        retval = {
+            'training_feat': {
+                'triplet': {'embeddings': bnft, 'labels': labs},
+                'softmax': {'logits': logi, 'labels': labs}
+            },
+            'visual_summary': {
+                'image/sils': sils.view(n*s, 1, h, w)
+            },
+            'inference_feat': {
+                'embeddings': bnft
+            }
+        }
+        return retval