Solve the problem of dimension misuse. (#59)

* commit for fix dimension

* fix dimension for all methods

* restore config

* clean up baseline config

* add contiguous

* rm comment
This commit is contained in:
Junhao Liang
2022-06-28 12:27:16 +08:00
committed by GitHub
parent 715e7448fa
commit 14fa5212d4
14 changed files with 99 additions and 121 deletions
+6 -11
View File
@@ -19,26 +19,21 @@ class Baseline(BaseModel):
sils = ipts[0]
if len(sils.size()) == 4:
sils = sils.unsqueeze(2)
sils = sils.unsqueeze(1)
del ipts
outs = self.Backbone(sils) # [n, s, c, h, w]
outs = self.Backbone(sils) # [n, c, s, h, w]
# Temporal Pooling, TP
outs = self.TP(outs, seqL, dim=1)[0] # [n, c, h, w]
outs = self.TP(outs, seqL, options={"dim": 2})[0] # [n, c, h, w]
# Horizontal Pooling Matching, HPM
feat = self.HPP(outs) # [n, c, p]
feat = feat.permute(2, 0, 1).contiguous() # [p, n, c]
embed_1 = self.FCs(feat) # [p, n, c]
embed_2, logits = self.BNNecks(embed_1) # [p, n, c]
embed_1 = embed_1.permute(1, 0, 2).contiguous() # [n, p, c]
embed_2 = embed_2.permute(1, 0, 2).contiguous() # [n, p, c]
logits = logits.permute(1, 0, 2).contiguous() # [n, p, c]
embed_1 = self.FCs(feat) # [n, c, p]
embed_2, logits = self.BNNecks(embed_1) # [n, c, p]
embed = embed_1
n, s, _, h, w = sils.size()
n, _, s, h, w = sils.size()
retval = {
'training_feat': {
'triplet': {'embeddings': embed_1, 'labels': labs},
+12 -17
View File
@@ -75,7 +75,7 @@ class GaitGL(BaseModel):
class_num = model_cfg['class_num']
dataset_name = self.cfgs['data_cfg']['dataset_name']
if dataset_name in ['OUMVLP','GREW']:
if dataset_name in ['OUMVLP', 'GREW']:
# For OUMVLP and GREW
self.conv3d = nn.Sequential(
BasicConv3d(1, in_c[0], kernel_size=(3, 3, 3),
@@ -135,12 +135,11 @@ class GaitGL(BaseModel):
self.GLConvB2 = GLConv(in_c[2], in_c[2], halving=3, fm_sign=True, kernel_size=(
3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
self.TP = PackSequenceWrapper(torch.max)
self.HPP = GeMHPP()
self.Head0 = SeparateFCs(64, in_c[-1], in_c[-1])
if 'SeparateBNNecks' in model_cfg.keys():
self.BNNecks = SeparateBNNecks(**model_cfg['SeparateBNNecks'])
self.Bn_head = False
@@ -171,22 +170,18 @@ class GaitGL(BaseModel):
outs = self.GLConvA1(outs)
outs = self.GLConvB2(outs) # [n, c, s, h, w]
outs = self.TP(outs, dim=2, seq_dim=2, seqL=seqL)[0] # [n, c, h, w]
outs = self.TP(outs, seqL=seqL, options={"dim": 2})[0] # [n, c, h, w]
outs = self.HPP(outs) # [n, c, p]
outs = outs.permute(2, 0, 1).contiguous() # [p, n, c]
gait = self.Head0(outs) # [p, n, c]
if self.Bn_head: # Original GaitGL Head
gait = gait.permute(1, 2, 0).contiguous() # [n, c, p]
gait = self.Head0(outs) # [n, c, p]
if self.Bn_head: # Original GaitGL Head
bnft = self.Bn(gait) # [n, c, p]
logi = self.Head1(bnft.permute(2, 0, 1).contiguous()) # [p, n, c]
embed = bnft.permute(0, 2, 1).contiguous() # [n, p, c]
else: # BNNechk as Head
bnft, logi = self.BNNecks(gait) # [p, n, c]
embed = gait.permute(1, 0, 2).contiguous() # [n, p, c]
logi = logi.permute(1, 0, 2).contiguous() # [n, p, c]
logi = self.Head1(bnft) # [n, c, p]
embed = bnft
else: # BNNechk as Head
bnft, logi = self.BNNecks(gait) # [n, c, p]
embed = gait
n, _, s, h, w = sils.size()
retval = {
+12 -13
View File
@@ -45,12 +45,12 @@ class TemporalFeatureAggregator(nn.Module):
def forward(self, x):
"""
Input: x, [n, s, c, p]
Output: ret, [n, p, c]
Input: x, [n, c, s, p]
Output: ret, [n, c, p]
"""
n, s, c, p = x.size()
x = x.permute(3, 0, 2, 1).contiguous() # [p, n, c, s]
feature = x.split(1, 0) # [[n, c, s], ...]
n, c, s, p = x.size()
x = x.permute(3, 0, 1, 2).contiguous() # [p, n, c, s]
feature = x.split(1, 0) # [[1, n, c, s], ...]
x = x.view(-1, c, s)
# MTB1: ConvNet1d & Sigmoid
@@ -73,7 +73,7 @@ class TemporalFeatureAggregator(nn.Module):
# Temporal Pooling
ret = self.TP(feature3x1 + feature3x3, dim=-1)[0] # [p, n, c]
ret = ret.permute(1, 0, 2).contiguous() # [n, p, c]
ret = ret.permute(1, 2, 0).contiguous() # [n, p, c]
return ret
@@ -102,17 +102,16 @@ class GaitPart(BaseModel):
sils = ipts[0]
if len(sils.size()) == 4:
sils = sils.unsqueeze(2)
sils = sils.unsqueeze(1)
del ipts
out = self.Backbone(sils) # [n, s, c, h, w]
out = self.HPP(out) # [n, s, c, p]
out = self.TFA(out, seqL) # [n, p, c]
out = self.Backbone(sils) # [n, c, s, h, w]
out = self.HPP(out) # [n, c, s, p]
out = self.TFA(out, seqL) # [n, c, p]
embs = self.Head(out.permute(1, 0, 2).contiguous()) # [p, n, c]
embs = embs.permute(1, 0, 2).contiguous() # [n, p, c]
embs = self.Head(out) # [n, c, p]
n, s, _, h, w = sils.size()
n, _, s, h, w = sils.size()
retval = {
'training_feat': {
'triplet': {'embeddings': embs, 'labels': labs}
+5 -7
View File
@@ -49,30 +49,28 @@ class GaitSet(BaseModel):
ipts, labs, _, _, seqL = inputs
sils = ipts[0] # [n, s, h, w]
if len(sils.size()) == 4:
sils = sils.unsqueeze(2)
sils = sils.unsqueeze(1)
del ipts
outs = self.set_block1(sils)
gl = self.set_pooling(outs, seqL, dim=1)[0]
gl = self.set_pooling(outs, seqL, options={"dim": 2})[0]
gl = self.gl_block2(gl)
outs = self.set_block2(outs)
gl = gl + self.set_pooling(outs, seqL, dim=1)[0]
gl = gl + self.set_pooling(outs, seqL, options={"dim": 2})[0]
gl = self.gl_block3(gl)
outs = self.set_block3(outs)
outs = self.set_pooling(outs, seqL, dim=1)[0]
outs = self.set_pooling(outs, seqL, options={"dim": 2})[0]
gl = gl + outs
# Horizontal Pooling Matching, HPM
feature1 = self.HPP(outs) # [n, c, p]
feature2 = self.HPP(gl) # [n, c, p]
feature = torch.cat([feature1, feature2], -1) # [n, c, p]
feature = feature.permute(2, 0, 1).contiguous() # [p, n, c]
embs = self.Head(feature)
embs = embs.permute(1, 0, 2).contiguous() # [n, p, c]
n, s, _, h, w = sils.size()
n, _, s, h, w = sils.size()
retval = {
'training_feat': {
'triplet': {'embeddings': embs, 'labels': labs}
+7 -9
View File
@@ -89,12 +89,12 @@ class GLN(BaseModel):
sils = ipts[0] # [n, s, h, w]
del ipts
if len(sils.size()) == 4:
sils = sils.unsqueeze(2)
n, s, _, h, w = sils.size()
sils = sils.unsqueeze(1)
n, _, s, h, w = sils.size()
### stage 0 sil ###
sil_0_outs = self.sil_stage_0(sils)
stage_0_sil_set = self.set_pooling(sil_0_outs, seqL, dim=1)[0]
stage_0_sil_set = self.set_pooling(sil_0_outs, seqL, options={"dim": 2})[0]
### stage 1 sil ###
sil_1_ipts = self.MaxP_sil(sil_0_outs)
@@ -105,13 +105,13 @@ class GLN(BaseModel):
sil_2_outs = self.sil_stage_2(sil_2_ipts)
### stage 1 set ###
set_1_ipts = self.set_pooling(sil_1_ipts, seqL, dim=1)[0]
stage_1_sil_set = self.set_pooling(sil_1_outs, seqL, dim=1)[0]
set_1_ipts = self.set_pooling(sil_1_ipts, seqL, options={"dim": 2})[0]
stage_1_sil_set = self.set_pooling(sil_1_outs, seqL, options={"dim": 2})[0]
set_1_outs = self.set_stage_1(set_1_ipts) + stage_1_sil_set
### stage 2 set ###
set_2_ipts = self.MaxP_set(set_1_outs)
stage_2_sil_set = self.set_pooling(sil_2_outs, seqL, dim=1)[0]
stage_2_sil_set = self.set_pooling(sil_2_outs, seqL, options={"dim": 2})[0]
set_2_outs = self.set_stage_2(set_2_ipts) + stage_2_sil_set
set1 = torch.cat((stage_0_sil_set, stage_0_sil_set), dim=1)
@@ -133,11 +133,9 @@ class GLN(BaseModel):
set2 = self.HPP(set2)
set3 = self.HPP(set3)
feature = torch.cat([set1, set2, set3], -
1).permute(2, 0, 1).contiguous()
feature = torch.cat([set1, set2, set3], -1)
feature = self.Head(feature)
feature = feature.permute(1, 0, 2).contiguous() # n p c
# compact_bloack
if not self.pretrain: