diff --git a/configs/gaitedge/phase1_rec.yaml b/configs/gaitedge/phase1_rec.yaml new file mode 100644 index 0000000..e7d8b64 --- /dev/null +++ b/configs/gaitedge/phase1_rec.yaml @@ -0,0 +1,69 @@ +# Note : *** the batch_size should be equal to the gpus number at the test phase!!! *** +data_cfg: + dataset_name: CASIA-B_new + dataset_root: /home1/data/casiab-new-64-cut-pkl/ + dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: CASIA-B + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 80000 + save_name: GaitGL + sampler: + batch_size: 4 + sample_type: all_ordered + type: InferenceSampler + transform: + - type: BaseSilTransform +loss_cfg: + - loss_term_weight: 1.0 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 1.0 + scale: 1 + type: CrossEntropyLoss + log_accuracy: true + label_smooth: false + log_prefix: softmax + +model_cfg: + model: GaitGL + channels: [32, 64, 128] + class_num: 74 + +optimizer_cfg: + lr: 1.0e-4 + solver: Adam + weight_decay: 5.0e-4 + +scheduler_cfg: + gamma: 0.1 + milestones: + - 70000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + with_test: true + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 10000 + save_name: GaitGL + sync_BN: true + total_iter: 80000 + sampler: + batch_shuffle: true + batch_size: + - 8 + - 8 + frames_num_fixed: 30 + frames_skip_num: 0 + sample_type: fixed_ordered + type: TripletSampler + transform: + - type: BaseSilTransform \ No newline at end of file diff --git a/configs/gaitedge/phase1_seg.yaml b/configs/gaitedge/phase1_seg.yaml new file mode 100644 index 0000000..aa2f943 --- /dev/null +++ b/configs/gaitedge/phase1_seg.yaml @@ -0,0 +1,71 @@ +# Note : *** the batch_size should be equal to the gpus number at the test phase!!! 
*** +data_cfg: + dataset_name: CASIA-B_new + dataset_root: /home1/data/casiab-128-end2end + dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json + num_workers: 1 + remove_no_gallery: false + test_dataset_name: CASIA-B + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 30000 + save_name: Segmentation + eval_func: mean_iou + sampler: + batch_size: 4 + sample_type: all_ordered + type: InferenceSampler + frames_all_limit: 720 + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform + +loss_cfg: + - loss_term_weight: 1.0 + type: BinaryCrossEntropyLoss + log_prefix: bce + kld: false + +model_cfg: + model: Segmentation + backbone_cfg: + type: U_Net + in_channels: 3 + +optimizer_cfg: + lr: 0.1 + momentum: 0.9 + solver: SGD + weight_decay: 0.0005 + +scheduler_cfg: + gamma: 0.1 + milestones: # Learning Rate Reduction at each milestones + - 10000 + - 20000 + scheduler: MultiStepLR + +trainer_cfg: + enable_float16: true + with_test: true + log_iter: 100 + restore_ckpt_strict: true + restore_hint: 0 + save_iter: 10000 + save_name: Segmentation + total_iter: 30000 + sampler: + batch_shuffle: true + batch_size: + - 8 + - 16 + frames_num_fixed: 15 + sample_type: fixed_unordered + type: TripletSampler + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform \ No newline at end of file diff --git a/configs/gaitedge/phase2_e2e.yaml b/configs/gaitedge/phase2_e2e.yaml new file mode 100644 index 0000000..12b5721 --- /dev/null +++ b/configs/gaitedge/phase2_e2e.yaml @@ -0,0 +1,90 @@ +data_cfg: + dataset_name: CASIA-B_new + dataset_root: /home1/data/casiab-128-end2end + dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json + num_workers: 1 + remove_no_gallery: false # Remove probe if no gallery for it + test_dataset_name: CASIA-B + cache: false + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: GaitGL_E2E + eval_func: 
identification_real_scene + sampler: + batch_size: 4 + sample_type: all_ordered + type: InferenceSampler + frames_all_limit: 512 # limit the number of sampled frames to prevent out of memory + metric: euc # cos + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform + +loss_cfg: + - loss_term_weight: 1 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 10 + type: BinaryCrossEntropyLoss + log_prefix: bce + - loss_term_weight: 1 + scale: 1 + type: CrossEntropyLoss + log_accuracy: true + label_smooth: false + log_prefix: softmax + +model_cfg: + model: GaitEdge + GaitGL: + channels: [32, 64, 128] + class_num: 74 + Segmentation: + type: U_Net + in_channels: 3 + freeze_half: true + edge: false + align: true + seg_lr: 0.00001 + +optimizer_cfg: + lr: 0.0001 + solver: Adam + weight_decay: 0.0005 +scheduler_cfg: + gamma: 0.1 + milestones: # Learning Rate Reduction at each milestone + - 10000 + scheduler: MultiStepLR +trainer_cfg: + enable_float16: true # half-precision float for memory reduction and speedup + with_test: true + log_iter: 100 + restore_ckpt_strict: false + optimizer_reset: true + scheduler_reset: true + sync_BN: true + restore_hint: + - /home/leeeung/workspace/OpenGait/output/CASIA-B_new/Segmentation/Segmentation/checkpoints/Segmentation-25000.pt + - /home/leeeung/OpenGait/output/CASIA-B_new/GaitGL/GaitGL/checkpoints/GaitGL-80000.pt + save_iter: 2000 + save_name: GaitGL_E2E + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 8 # TripletSampler, batch_size[0] indicates Number of Identity + - 8 # batch_size[1] indicates Samples sequence for each Identity + frames_num_fixed: 30 + frames_skip_num: 0 + sample_type: fixed_ordered + type: TripletSampler + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform diff --git a/configs/gaitedge/phase2_gaitedge.yaml b/configs/gaitedge/phase2_gaitedge.yaml new file mode 100644 index 0000000..29975c9 --- /dev/null 
+++ b/configs/gaitedge/phase2_gaitedge.yaml @@ -0,0 +1,90 @@ +data_cfg: + dataset_name: CASIA-B_new + dataset_root: /home1/data/casiab-128-end2end + dataset_partition: ./datasets/CASIA-B*/CASIA-B*.json + num_workers: 1 + remove_no_gallery: false # Remove probe if no gallery for it + test_dataset_name: CASIA-B + cache: false + +evaluator_cfg: + enable_float16: true + restore_ckpt_strict: true + restore_hint: 20000 + save_name: GaitEdge + eval_func: identification_real_scene + sampler: + batch_size: 4 + sample_type: all_ordered + type: InferenceSampler + frames_all_limit: 512 # limit the number of sampled frames to prevent out of memory + metric: euc # cos + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform + +loss_cfg: + - loss_term_weight: 1 + margin: 0.2 + type: TripletLoss + log_prefix: triplet + - loss_term_weight: 10 + type: BinaryCrossEntropyLoss + log_prefix: bce + - loss_term_weight: 1 + scale: 1 + type: CrossEntropyLoss + log_accuracy: true + label_smooth: false + log_prefix: softmax + +model_cfg: + model: GaitEdge + GaitGL: + channels: [32, 64, 128] + class_num: 74 + Segmentation: + type: U_Net + in_channels: 3 + freeze_half: true + edge: true + align: true + seg_lr: 0.00001 + +optimizer_cfg: + lr: 0.0001 + solver: Adam + weight_decay: 0.0005 +scheduler_cfg: + gamma: 0.1 + milestones: # Learning Rate Reduction at each milestone + - 10000 + scheduler: MultiStepLR +trainer_cfg: + enable_float16: true # half-precision float for memory reduction and speedup + with_test: true + log_iter: 100 + restore_ckpt_strict: false + optimizer_reset: true + scheduler_reset: true + sync_BN: true + restore_hint: + - Segmentation-30000.pt + - GaitGL-80000.pt + save_iter: 2000 + save_name: GaitEdge + total_iter: 20000 + sampler: + batch_shuffle: true + batch_size: + - 8 # TripletSampler, batch_size[0] indicates Number of Identity + - 8 # batch_size[1] indicates Samples sequence for each Identity + frames_num_fixed: 30 + frames_skip_num: 0 
+ sample_type: fixed_ordered + type: TripletSampler + transform: + - type: NoOperation + - type: BaseRgbTransform + - type: BaseSilTransform diff --git a/configs/gaitgl/gaitgl.yaml b/configs/gaitgl/gaitgl.yaml index d5365d9..bce9272 100644 --- a/configs/gaitgl/gaitgl.yaml +++ b/configs/gaitgl/gaitgl.yaml @@ -8,7 +8,6 @@ data_cfg: test_dataset_name: CASIA-B evaluator_cfg: - enable_distributed: true enable_float16: false restore_ckpt_strict: true restore_hint: 80000 diff --git a/configs/gaitgl/gaitgl_GREW.yaml b/configs/gaitgl/gaitgl_GREW.yaml index cafd69b..aab8441 100644 --- a/configs/gaitgl/gaitgl_GREW.yaml +++ b/configs/gaitgl/gaitgl_GREW.yaml @@ -8,7 +8,6 @@ data_cfg: test_dataset_name: GREW evaluator_cfg: - enable_distributed: true enable_float16: false restore_ckpt_strict: true restore_hint: 250000 @@ -49,7 +48,6 @@ scheduler_cfg: scheduler: MultiStepLR trainer_cfg: - enable_distributed: true enable_float16: true with_test: false log_iter: 100 diff --git a/configs/gaitgl/gaitgl_GREW_BNNeck.yaml b/configs/gaitgl/gaitgl_GREW_BNNeck.yaml index cff7699..84c784a 100644 --- a/configs/gaitgl/gaitgl_GREW_BNNeck.yaml +++ b/configs/gaitgl/gaitgl_GREW_BNNeck.yaml @@ -8,7 +8,6 @@ data_cfg: test_dataset_name: GREW evaluator_cfg: - enable_distributed: true enable_float16: false restore_ckpt_strict: true restore_hint: 250000 @@ -53,7 +52,6 @@ scheduler_cfg: scheduler: MultiStepLR trainer_cfg: - enable_distributed: true enable_float16: true with_test: false log_iter: 100 diff --git a/configs/gaitgl/gaitgl_OUMVLP.yaml b/configs/gaitgl/gaitgl_OUMVLP.yaml index dcc1969..d9068ac 100644 --- a/configs/gaitgl/gaitgl_OUMVLP.yaml +++ b/configs/gaitgl/gaitgl_OUMVLP.yaml @@ -8,7 +8,6 @@ data_cfg: test_dataset_name: OUMVLP evaluator_cfg: - enable_distributed: true enable_float16: false restore_ckpt_strict: true restore_hint: 210000 @@ -48,7 +47,6 @@ scheduler_cfg: scheduler: MultiStepLR trainer_cfg: - enable_distributed: true enable_float16: true with_test: false log_iter: 100 diff --git 
a/datasets/CASIA-B*/CASIA-B*.json b/datasets/CASIA-B*/CASIA-B*.json new file mode 100644 index 0000000..a4daee6 --- /dev/null +++ b/datasets/CASIA-B*/CASIA-B*.json @@ -0,0 +1,130 @@ +{ + "TRAIN_SET": [ + "001", + "002", + "003", + "004", + "005", + "006", + "007", + "008", + "009", + "010", + "011", + "012", + "013", + "014", + "015", + "016", + "017", + "018", + "019", + "020", + "021", + "022", + "023", + "024", + "025", + "026", + "027", + "028", + "029", + "030", + "031", + "032", + "033", + "034", + "035", + "036", + "037", + "038", + "039", + "040", + "041", + "042", + "043", + "044", + "045", + "046", + "047", + "048", + "049", + "050", + "051", + "052", + "053", + "054", + "055", + "056", + "057", + "058", + "059", + "060", + "061", + "062", + "063", + "064", + "065", + "066", + "067", + "068", + "069", + "070", + "071", + "072", + "073", + "074" + ], + "TEST_SET": [ + "075", + "076", + "077", + "078", + "079", + "080", + "081", + "082", + "083", + "084", + "085", + "086", + "087", + "088", + "089", + "090", + "091", + "092", + "093", + "094", + "095", + "096", + "097", + "098", + "099", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "107", + "108", + "109", + "110", + "111", + "112", + "113", + "114", + "115", + "116", + "117", + "118", + "119", + "120", + "121", + "122", + "123", + "124" + ] +} \ No newline at end of file diff --git a/datasets/CASIA-B*/README.md b/datasets/CASIA-B*/README.md new file mode 100644 index 0000000..38f2dd3 --- /dev/null +++ b/datasets/CASIA-B*/README.md @@ -0,0 +1,2 @@ +# CASIA-B\* +CASIA-B\* is a re-segmented version of CASIA-B processed by Liang et al. The extra import of CASIA-B* owes to the background subtraction algorithm that CASIA-B uses for generating the silhouette data tends to produce much noise and is outdated for real-world applications nowadays. We use the up-to-date pretreatment strategy to re-segment the raw videos, i.e., the deep pedestrian track and segmentation algorithms. 
As a result, CASIA-B\* consists of the cropped RGB images, binary silhouettes, and the height-width ratio of the obtained bounding boxes. Please refer to [GaitEdge](../../configs/gaitedge/README.md) for more details. If you need this sub-set, please apply with the instruction mentioned in [http://www.cbsr.ia.ac.cn/english/Gait%20Databases.asp]. In the Email Subject, please mark the specific dataset you need, i.e., Dataset B*. \ No newline at end of file diff --git a/opengait/modeling/backbones/u_net.py b/opengait/modeling/backbones/u_net.py new file mode 100644 index 0000000..6d69ac8 --- /dev/null +++ b/opengait/modeling/backbones/u_net.py @@ -0,0 +1,105 @@ +import torch.nn as nn +import torch + + +class ConvBlock(nn.Module): + def __init__(self, ch_in, ch_out): + super(ConvBlock, self).__init__() + self.conv = nn.Sequential( + nn.Conv2d(ch_in, ch_out, kernel_size=3, + stride=1, padding=1, bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True), + nn.Conv2d(ch_out, ch_out, kernel_size=3, + stride=1, padding=1, bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + x = self.conv(x) + return x + + +class UpConv(nn.Module): + def __init__(self, ch_in, ch_out): + super(UpConv, self).__init__() + self.up = nn.Sequential( + nn.Upsample(scale_factor=2), + nn.Conv2d(ch_in, ch_out, kernel_size=3, + stride=1, padding=1, bias=True), + nn.BatchNorm2d(ch_out), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + x = self.up(x) + return x + + +class U_Net(nn.Module): + def __init__(self, in_channels=3, freeze_half=True): + super(U_Net, self).__init__() + + self.Maxpool = nn.MaxPool2d(kernel_size=2, stride=2) + + self.Conv1 = ConvBlock(ch_in=in_channels, ch_out=16) + self.Conv2 = ConvBlock(ch_in=16, ch_out=32) + self.Conv3 = ConvBlock(ch_in=32, ch_out=64) + self.Conv4 = ConvBlock(ch_in=64, ch_out=128) + self.freeze = freeze_half + # Begin Fine-tuning + if freeze_half: + self.Conv1.requires_grad_(False) + 
self.Conv2.requires_grad_(False) + self.Conv3.requires_grad_(False) + self.Conv4.requires_grad_(False) + # End Fine-tuning + + self.Up4 = UpConv(ch_in=128, ch_out=64) + self.Up_conv4 = ConvBlock(ch_in=128, ch_out=64) + + self.Up3 = UpConv(ch_in=64, ch_out=32) + self.Up_conv3 = ConvBlock(ch_in=64, ch_out=32) + + self.Up2 = UpConv(ch_in=32, ch_out=16) + self.Up_conv2 = ConvBlock(ch_in=32, ch_out=16) + + self.Conv_1x1 = nn.Conv2d( + 16, 1, kernel_size=1, stride=1, padding=0) + + def forward(self, x): + if self.freeze: + with torch.no_grad(): + # encoding path + # Begin Fine-tuning + + x1 = self.Conv1(x) + x2 = self.Maxpool(x1) + x2 = self.Conv2(x2) + x3 = self.Maxpool(x2) + x3 = self.Conv3(x3) + x4 = self.Maxpool(x3) + x4 = self.Conv4(x4) + # End Fine-tuning + else: + x1 = self.Conv1(x) + x2 = self.Maxpool(x1) + x2 = self.Conv2(x2) + x3 = self.Maxpool(x2) + x3 = self.Conv3(x3) + x4 = self.Maxpool(x3) + x4 = self.Conv4(x4) + + d4 = self.Up4(x4) + d4 = torch.cat((x3, d4), dim=1) + d4 = self.Up_conv4(d4) + d3 = self.Up3(d4) + d3 = torch.cat((x2, d3), dim=1) + d3 = self.Up_conv3(d3) + + d2 = self.Up2(d3) + d2 = torch.cat((x1, d2), dim=1) + d2 = self.Up_conv2(d2) + d1 = self.Conv_1x1(d2) + return d1 diff --git a/opengait/modeling/losses/bce.py b/opengait/modeling/losses/bce.py new file mode 100644 index 0000000..317738c --- /dev/null +++ b/opengait/modeling/losses/bce.py @@ -0,0 +1,41 @@ +import torch +from .base import BaseLoss +from utils import MeanIOU + + +class BinaryCrossEntropyLoss(BaseLoss): + def __init__(self, loss_term_weight=1.0, eps=1.0e-9): + super(BinaryCrossEntropyLoss, self).__init__(loss_term_weight) + self.eps = eps + + def forward(self, logits, labels): + """ + logits: [n, 1, h, w] + labels: [n, 1, h, w] + """ + # predts = torch.sigmoid(logits.float()) + labels = labels.float() + logits = logits.float() + + loss = - (labels * torch.log(logits + self.eps) + + (1 - labels) * torch.log(1. 
- logits + self.eps)) + + n = loss.size(0) + loss = loss.view(n, -1) + mean_loss = loss.mean() + hard_loss = loss.max() + miou = MeanIOU((logits > 0.5).float(), labels) + self.info.update({ + 'loss': mean_loss.detach().clone(), + 'hard_loss': hard_loss.detach().clone(), + 'miou': miou.detach().clone()}) + + return mean_loss, self.info + + +if __name__ == "__main__": + loss_func = BinaryCrossEntropyLoss() + ipts = torch.randn(1, 1, 128, 64) + tags = (torch.randn(1, 1, 128, 64) > 0.).float() + loss = loss_func(ipts, tags) + print(loss) diff --git a/opengait/modeling/models/gaitedge.py b/opengait/modeling/models/gaitedge.py new file mode 100644 index 0000000..12b622d --- /dev/null +++ b/opengait/modeling/models/gaitedge.py @@ -0,0 +1,135 @@ +import torch +from kornia import morphology as morph +import torch.optim as optim + +from ..base_model import BaseModel +from .gaitgl import GaitGL +from ..modules import SilhouetteCropAndResize +from torchvision.transforms import Resize +from utils import get_valid_args, get_attr_from, is_list_or_tuple +import os.path as osp + + +class Segmentation(BaseModel): + + def forward(self, inputs): + ipts, labs, typs, vies, seqL = inputs + del seqL + # ratios = ipts[0] + rgbs = ipts[1] + sils = ipts[2] + # del ipts + n, s, c, h, w = rgbs.size() + rgbs = rgbs.view(n*s, c, h, w) + sils = sils.view(n*s, 1, h, w) + logi = self.Backbone(rgbs) # [n*s, c, h, w] + logits = torch.sigmoid(logi) + pred = (logits > 0.5).float() # [n*s, c, h, w] + + retval = { + 'training_feat': { + 'bce': {'logits': logits, 'labels': sils} + }, + 'visual_summary': { + 'image/sils': sils, 'image/logits': logits, "image/pred": pred + }, + 'inference_feat': { + 'pred': pred, 'mask': sils + } + } + return retval + + +class GaitEdge(GaitGL): + + def build_network(self, model_cfg): + super(GaitEdge, self).build_network(model_cfg["GaitGL"]) + self.Backbone = self.get_backbone(model_cfg['Segmentation']) + self.align = model_cfg['align'] + self.CROP = 
SilhouetteCropAndResize() + self.resize = Resize((64, 44)) + self.is_edge = model_cfg['edge'] + self.seg_lr = model_cfg['seg_lr'] + + def finetune_parameters(self): + fine_tune_params = list() + others_params = list() + for name, p in self.named_parameters(): + if not p.requires_grad: + continue + if 'Backbone' in name: + fine_tune_params.append(p) + else: + others_params.append(p) + return [{'params': fine_tune_params, 'lr': self.seg_lr}, {'params': others_params}] + + def get_optimizer(self, optimizer_cfg): + self.msg_mgr.log_info(optimizer_cfg) + optimizer = get_attr_from([optim], optimizer_cfg['solver']) + valid_arg = get_valid_args(optimizer, optimizer_cfg, ['solver']) + optimizer = optimizer(self.finetune_parameters(), **valid_arg) + return optimizer + + def resume_ckpt(self, restore_hint): + if is_list_or_tuple(restore_hint): + for restore_hint_i in restore_hint: + self.resume_ckpt(restore_hint_i) + return + if isinstance(restore_hint, int): + save_name = self.engine_cfg['save_name'] + save_name = osp.join( + self.save_path, 'checkpoints/{}-{:0>5}.pt'.format(save_name, restore_hint)) + self.iteration = restore_hint + elif isinstance(restore_hint, str): + save_name = restore_hint + self.iteration = 0 + else: + raise ValueError( + "Error type for -Restore_Hint-, supported: int or string.") + self._load_ckpt(save_name) + + def forward(self, inputs): + ipts, labs, _, _, seqL = inputs + + ratios = ipts[0] + rgbs = ipts[1] + sils = ipts[2] + # if len(sils.size()) == 4: + # sils = sils.unsqueeze(2) + n, s, c, h, w = rgbs.size() + rgbs = rgbs.view(n*s, c, h, w) + sils = sils.view(n*s, 1, h, w) + logis = self.Backbone(rgbs) # [n, s, c, h, w] + logits = torch.sigmoid(logis) + mask = torch.round(logits).float() + if self.is_edge: + kernel_1 = torch.ones((3, 3)).cuda() + kernel_2 = torch.ones((3, 3)).cuda() + + dilated_mask = (morph.dilation(sils, kernel_1).detach() + ) > 0.5 # Dilation + eroded_mask = (morph.erosion(sils, kernel_2).detach() + ) > 0.5 # Dilation + 
edge_mask = dilated_mask ^ eroded_mask + + new_logits = edge_mask*logits+eroded_mask*sils + if self.align: + cropped_logits = self.CROP( + new_logits, sils, ratios) + else: + cropped_logits = self.resize(new_logits) + else: + if self.align: + cropped_logits = self.CROP( + logits, mask, ratios) + else: + cropped_logits = self.resize(logits) + _, c, H, W = cropped_logits.size() + cropped_logits = cropped_logits.view(n, s, H, W) + retval = super(GaitEdge, self).forward( + [[cropped_logits], labs, None, None, seqL]) + retval['training_feat']['bce'] = {'logits': logits, 'labels': sils} + retval['visual_summary']['image/roi'] = cropped_logits.view( + n*s, 1, H, W) + + return retval diff --git a/opengait/modeling/modules.py b/opengait/modeling/modules.py index 38e81c2..471fe3f 100644 --- a/opengait/modeling/modules.py +++ b/opengait/modeling/modules.py @@ -3,6 +3,7 @@ import numpy as np import torch.nn as nn import torch.nn.functional as F from utils import clones, is_list_or_tuple +from torchvision.ops import RoIAlign class HorizontalPoolingPyramid(): @@ -182,6 +183,61 @@ class BasicConv3d(nn.Module): return outs +class SilhouetteCropAndResize(nn.Module): + def __init__(self, H=64, W=44, eps=1, **kwargs): + super(SilhouetteCropAndResize, self).__init__() + self.H, self.W, self.eps = H, W, eps + self.Pad = nn.ZeroPad2d((int(self.W / 2), int(self.W / 2), 0, 0)) + self.RoiPool = RoIAlign((self.H, self.W), 1, sampling_ratio=-1) + + def forward(self, feature_map, binary_mask, w_h_ratio): + """ + In sils: [n, c, h, w] + w_h_ratio: [n, 1] + Out aligned_sils: [n, c, H, W] + """ + n, c, h, w = feature_map.size() + # w_h_ratio = w_h_ratio.repeat(1, 1) # [n, 1] + w_h_ratio = w_h_ratio.view(-1, 1) # [n, 1] + + h_sum = binary_mask.sum(-1) # [n, c, h] + _ = (h_sum >= self.eps).float().cumsum(axis=-1) # [n, c, h] + h_top = (_ == 0).float().sum(-1) # [n, c] + h_bot = (_ != torch.max(_, dim=-1, keepdim=True) + [0]).float().sum(-1) + 1. 
# [n, c] + + w_sum = binary_mask.sum(-2) # [n, c, w] + w_cumsum = w_sum.cumsum(axis=-1) # [n, c, w] + w_h_sum = w_sum.sum(-1).unsqueeze(-1) # [n, c, 1] + w_center = (w_cumsum < w_h_sum / 2.).float().sum(-1) # [n, c] + + p1 = self.W - self.H * w_h_ratio + p1 = p1 / 2. + p1 = torch.clamp(p1, min=0) # [n, c] + t_w = w_h_ratio * self.H / w + p2 = p1 / t_w # [n, c] + + height = h_bot - h_top # [n, c] + width = height * w / h # [n, c] + width_p = int(self.W / 2) + + feature_map = self.Pad(feature_map) + w_center = w_center + width_p # [n, c] + + w_left = w_center - width / 2 - p2 # [n, c] + w_right = w_center + width / 2 + p2 # [n, c] + + w_left = torch.clamp(w_left, min=0., max=w+2*width_p) + w_right = torch.clamp(w_right, min=0., max=w+2*width_p) + + boxes = torch.cat([w_left, h_top, w_right, h_bot], dim=-1) + # index of bbox in batch + box_index = torch.arange(n, device=feature_map.device) + rois = torch.cat([box_index.view(-1, 1), boxes], -1) + crops = self.RoiPool(feature_map, rois) # [n, c, H, W] + return crops + + def RmBN2dAffine(model): for m in model.modules(): if isinstance(m, nn.BatchNorm2d): diff --git a/opengait/utils/__init__.py b/opengait/utils/__init__.py index 8f72cd7..d45a6da 100644 --- a/opengait/utils/__init__.py +++ b/opengait/utils/__init__.py @@ -7,4 +7,5 @@ from .common import mkdir, clones from .common import MergeCfgsDict from .common import get_attr_from from .common import NoOp +from .common import MeanIOU from .msg_manager import get_msg_mgr \ No newline at end of file diff --git a/opengait/utils/common.py b/opengait/utils/common.py index 72b75a7..7925747 100644 --- a/opengait/utils/common.py +++ b/opengait/utils/common.py @@ -138,7 +138,7 @@ def clones(module, N): def config_loader(path): with open(path, 'r') as stream: src_cfgs = yaml.safe_load(stream) - with open("./config/default.yaml", 'r') as stream: + with open("./configs/default.yaml", 'r') as stream: dst_cfgs = yaml.safe_load(stream) MergeCfgsDict(src_cfgs, dst_cfgs) return dst_cfgs 
@@ -203,3 +203,15 @@ def get_ddp_module(module, **kwargs): def params_count(net): n_parameters = sum(p.numel() for p in net.parameters()) return 'Parameters Count: {:.5f}M'.format(n_parameters / 1e6) + + +def MeanIOU(msk1, msk2, eps=1.0e-9): + if not is_tensor(msk1): + msk1 = torch.from_numpy(msk1).cuda() + if not is_tensor(msk2): + msk2 = torch.from_numpy(msk2).cuda() + n = msk1.size(0) + inter = msk1 * msk2 + union = ((msk1 + msk2) > 0.).float() + MeIOU = inter.view(n, -1).sum(-1) / (union.view(n, -1).sum(-1) + eps) + return MeIOU diff --git a/opengait/utils/evaluation.py b/opengait/utils/evaluation.py index 3005ad7..371517f 100644 --- a/opengait/utils/evaluation.py +++ b/opengait/utils/evaluation.py @@ -3,7 +3,7 @@ from time import strftime, localtime import torch import numpy as np import torch.nn.functional as F -from utils import get_msg_mgr, mkdir +from utils import get_msg_mgr, mkdir, MeanIOU def cuda_dist(x, y, metric='euc'): @@ -124,10 +124,10 @@ def identification_real_scene(data, dataset, metric='euc'): gallery_seq_type = {'0001-1000': ['1', '2'], "HID2021": ['0'], '0001-1000-test': ['0'], - 'GREW': ['01']} + 'GREW': ['01'], 'TTG-200': ['1']} probe_seq_type = {'0001-1000': ['3', '4', '5', '6'], "HID2021": ['1'], '0001-1000-test': ['1'], - 'GREW': ['02']} + 'GREW': ['02'], 'TTG-200': ['2', '3', '4', '5', '6']} num_rank = 20 acc = np.zeros([num_rank]) - 1. @@ -274,3 +274,11 @@ def re_ranking(original_dist, query_num, k1, k2, lambda_value): del jaccard_dist final_dist = final_dist[:query_num, query_num:] return final_dist + + +def mean_iou(data, dataset): + labels = data['mask'] + pred = data['pred'] + miou = MeanIOU(pred, labels) + get_msg_mgr().log_info('mIOU: %.3f' % (miou.mean())) + return {"scalar/test_accuracy/mIOU": miou}