├── .DS_Store
├── LICENSE
├── README.md
├── datasets
│   ├── .gitkeep
│   ├── cubsample
│   │   └── cubsample_info.txt
│   ├── helen
│   │   └── helen_info.txt
│   └── voc2010_crop
│       ├── bird_info.txt
│       ├── cat_info.txt
│       ├── cow_info.txt
│       ├── dog_info.txt
│       ├── horse_info.txt
│       └── sheep_info.txt
├── demo.py
├── model
│   ├── alexnet
│   │   ├── __pycache__
│   │   │   └── alexnet.cpython-35.pyc
│   │   ├── alexnet.py
│   │   └── conv_mask.py
│   ├── densenet_121
│   │   ├── conv_mask.py
│   │   └── densenet_121.py
│   ├── resnet_18
│   │   ├── conv_mask.py
│   │   └── resnet_18.py
│   ├── resnet_50
│   │   ├── conv_mask.py
│   │   └── resnet_50.py
│   ├── vgg_m
│   │   ├── __pycache__
│   │   │   └── vgg_m.cpython-35.pyc
│   │   ├── conv_mask.py
│   │   └── vgg_m.py
│   ├── vgg_s
│   │   ├── __pycache__
│   │   │   ├── conv_mask.cpython-35.pyc
│   │   │   └── vgg_s.cpython-35.pyc
│   │   ├── conv_mask.py
│   │   └── vgg_s.py
│   └── vgg_vd_16
│       ├── __pycache__
│       │   ├── conv_mask.cpython-35.pyc
│       │   └── vgg_vd_16.cpython-35.pyc
│       ├── conv_mask.py
│       └── vgg_vd_16.py
├── requirements.txt
└── tools
    ├── classification.py
    ├── classification_multi.py
    ├── computeStability.py
    ├── computeStability_multi.py
    ├── getCNNFeature.py
    ├── getConvNetPara.py
    ├── getDistSqrtVar.py
    ├── get_celebaimdb.py
    ├── get_cubimdb.py
    ├── get_cubsampleimdb.py
    ├── get_helenimdb.py
    ├── get_ilsvrimdb.py
    ├── get_voc2010imdb.py
    ├── get_vocimdb.py
    ├── init_model.py
    ├── lib.py
    ├── load_data.py
    ├── load_data_multi.py
    ├── logistic.py
    ├── sgd.py
    ├── showresult.py
    ├── softmax.py
    └── train_model.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/.DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 ada-shen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ICNN

This repository is a PyTorch implementation of the interpretable convolutional neural network
([arXiv](https://arxiv.org/abs/1710.00935), [IEEE T-PAMI](https://ieeexplore.ieee.org/document/9050545)).

It is created by [Wen Shen](https://ada-shen.github.io), Ping Zhao, Qiming Li, [Chao Li](http://www.ict.cas.cn/sourcedb_2018_ict_cas/cn/jssrck/201810/t20181030_5151364.html).

## Notice
- **model**

  The project now supports three VGG variants (vgg_vd_16, vgg_m, vgg_s), alexnet, resnet_18/50, and densenet_121.

  You can add your own model in the `/model` folder and register it in `/tools/init_model.py`. All bundled models share the same constructor and forward interface, as sketched below.

- **dataset**

  The project now supports **vocpart, ilsvrc animalpart, cub200, celeba, voc2010_crop, helen**.

  You can add your own dataset in a way similar to the datasets above.

  **Note that** our code first packs the data into an imdb file, so if your dataset is large, preprocessing may take a long time and the generated imdb file may be fairly large.
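As a reference for the interface a custom model has to follow, here is a minimal skeleton (a hypothetical sketch, not part of this repo): every bundled network is constructed as `model(pretrain_path, label_num, dropout_rate, losstype)` and exposes `forward(x, label, Iter, density)`, threading the last three arguments through its `conv_mask` layers (which allocate CUDA tensors internally, so a GPU is required).

```python
import torch.nn as nn
import torch.nn.functional as F
from model.resnet_18.conv_mask import conv_mask

class my_net(nn.Module):
    """Hypothetical template mirroring the interface of the bundled models."""
    def __init__(self, pretrain_path, label_num, dropout_rate, losstype):
        super(my_net, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(3, 64, 3, padding=1), nn.ReLU())
        # the interpretable layer; initialize its weight explicitly, as the
        # bundled models do with torch.nn.init.normal_
        self.mask1 = conv_mask(64, 64, kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), labelnum=label_num, loss_type=losstype)
        self.fc = nn.Linear(64, label_num)

    def forward(self, x, label, Iter, density):
        x = self.features(x)
        x = self.mask1(x, label, Iter, density)  # label/Iter/density drive the mask
        x = F.adaptive_avg_pool2d(F.relu(x), (1, 1)).flatten(1)
        return self.fc(x)
```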
## Requirements

The environment should have all packages listed in [requirements.txt](./requirements.txt):
```bash
$ pip install -r requirements.txt
```

Note that we recommend **pytorch=1.2.0**: we found some bugs under pytorch=1.4.0 that do not occur under pytorch=1.2.0, and we will continue to look into this problem.

## Usage
Here, we take **resnet-18 + voc2010_crop bird classification** as an example.

To get the training results, we can run:
```bash
$ python demo.py --model resnet_18 --dataset voc2010_crop --label_name bird
```
After running the instruction above, you will get a new folder whose path is
`/resnet_18/voc2010_crop/bird` in the `/task/classification` folder.

The new folder `bird` will contain a subfolder named `0` (corresponding to your task_id) and three mat files (mean.mat, train.mat and val.mat).
The `0` folder stores a model checkpoint every 10 epochs, together with a log that records the **train/val loss** and **train/val accuracy** during network training.

You can use the trained model to calculate other metrics or to look at middle-level features.
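For example, here is a minimal sketch of reloading a saved checkpoint for such an analysis (the checkpoint file name below is hypothetical; use the name your run actually writes into the `0` folder, and note that the constructor also re-reads the original pretrained weights from `pretrain_path`):

```python
import torch
from model.resnet_18.resnet_18 import resnet_18

# Rebuild the network with the same constructor arguments used for training.
net = resnet_18(pretrain_path='resnet18.pth',  # assumed path to the pretrained weights
                num_classes=1, dropout_rate=0, losstype='logistic')
# Hypothetical checkpoint name; adjust it to the file saved under .../bird/0/.
ckpt = torch.load('task/classification/resnet_18/voc2010_crop/bird/0/model_10.pth',
                  map_location='cpu')
net.load_state_dict(ckpt if isinstance(ckpt, dict) else ckpt.state_dict())
net = net.cuda().eval()
```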
## Citation

If you use this project in your research, please cite it.

```
@inproceedings{zhang2018interpretable,
 title={Interpretable convolutional neural networks},
 author={Zhang, Quanshi and Wu, Nianying and Zhu, Song-Chun},
 booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
 pages={8827--8836},
 year={2018}
}
```
--------------------------------------------------------------------------------
/datasets/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/datasets/.gitkeep
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
import os
import argparse
#from tools import train_net
from tools.lib import init_lr
import random
import numpy as np
from tools.classification import classification
from tools.classification_multi import classification_multi
import torch

def seed_torch(seed=0):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    #torch.backends.cudnn.enabled = False

seed_torch(0)

root_path = os.getcwd() #'/data2/lqm/pytorch_interpretable/py_icnn'
parser = argparse.ArgumentParser('parameters')
#info:gpu
parser.add_argument('--gpu_id', type=int, default=0, help='select the id of the gpu')
#info:task
parser.add_argument('--task_name', type=str, default='classification', help='select classification or classification_multi')
parser.add_argument('--task_id', type=int, default=0, help='0,1,2..')
parser.add_argument('--dataset', type=str, default='voc2010_crop', help='select voc2010_crop, helen, cub200, cubsample, '
                    'celeba, vocpart, ilsvrcanimalpart')
parser.add_argument('--imagesize', type=int, default=224, help='')
parser.add_argument('--label_name', type=str, default='bird', help='if voc2010_crop, set bird, cat, cow, dog, horse or sheep; '
                    'else, it does not matter')
parser.add_argument('--label_num', type=int, default=1, help='keep consistent with label_name')
parser.add_argument('--model', type=str, default='resnet_18', help='select vgg_vd_16, vgg_m, vgg_s, '
                    'alexnet, resnet_18, resnet_50, densenet_121')
parser.add_argument('--losstype', type=str, default='logistic', help='select logistic or softmax')
#info:hyper-parameter
parser.add_argument('--batchsize', type=int, default=8, help='selecting more than 8 may cause out of cuda memory; '
                    'when you choose a different batchsize, you also need to adjust line 94 of /tools/sgd.py to keep them consistent')
parser.add_argument('--dropoutrate', type=float, default=0, help='select a number between 0 and 1')
parser.add_argument('--lr', type=float, default=0, help='see function init_lr in /tools/lib.py for details')
parser.add_argument('--epochnum', type=int, default=0, help='see function init_lr in /tools/lib.py for details')
parser.add_argument('--weightdecay', type=float, default=0.0005, help='0.02,0.002')
parser.add_argument('--momentum', type=float, default=0.09, help='0.02,0.002')


args = parser.parse_args()
args.lr, args.epochnum = init_lr(args.model, args.label_num, args.losstype)  # init lr and epochnum
if args.task_name == 'classification':
    if args.dataset == 'celeba':
        args.label_num = 40
    classification(root_path, args)
else:
    if args.dataset == 'vocpart':
        args.label_name = ['bird', 'cat', 'cow', 'dog', 'horse', 'sheep']
        args.label_num = 6
    classification_multi(root_path, args)
--------------------------------------------------------------------------------
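A note on the dispatch at the bottom of demo.py: any `task_name` other than `classification` falls through to `classification_multi`, and `--dataset vocpart` then fills in the six part labels and `label_num` automatically, so a multi-label run needs no `--label_name`, e.g.:

```bash
$ python demo.py --task_name classification_multi --model resnet_18 --dataset vocpart
```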
/model/alexnet/__pycache__/alexnet.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/alexnet/__pycache__/alexnet.cpython-35.pyc
--------------------------------------------------------------------------------
/model/alexnet/alexnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*- 2 | 3 | import h5py 4 | import math 5 | import copy 6 | import scipy.io as io 7 | import numpy as np 8 | from scipy.io import loadmat 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from model.alexnet.conv_mask import conv_mask 13 | 14 | 15 | 16 | class alexnet(nn.Module): 17 | def __init__(self, pretrain_path, label_num, dropoutrate, losstype): 18 | super(alexnet, self).__init__() 19 | self.pretrain_path = pretrain_path 20 | self.dropoutrate = dropoutrate 21 | self.label_num = label_num 22 | self.losstype = losstype 23 | self.conv1 = nn.Sequential( 24 | nn.Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(0, 0)), 25 | nn.ReLU(inplace=True), 26 | nn.LocalResponseNorm(5, alpha=0.00002, beta=0.75, k=1.0),) 27 | self.maxpool1 = nn.Sequential( 28 | nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 29 | self.conv2 = nn.Sequential( 30 | nn.Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)), 31 | nn.ReLU(inplace=True), 32 | nn.LocalResponseNorm(5, alpha=0.00002, beta=0.75, k=1.0),) 33 | self.maxpool2 = nn.Sequential( 34 | nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 35 | self.conv3 = nn.Sequential( 36 | nn.Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 37 | nn.ReLU(inplace=True), 38 | nn.Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 39 | nn.ReLU(inplace=True), 40 | nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 41 | nn.ReLU(inplace=True), ) 42 | self.mask1 = nn.Sequential( 43 | conv_mask(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, loss_type = self.losstype, ), ) 44 | self.maxpool3 = nn.Sequential( 45 | nn.ReLU(inplace=True), 46 | nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 47 | self.mask2 = nn.Sequential( 48 | conv_mask(256, 4096, kernel_size=(6, 6), stride=(1, 1), padding=(0, 0), labelnum=self.label_num, loss_type = self.losstype, ), ) 49 | self.relu = nn.Sequential( 50 | nn.ReLU(inplace=True), ) 51 | self.line = nn.Sequential( 52 | nn.Dropout2d(p=self.dropoutrate), 53 | nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), 54 | nn.ReLU(inplace=True), 55 | nn.Dropout2d(p=self.dropoutrate), 56 | nn.Conv2d(4096, self.label_num, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), ) 57 | self.init_weight() 58 | 59 | def init_weight(self): 60 | data = loadmat(self.pretrain_path) 61 | w, b = data['layers'][0][0][0]['weights'][0][0] 62
| self.conv1[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 63 | self.conv1[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 64 | 65 | w, b = data['layers'][0][4][0]['weights'][0][0] 66 | w = w.transpose([3, 2, 0, 1]) 67 | w = np.concatenate((w, w), axis=1) 68 | self.conv2[0].weight.data.copy_(torch.from_numpy(w)) 69 | self.conv2[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 70 | 71 | w, b = data['layers'][0][8][0]['weights'][0][0] 72 | self.conv3[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 73 | self.conv3[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 74 | w, b = data['layers'][0][10][0]['weights'][0][0] 75 | w = w.transpose([3, 2, 0, 1]) 76 | w = np.concatenate((w, w), axis=1) 77 | self.conv3[2].weight.data.copy_(torch.from_numpy(w)) 78 | self.conv3[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 79 | w, b = data['layers'][0][12][0]['weights'][0][0] 80 | w = w.transpose([3, 2, 0, 1]) 81 | w = np.concatenate((w, w), axis=1) 82 | self.conv3[4].weight.data.copy_(torch.from_numpy(w)) 83 | self.conv3[4].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 84 | 85 | torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01) 86 | torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01) 87 | 88 | torch.nn.init.normal_(self.line[1].weight.data, mean=0, std=0.01) 89 | torch.nn.init.zeros_(self.line[1].bias.data) 90 | torch.nn.init.normal_(self.line[4].weight.data, mean=0, std=0.01) 91 | torch.nn.init.zeros_(self.line[4].bias.data) 92 | 93 | def forward(self, x, label, Iter, density): 94 | x = self.conv1(x) 95 | x = F.pad(x, (0, 1, 0, 1)) 96 | x = self.maxpool1(x) 97 | 98 | x = self.conv2(x) 99 | x = F.pad(x, (0, 1, 0, 1)) 100 | x = self.maxpool2(x) 101 | 102 | x = self.conv3(x) 103 | x = self.mask1[0](x, label, Iter, density) 104 | x = self.maxpool3(x) 105 | 106 | x = self.mask2[0](x, label, Iter, density) 107 | x = self.relu(x) 108 | x = self.line(x) 109 | return x 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /model/alexnet/conv_mask.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import math 3 | import copy 4 | import scipy.io as io 5 | from scipy.io import loadmat 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Function 10 | from torch.nn.parameter import Parameter 11 | 12 | 13 | 14 | 15 | class Divclass: 16 | def __init__(self, depthList, posList): 17 | self.depthList = depthList 18 | self.posList = posList 19 | 20 | 21 | def getMu(x): 22 | IsuseMax = 0 23 | bs = x.size()[0] 24 | depth = x.size()[1] 25 | h = x.size()[2] 26 | w = x.size()[3] 27 | x = x.transpose(2, 3).reshape([bs, depth, h * w]) 28 | if (IsuseMax): 29 | _, p = torch.max(x, dim=2) 30 | p = torch.reshape(p, (bs, depth, 1)).float() # index is long type 31 | mu_y = torch.ceil(p / h) 32 | mu_x = p - (mu_y - 1) * h 33 | sqrtvar = torch.Tensor([]) 34 | else: 35 | tmp_x = torch.Tensor(range(1, h + 1)).reshape(-1, 1).repeat([bs, depth, w, 1]).squeeze(3).cuda() 36 | tmp_y = torch.Tensor(range(1, w + 1)).repeat([bs, depth, h, 1]) 37 | tmp_y = tmp_y.transpose(2, 3).reshape([bs, depth, h * w]).cuda() 38 | sumXtmp = torch.sum(x, 2).unsqueeze(2) 39 | sumX = torch.max(sumXtmp, (torch.ones(sumXtmp.size()).cuda() * 0.000000001)) 40 | mu_xtmp = torch.round((torch.sum(tmp_x.mul(x), 2).unsqueeze(2)).div(sumX)) 41 | mu_x = torch.max(mu_xtmp, torch.ones(mu_xtmp.size()).cuda()) 42 | mu_ytmp = 
torch.round((torch.sum(tmp_y.mul(x), 2).unsqueeze(2)).div(sumX)) 43 | mu_y = torch.max(mu_ytmp, torch.ones(mu_ytmp.size()).cuda()) 44 | sqrtvartmp1 = mu_x.repeat([1, 1, h * w]) 45 | sqrtvartmp2 = mu_y.repeat([1, 1, h * w]) 46 | sqrtvar = torch.sqrt((torch.sum((tmp_x - sqrtvartmp1).mul(tmp_x - sqrtvartmp1).mul(x), 2).unsqueeze(2) + torch.sum((tmp_y - sqrtvartmp2).mul(tmp_y - sqrtvartmp2).mul(x), 2).unsqueeze(2)).div(sumX)) 47 | p = (mu_x + (mu_y - 1) * h).reshape([bs, depth, 1, 1]) 48 | tmp = torch.linspace(-1, 1, h).repeat(mu_x.size()).cuda() 49 | for i in range(bs): 50 | mu_x[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_x[i, :, :] - 1).long()) 51 | mu_y[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_y[i, :, :] - 1).long()) 52 | mu_x = mu_x.reshape([bs, depth, 1, 1]) 53 | mu_y = mu_y.reshape([bs, depth, 1, 1]) 54 | sqrtvar = sqrtvar.reshape([bs, depth]) 55 | return mu_x, mu_y, sqrtvar 56 | 57 | 58 | def getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w): 59 | mask = torch.abs(posTempX - mask_parameter['mu_x'].repeat([1, 1, h, w])) 60 | mask = mask + torch.abs(posTempY - mask_parameter['mu_y'].repeat([1, 1, h, w])) 61 | mask = 1 - mask.mul(mask_weight.reshape(depth, 1, 1).repeat([bs, 1, h, w])) 62 | mask = torch.max(mask, torch.ones(mask.size()).cuda() * (-1)) 63 | for i in range(depth): 64 | if not (mask_parameter['filter'][i].equal(torch.ones(1))): 65 | mask[:, i, :, :] = 1 66 | return mask 67 | 68 | 69 | def get_sliceMag(sliceMag,label,x): 70 | for lab in range(label.shape[1]): 71 | index = (label[:, lab, :, :] == 1).reshape(label.shape[0]) 72 | if torch.sum(index) != 0: 73 | (tmp, idx) = torch.max(x[index, :, :, :], dim=2) 74 | (tmp, idx) = torch.max(tmp, dim=2) 75 | tmp = tmp.reshape(tmp.size()[0], tmp.size()[1], 1, 1) 76 | meantmp = torch.mean(tmp, 0) 77 | if (torch.sum(sliceMag[:, lab]) == 0): 78 | sliceMag[:, lab] = torch.max(meantmp,(torch.ones(meantmp.size()) * 0.1).cuda()).reshape(meantmp.size()[0]) 79 | else: 80 | tmptmp = 0.9 81 | index = (meantmp == 0).reshape(meantmp.size()[0]) 82 | meantmp[index, 0, 0] = sliceMag[index, 0].cuda() 83 | sliceMag[:, lab] = (sliceMag[:,lab] * tmptmp).cuda()+meantmp.reshape(meantmp.size()[0])*(1-tmptmp) 84 | return sliceMag 85 | 86 | 87 | class conv_mask_F(Function): 88 | @staticmethod 89 | def forward(self, x, weight, bias, mask_weight, padding, label, Iter, density, mask_parameter): 90 | bs = x.shape[0] 91 | depth = x.shape[1] 92 | h = x.shape[2] 93 | w = x.shape[3] 94 | posTemp_x = torch.linspace(-1, 1, h).reshape(-1, 1).repeat([depth, 1, w]) 95 | posTemp_y = torch.linspace(-1, 1, w).repeat([depth, h, 1]) 96 | posTempX = posTemp_x.repeat([bs, 1, 1, 1]).cuda() 97 | posTempY = posTemp_y.repeat([bs, 1, 1, 1]).cuda() 98 | mask_parameter['mu_x'], mask_parameter['mu_y'], mask_parameter['sqrtvar'] = getMu(x) 99 | mask = getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w) 100 | input = x.mul(mask) 101 | x_relu = torch.max(input, torch.zeros(input.size()).cuda()) 102 | 103 | parameter_sliceMag = mask_parameter['sliceMag'].clone().data 104 | mask_parameter['sliceMag'] = get_sliceMag(mask_parameter['sliceMag'],label,x) 105 | 106 | self.save_for_backward(x, weight, bias, mask_weight, torch.Tensor([padding]), label, mask, Iter, density, 107 | mask_parameter['filter'], mask_parameter['mag'], mask_parameter['sqrtvar'], mask_parameter['strength'],parameter_sliceMag) 108 | 109 | return F.conv2d(x_relu, weight, bias, padding=padding) 110 | 111 | @staticmethod 112 | def backward(self, grad_output): 113 | x, weight, 
bias, mask_weight, padding, label, mask, Iter, density, parameter_filter, parameter_mag, parameter_sqrtvar, parameter_strength, parameter_sliceMag = self.saved_tensors 114 | 115 | input = x.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 116 | if self.needs_input_grad[0]: 117 | x_grad = torch.nn.grad.conv2d_input(input.shape, weight, grad_output, padding=int(padding.item())) 118 | if self.needs_input_grad[1]: 119 | weight_grad = torch.nn.grad.conv2d_weight(input, weight.shape, grad_output, padding=int(padding.item())) 120 | if bias is not None and self.needs_input_grad[2]: 121 | bias_grad = grad_output.sum(0).sum((1, 2)) 122 | 123 | depth = x.size()[1] 124 | h = x.size()[2] 125 | w = x.size()[3] 126 | depthList = (parameter_filter > 0).nonzero()[:, 0].reshape(-1, 1) 127 | labelNum = label.size()[1] 128 | Div_list = [] 129 | 130 | if (labelNum == 1): 131 | theClass = label 132 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 133 | Div = Divclass(depthList, posList) 134 | Div_list.append(Div) 135 | else: 136 | (theClass, indextmp) = torch.max(label, dim=1) 137 | theClass = theClass.unsqueeze(2) 138 | if (parameter_sliceMag.size()[0] == torch.Tensor([]).size()[0]): 139 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 140 | Div = Divclass(depthList, posList) 141 | Div_list.append(Div) 142 | else: 143 | sliceM = parameter_sliceMag 144 | for i in range(labelNum): 145 | Div = Divclass(depthList=torch.Tensor([]), posList=torch.Tensor([])) 146 | Div_list.append(Div) 147 | (val, index) = torch.max(sliceM[depthList, :].squeeze(1), dim=1) 148 | for lab in range(labelNum): 149 | (Div_list[lab].depthList, indextmp) = torch.sort(depthList[index == lab], dim=0) 150 | Div_list[lab].posList = (label[:, lab, :, :] == 1).nonzero()[:, 0].reshape(-1, 1) 151 | 152 | imgNum = label.size()[0] 153 | alpha = 0.5 154 | x_grad = x_grad.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 155 | 156 | if ((torch.sum(parameter_filter == 1)) > 0): 157 | parameter_strength = torch.mean(torch.mean(x.mul(mask), 2), 2).transpose(1, 0).cuda() 158 | mask_tmp = (torch.from_numpy(copy.deepcopy(mask.cpu().detach().numpy()[::-1, ::-1, :, :]))).cuda() 159 | alpha_logZ_pos = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(x.mul(mask_tmp), 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 160 | alpha_logZ_neg = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(-x, 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 161 | alpha_logZ_pos[torch.isinf(alpha_logZ_pos)] = torch.max(alpha_logZ_pos[torch.isinf(alpha_logZ_pos) == 0]) 162 | alpha_logZ_neg[torch.isinf(alpha_logZ_neg)] = torch.max(alpha_logZ_neg[torch.isinf(alpha_logZ_neg) == 0]) 163 | 164 | for lab in range(len(Div_list)): 165 | if (labelNum == 1): 166 | w_pos = 1 167 | w_neg = 1 168 | else: 169 | if (labelNum > 10): 170 | w_pos = 0.5 / (1 / labelNum) 171 | w_neg = 0.5 / (1 - 1 / labelNum) 172 | else: 173 | w_pos = 0.5 / density[lab] 174 | w_neg = 0.5 / (1 - density[lab]) 175 | 176 | mag = torch.ones([depth, imgNum]).div(1 / Iter).div(parameter_mag).cuda() 177 | dList = Div_list[lab].depthList 178 | dList = dList[(parameter_filter[dList] == 1).squeeze(1)].reshape(-1, 1) 179 | if (dList.size()[0] != torch.Tensor([]).size()[0]): 180 | List = Div_list[lab].posList.cuda() 181 | if (List.size()[0] != torch.Tensor([]).size()[0]): 182 | strength = torch.exp((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).div(alpha)).mul((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1) - alpha_logZ_pos[dList].squeeze(1).repeat(1, 
List.size()[0]) + alpha) 183 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 184 | strength[torch.isnan(strength)] = 0 185 | strength = (strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, List.size()[0])).mul((mag[:, List].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(List.size()[0],dList.size()[0], 1, 1) 186 | strength[torch.isnan(strength)] = 0 187 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 188 | index_dList = dList.repeat(List.size()[0], 1) 189 | index_List = List.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(List.size()[0] * dList.size()[0], 1) 190 | x_grad[index_List, index_dList, :, :] = ((x_grad[List, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) - (mask[List, :, :,:].squeeze(1))[:,dList,:,:].squeeze(2).mul(strength.repeat(1, 1, h, w) * (0.00001 * w_pos))).reshape(List.size()[0] * dList.size()[0],1, h, w) 191 | 192 | list_neg = (label != 1).nonzero()[:, 0].reshape(-1, 1) 193 | if (list_neg.size()[0] != torch.Tensor([]).size()[0]): 194 | strength = torch.mean((torch.mean((x[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2), 2).unsqueeze(2)),3).unsqueeze(2).transpose(0, 1).reshape(dList.size()[0], list_neg.size()[0]) 195 | strength = torch.exp(-strength.div(alpha)).mul(-strength - alpha_logZ_neg[dList].squeeze(2).repeat(1, list_neg.size()[0]) + alpha) 196 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 197 | strength[torch.isnan(strength)] = 0 198 | strength = (strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, list_neg.size()[0])).mul((mag[:, list_neg].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(list_neg.size()[0], dList.size()[0], 1, 1) 199 | strength[torch.isnan(strength)] = 0 200 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 201 | index_dList = dList.repeat(list_neg.size()[0], 1) 202 | index_list_neg = list_neg.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(list_neg.size()[0] * dList.size()[0], 1) 203 | x_grad[index_list_neg, index_dList, :, :] = ((x_grad[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) + (strength.reshape(list_neg.size()[0], dList.size()[0], 1, 1).repeat(1, 1, h, w)) * (0.00001 * w_neg)).reshape(list_neg.size()[0] * dList.size()[0], 1, h, w) 204 | 205 | beta = 3.0 206 | mask_weight_grad = torch.zeros(depth, 1).cuda() 207 | parameter_sqrtvar = parameter_sqrtvar.transpose(0, 1) 208 | 209 | for lab in range(len(Div_list)): 210 | dList = Div_list[lab].depthList.cuda() 211 | List = Div_list[lab].posList 212 | if ((dList.size()[0] != torch.Tensor([]).size()[0]) and (List.size()[0] != torch.Tensor([]).size()[0])): 213 | tmp = ((torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).mul((parameter_sqrtvar[:, List].squeeze(2))[dList, :].squeeze(1)), 1)). 
214 | div(torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1), 1))).reshape(-1, 1) 215 | tmptmp = beta / tmp 216 | tmp = torch.max(torch.min(tmptmp, torch.ones(tmptmp.size()).cuda() * 3),torch.ones(tmptmp.size()).cuda() * 1.5) 217 | tmp = (tmp - mask_weight[dList].squeeze(2)) * (-10000) 218 | mask_weight_grad[dList] = tmp.unsqueeze(2) 219 | 220 | return x_grad, weight_grad, bias_grad, mask_weight_grad, None, None, None, None, None, None, None, None, None, None, None 221 | 222 | 223 | class conv_mask(nn.Module): 224 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding, labelnum, loss_type, ): 225 | super(conv_mask, self).__init__() 226 | self.in_channels = in_channels 227 | self.out_channels = out_channels 228 | self.kernel_size = kernel_size 229 | self.stride = stride 230 | self.padding = padding[0] 231 | self.alphainit = 2.0 232 | self.mask_parameter = None 233 | self.label_num = labelnum 234 | self.losstype = loss_type 235 | 236 | self.weight = Parameter(torch.Tensor(out_channels, in_channels, *kernel_size)) 237 | self.mask_weight = Parameter(torch.ones(in_channels, 1) * self.alphainit) 238 | self.bias = Parameter(torch.zeros(out_channels)) 239 | self.init_mask_parameter() 240 | 241 | 242 | def forward(self, x, label, Iter, density): 243 | out = conv_mask_F.apply(x, self.weight, self.bias, self.mask_weight, self.padding, label, Iter, density, self.mask_parameter) 244 | return out 245 | 246 | def init_mag(self): 247 | mag = torch.Tensor([0.1]) 248 | # mag need to be modified for multiple classifications 249 | if self.losstype == 'softmax': 250 | if self.label_num > 10: 251 | mag = mag / 50 252 | if self.model == 'vgg_m': 253 | mag = mag / 1000000 254 | else: 255 | mag = mag * 0.2 256 | return mag 257 | 258 | def init_mask_parameter(self): 259 | mag = self.init_mag() 260 | partRate = 1 261 | textureRate = 0 262 | partNum = round(partRate * self.in_channels) 263 | textureNum = round((textureRate + partRate) * self.in_channels) - partNum 264 | filtertype = torch.zeros(self.in_channels, 1) 265 | filtertype[0:partNum] = 1 266 | filtertype[partNum:partNum + textureNum] = 2 267 | sliceMag = torch.zeros(self.in_channels, self.label_num) 268 | self.mask_parameter = {'posTemp': {'posTemp_x': None, 'posTemp_y': None}, 269 | 'mu_x': None, 270 | 'mu_y': None, 271 | 'sqrtvar': None, 272 | 'strength': None, 273 | 'sliceMag': sliceMag, 274 | 'filter': filtertype, 275 | 'mag': mag} 276 | 277 | 278 | -------------------------------------------------------------------------------- /model/densenet_121/densenet_121.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import torch.utils.checkpoint as cp 5 | import re 6 | import torch 7 | from torch import Tensor 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from collections import OrderedDict 11 | from model.densenet_121.conv_mask import conv_mask 12 | 13 | class _DenseLayer(nn.Module): 14 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False): 15 | super(_DenseLayer, self).__init__() 16 | self.add_module('norm1', nn.BatchNorm2d(num_input_features)), 17 | self.add_module('relu1', nn.ReLU(inplace=True)), 18 | self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * 19 | growth_rate, kernel_size=1, stride=1, 20 | bias=False)), 21 | self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), 22 | self.add_module('relu2', nn.ReLU(inplace=True)), 23 | 
self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, 24 | kernel_size=3, stride=1, padding=1, 25 | bias=False)), 26 | self.drop_rate = float(drop_rate) 27 | self.memory_efficient = memory_efficient 28 | 29 | def bn_function(self, inputs): 30 | # type: (List[Tensor]) -> Tensor 31 | concated_features = torch.cat(inputs, 1) 32 | bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features))) # noqa: T484 33 | return bottleneck_output 34 | 35 | # todo: rewrite when torchscript supports any 36 | def any_requires_grad(self, input): 37 | # type: (List[Tensor]) -> bool 38 | for tensor in input: 39 | if tensor.requires_grad: 40 | return True 41 | return False 42 | ''' 43 | @torch.jit.unused # noqa: T484 44 | def call_checkpoint_bottleneck(self, input): 45 | # type: (List[Tensor]) -> Tensor 46 | def closure(*inputs): 47 | return self.bn_function(inputs) 48 | 49 | return cp.checkpoint(closure, *input) 50 | 51 | @torch.jit._overload_method # noqa: F811 52 | def forward(self, input): 53 | # type: (List[Tensor]) -> (Tensor) 54 | pass 55 | 56 | @torch.jit._overload_method # noqa: F811 57 | def forward(self, input): 58 | # type: (Tensor) -> (Tensor) 59 | pass 60 | ''' 61 | # torchscript does not yet support *args, so we overload method 62 | # allowing it to take either a List[Tensor] or single Tensor 63 | def forward(self, input): # noqa: F811 64 | if isinstance(input, Tensor): 65 | prev_features = [input] 66 | else: 67 | prev_features = input 68 | 69 | if self.memory_efficient and self.any_requires_grad(prev_features): 70 | if torch.jit.is_scripting(): 71 | raise Exception("Memory Efficient not supported in JIT") 72 | 73 | bottleneck_output = self.call_checkpoint_bottleneck(prev_features) 74 | else: 75 | bottleneck_output = self.bn_function(prev_features) 76 | 77 | new_features = self.conv2(self.relu2(self.norm2(bottleneck_output))) 78 | if self.drop_rate > 0: 79 | new_features = F.dropout(new_features, p=self.drop_rate, 80 | training=self.training) 81 | return new_features 82 | 83 | 84 | class _DenseBlock(nn.ModuleDict): 85 | _version = 2 86 | 87 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False): 88 | super(_DenseBlock, self).__init__() 89 | for i in range(num_layers): 90 | layer = _DenseLayer( 91 | num_input_features + i * growth_rate, 92 | growth_rate=growth_rate, 93 | bn_size=bn_size, 94 | drop_rate=drop_rate, 95 | memory_efficient=memory_efficient, 96 | ) 97 | self.add_module('denselayer%d' % (i + 1), layer) 98 | 99 | def forward(self, init_features): 100 | features = [init_features] 101 | for name, layer in self.items(): 102 | new_features = layer(features) 103 | features.append(new_features) 104 | return torch.cat(features, 1) 105 | 106 | 107 | class _Transition(nn.Sequential): 108 | def __init__(self, num_input_features, num_output_features): 109 | super(_Transition, self).__init__() 110 | self.add_module('norm', nn.BatchNorm2d(num_input_features)) 111 | self.add_module('relu', nn.ReLU(inplace=True)) 112 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, 113 | kernel_size=1, stride=1, bias=False)) 114 | self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) 115 | 116 | 117 | class densenet_121(nn.Module): 118 | r"""Densenet-BC model class, based on 119 | `"Densely Connected Convolutional Networks" `_ 120 | Args: 121 | growth_rate (int) - how many filters to add each layer (`k` in paper) 122 | block_config (list of 4 ints) - how many layers in each pooling block 123 | 
num_init_features (int) - the number of filters to learn in the first convolution layer 124 | bn_size (int) - multiplicative factor for number of bottle neck layers 125 | (i.e. bn_size * k features in the bottleneck layer) 126 | drop_rate (float) - dropout rate after each dense layer 127 | num_classes (int) - number of classification classes 128 | memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, 129 | but slower. Default: *False*. See `"paper" `_ 130 | """ 131 | 132 | def __init__(self,pretrain_path,num_classes,dropout_rate,losstype, growth_rate=32, block_config=(6, 12, 24, 16), 133 | num_init_features=64, bn_size=4, drop_rate=0, memory_efficient=False): 134 | 135 | super(densenet_121, self).__init__() 136 | 137 | self.label_num = num_classes 138 | self.pretrian_path = pretrain_path 139 | self.losstype = losstype 140 | 141 | # First convolution 142 | self.features = nn.Sequential(OrderedDict([ 143 | ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, 144 | padding=3, bias=False)), 145 | ('norm0', nn.BatchNorm2d(num_init_features)), 146 | ('relu0', nn.ReLU(inplace=True)), 147 | ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 148 | ])) 149 | 150 | self.mask1 = nn.Sequential( 151 | conv_mask(1024 , 1024 , kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 152 | labelnum=self.label_num, 153 | loss_type=self.losstype, ), ) 154 | 155 | self.mask2 = nn.Sequential( 156 | conv_mask(1024, 1024 , kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 157 | labelnum=self.label_num, 158 | loss_type=self.losstype, ), ) 159 | 160 | # Each denseblock 161 | num_features = num_init_features 162 | for i, num_layers in enumerate(block_config): 163 | block = _DenseBlock( 164 | num_layers=num_layers, 165 | num_input_features=num_features, 166 | bn_size=bn_size, 167 | growth_rate=growth_rate, 168 | drop_rate=drop_rate, 169 | memory_efficient=memory_efficient 170 | ) 171 | self.features.add_module('denseblock%d' % (i + 1), block) 172 | num_features = num_features + num_layers * growth_rate 173 | if i != len(block_config) - 1: 174 | trans = _Transition(num_input_features=num_features, 175 | num_output_features=num_features // 2) 176 | self.features.add_module('transition%d' % (i + 1), trans) 177 | num_features = num_features // 2 178 | 179 | # Final batch norm 180 | self.features.add_module('norm5', nn.BatchNorm2d(num_features)) 181 | 182 | # Linear layer 183 | self.classifier = nn.Linear(num_features, num_classes) 184 | 185 | # Official init from torch repo. 
186 | for m in self.modules(): 187 | if isinstance(m, nn.Conv2d): 188 | nn.init.kaiming_normal_(m.weight) 189 | elif isinstance(m, nn.BatchNorm2d): 190 | nn.init.constant_(m.weight, 1) 191 | nn.init.constant_(m.bias, 0) 192 | elif isinstance(m, nn.Linear): 193 | nn.init.constant_(m.bias, 0) 194 | 195 | self.init_weight() 196 | 197 | def init_weight(self): 198 | pattern = re.compile( 199 | r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') 200 | 201 | state_dict = torch.load(self.pretrian_path) 202 | for key in list(state_dict.keys()): 203 | res = pattern.match(key) 204 | if res: 205 | new_key = res.group(1) + res.group(2) 206 | state_dict[new_key] = state_dict[key] 207 | del state_dict[key] 208 | 209 | 210 | pretrained_dict = {k: v for k, v in state_dict.items() if 211 | 'classifier' not in k} 212 | model_dict = self.state_dict() 213 | #for k in model_dict: 214 | # print(k) 215 | # print('####################################') 216 | # print('####################################') 217 | #for k,v in state_dict.items(): 218 | # print(k) 219 | model_dict.update(pretrained_dict) 220 | self.load_state_dict(model_dict,strict=False) 221 | torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01) 222 | torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01) 223 | 224 | torch.nn.init.normal_(self.classifier.weight.data, mean=0, std=0.01) 225 | torch.nn.init.zeros_(self.classifier.bias.data) 226 | 227 | def forward(self, x, label, Iter, density): 228 | features = self.features(x) 229 | out = F.relu(features, inplace=True) 230 | 231 | out = self.mask1[0](out, label, Iter, density) 232 | out = F.relu(out, inplace=True) 233 | out = self.mask2[0](out, label, Iter, density) 234 | out = F.relu(out, inplace=True) 235 | 236 | out = F.adaptive_avg_pool2d(out, (1, 1)) 237 | out = torch.flatten(out, 1) 238 | out = self.classifier(out) 239 | return out -------------------------------------------------------------------------------- /model/resnet_18/resnet_18.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from model.resnet_18.conv_mask import conv_mask 8 | 9 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 10 | """3x3 convolution with padding""" 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 12 | padding=0, groups=groups, bias=False, dilation=dilation)#new padding 13 | 14 | def conv1x1(in_planes, out_planes, stride=1): 15 | """1x1 convolution""" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 22 | base_width=64, dilation=1, norm_layer=None): 23 | super(BasicBlock, self).__init__() 24 | if norm_layer is None: 25 | norm_layer = nn.BatchNorm2d 26 | if groups != 1 or base_width != 64: 27 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 28 | if dilation > 1: 29 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 30 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 31 | self.conv1 = conv3x3(inplanes, planes, stride) 32 | self.bn1 = norm_layer(planes) 33 | self.relu = nn.ReLU(inplace=False) 34 | self.conv2 = conv3x3(planes, planes) 35 | self.bn2 = norm_layer(planes) 36 | 
self.downsample = downsample 37 | self.stride = stride 38 | self.pad2d = nn.ZeroPad2d(1)#new paddig 39 | 40 | def forward(self, x): 41 | identity = x 42 | out = self.pad2d(x) #new padding 43 | out = self.conv1(out) 44 | out = self.bn1(out) 45 | out = self.relu(out) 46 | 47 | out = self.pad2d(out) #new padding 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | 51 | if self.downsample is not None: 52 | identity = self.downsample(x) 53 | 54 | out = out + identity 55 | out = self.relu(out) 56 | 57 | return out 58 | 59 | 60 | class resnet_18(nn.Module): 61 | def __init__(self, pretrain_path,num_classes,dropout_rate,losstype,block=BasicBlock, layers=[2,2,2,2], zero_init_residual=False, 62 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 63 | norm_layer=None): 64 | super(resnet_18, self).__init__() 65 | if norm_layer is None: 66 | norm_layer = nn.BatchNorm2d 67 | self.label_num = num_classes 68 | self.pretrian_path = pretrain_path 69 | self._norm_layer = norm_layer 70 | self.losstype = losstype 71 | self.inplanes = 64 72 | self.dilation = 1 73 | if replace_stride_with_dilation is None: 74 | # each element in the tuple indicates if we should replace 75 | # the 2x2 stride with a dilated convolution instead 76 | replace_stride_with_dilation = [False, False, False] 77 | if len(replace_stride_with_dilation) != 3: 78 | raise ValueError("replace_stride_with_dilation should be None " 79 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 80 | self.groups = groups 81 | self.base_width = width_per_group 82 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=0, 83 | bias=False)#new padding 84 | self.bn1 = norm_layer(self.inplanes) 85 | self.relu = nn.ReLU(inplace=False) 86 | self.relu1 = nn.ReLU(inplace=False) 87 | self.relu2 = nn.ReLU(inplace=False) 88 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)#new padding 89 | self.layer1 = self._make_layer(block, 64, layers[0]) 90 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 91 | dilate=replace_stride_with_dilation[0]) 92 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 93 | dilate=replace_stride_with_dilation[1]) 94 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 95 | dilate=replace_stride_with_dilation[2]) 96 | self.mask1 = nn.Sequential( 97 | conv_mask(256* block.expansion, 256* block.expansion, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, 98 | loss_type=self.losstype, ), ) 99 | 100 | self.mask2 = nn.Sequential( 101 | conv_mask(256 * block.expansion, 256 * block.expansion, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 102 | labelnum=self.label_num, 103 | loss_type=self.losstype, ), ) 104 | 105 | self.avgpool = nn.Sequential( 106 | #nn.ReLU(inplace=True), 107 | nn.AdaptiveAvgPool2d((1, 1))) 108 | 109 | self.fc = nn.Linear(512* block.expansion, num_classes) 110 | 111 | self.pad2d_1 = nn.ZeroPad2d(1)#new paddig 112 | self.pad2d_3 = nn.ZeroPad2d(3)#new paddig 113 | 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 117 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 118 | nn.init.constant_(m.weight, 1) 119 | nn.init.constant_(m.bias, 0) 120 | 121 | # Zero-initialize the last BN in each residual branch, 122 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
123 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 124 | if zero_init_residual: 125 | for m in self.modules(): 126 | nn.init.constant_(m.bn2.weight, 0) 127 | 128 | self.init_weight() 129 | 130 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 131 | norm_layer = self._norm_layer 132 | downsample = None 133 | previous_dilation = self.dilation 134 | if dilate: 135 | self.dilation *= stride 136 | stride = 1 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | conv1x1(self.inplanes, planes * block.expansion, stride), 140 | norm_layer(planes * block.expansion), 141 | ) 142 | 143 | layers = [] 144 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 145 | self.base_width, previous_dilation, norm_layer)) 146 | self.inplanes = planes * block.expansion 147 | for _ in range(1, blocks): 148 | layers.append(block(self.inplanes, planes, groups=self.groups, 149 | base_width=self.base_width, dilation=self.dilation, 150 | norm_layer=norm_layer)) 151 | 152 | return nn.Sequential(*layers) 153 | 154 | def init_weight(self): 155 | state_dict = torch.load(self.pretrian_path) 156 | pretrained_dict = {k: v for k, v in state_dict.items() if 157 | 'fc' not in k and 'layer4.2' not in k} # 'fc' not in k and 'layer4.1' not in k and 158 | model_dict = self.state_dict() 159 | #for k in model_dict: 160 | # print(k) 161 | # print('####################################') 162 | # print('####################################') 163 | #for k,v in state_dict.items(): 164 | # print(k) 165 | model_dict.update(pretrained_dict) 166 | self.load_state_dict(model_dict,strict=False) 167 | torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01) 168 | torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01) 169 | 170 | torch.nn.init.normal_(self.fc.weight.data, mean=0, std=0.01) 171 | torch.nn.init.zeros_(self.fc.bias.data) 172 | 173 | 174 | def forward(self, x, label, Iter, density): 175 | # See note [TorchScript super()] 176 | 177 | 178 | x = self.pad2d_3(x) #new padding 179 | x = self.conv1(x) 180 | x = self.bn1(x) 181 | x = self.relu(x) 182 | x = self.pad2d_1(x) 183 | x = self.maxpool(x) 184 | 185 | x = self.layer1(x) 186 | x = self.layer2(x) 187 | x = self.layer3(x) 188 | 189 | x = self.mask1[0](x, label, Iter, density) 190 | x = self.relu1(x) 191 | x = self.mask2[0](x, label, Iter, density) 192 | x = self.relu2(x) 193 | # f_map = x.detach() 194 | x = self.layer4(x) 195 | 196 | # f_map = x.detach() 197 | x = self.avgpool(x) 198 | 199 | x = torch.flatten(x, 1) 200 | x = self.fc(x) 201 | 202 | 203 | return x 204 | # def forward(self, x): 205 | # return self._forward_impl(x), None 206 | 207 | -------------------------------------------------------------------------------- /model/resnet_50/resnet_50.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from model.resnet_18.conv_mask import conv_mask 8 | 9 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 10 | """3x3 convolution with padding""" 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 12 | padding=0, groups=groups, bias=False, dilation=dilation)#new padding 13 | 14 | def conv1x1(in_planes, out_planes, stride=1): 15 | """1x1 convolution""" 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, 
bias=False) 17 | 18 | class Bottleneck(nn.Module): 19 | expansion = 4 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 22 | base_width=64, dilation=1, norm_layer=None): 23 | super(Bottleneck, self).__init__() 24 | if norm_layer is None: 25 | norm_layer = nn.BatchNorm2d 26 | width = int(planes * (base_width / 64.)) * groups 27 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 28 | self.conv1 = conv1x1(inplanes, width) 29 | self.bn1 = norm_layer(width) 30 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 31 | self.bn2 = norm_layer(width) 32 | self.conv3 = conv1x1(width, planes * self.expansion) 33 | self.bn3 = norm_layer(planes * self.expansion) 34 | self.relu = nn.ReLU(inplace=False) 35 | self.downsample = downsample 36 | self.stride = stride 37 | self.pad2d = nn.ZeroPad2d(1)#new paddig 38 | 39 | def forward(self, x): 40 | identity = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.pad2d(out) #new padding 47 | out = self.conv2(out) 48 | out = self.bn2(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv3(out) 52 | out = self.bn3(out) 53 | 54 | if self.downsample is not None: 55 | identity = self.downsample(x) 56 | 57 | out = out + identity 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class resnet_50(nn.Module): 64 | def __init__(self, pretrain_path,num_classes,dropout_rate,losstype,block=Bottleneck, layers=[3,4,6,3], zero_init_residual=False, 65 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 66 | norm_layer=None): 67 | super(resnet_50, self).__init__() 68 | if norm_layer is None: 69 | norm_layer = nn.BatchNorm2d 70 | self.pretrian_path = pretrain_path 71 | self._norm_layer = norm_layer 72 | self.label_num = num_classes 73 | self.losstype = losstype 74 | self.inplanes = 64 75 | self.dilation = 1 76 | if replace_stride_with_dilation is None: 77 | # each element in the tuple indicates if we should replace 78 | # the 2x2 stride with a dilated convolution instead 79 | replace_stride_with_dilation = [False, False, False] 80 | if len(replace_stride_with_dilation) != 3: 81 | raise ValueError("replace_stride_with_dilation should be None " 82 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 83 | self.groups = groups 84 | self.base_width = width_per_group 85 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=0, 86 | bias=False)#new padding 87 | self.bn1 = norm_layer(self.inplanes) 88 | self.relu = nn.ReLU(inplace=False) 89 | self.relu1 = nn.ReLU(inplace=False) 90 | self.relu2 = nn.ReLU(inplace=False) 91 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)#new padding 92 | self.layer1 = self._make_layer(block, 64, layers[0]) 93 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 94 | dilate=replace_stride_with_dilation[0]) 95 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 96 | dilate=replace_stride_with_dilation[1]) 97 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 98 | dilate=replace_stride_with_dilation[2]) 99 | self.mask1 = nn.Sequential( 100 | conv_mask(256 * block.expansion, 256 * block.expansion, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, 101 | loss_type=self.losstype, ), ) 102 | 103 | self.mask2 = nn.Sequential( 104 | conv_mask(256 * block.expansion, 256 * block.expansion, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 105 | labelnum=self.label_num, 106 | 
loss_type=self.losstype, ), ) 107 | 108 | self.avgpool = nn.Sequential( 109 | nn.ReLU(inplace=True), 110 | nn.AdaptiveAvgPool2d((1, 1))) 111 | 112 | 113 | self.fc = nn.Linear(512*block.expansion, num_classes) 114 | 115 | self.pad2d_1 = nn.ZeroPad2d(1) # new paddig 116 | self.pad2d_3 = nn.ZeroPad2d(3) # new paddig 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 121 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 122 | nn.init.constant_(m.weight, 1) 123 | nn.init.constant_(m.bias, 0) 124 | 125 | # Zero-initialize the last BN in each residual branch, 126 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 127 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 128 | if zero_init_residual: 129 | for m in self.modules(): 130 | nn.init.constant_(m.bn2.weight, 0) 131 | 132 | self.init_weight() 133 | 134 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 135 | norm_layer = self._norm_layer 136 | downsample = None 137 | previous_dilation = self.dilation 138 | if dilate: 139 | self.dilation *= stride 140 | stride = 1 141 | if stride != 1 or self.inplanes != planes * block.expansion: 142 | downsample = nn.Sequential( 143 | conv1x1(self.inplanes, planes * block.expansion, stride), 144 | norm_layer(planes * block.expansion), 145 | ) 146 | 147 | layers = [] 148 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 149 | self.base_width, previous_dilation, norm_layer)) 150 | self.inplanes = planes * block.expansion 151 | for _ in range(1, blocks): 152 | layers.append(block(self.inplanes, planes, groups=self.groups, 153 | base_width=self.base_width, dilation=self.dilation, 154 | norm_layer=norm_layer)) 155 | 156 | return nn.Sequential(*layers) 157 | 158 | def init_weight(self): 159 | state_dict = torch.load(self.pretrian_path) 160 | pretrained_dict = {k: v for k, v in state_dict.items() if 161 | 'fc' not in k and 'layer4.2' not in k} # 'fc' not in k and 'layer4.1' not in k and 162 | model_dict = self.state_dict() 163 | # for k in model_dict: 164 | # print(k) 165 | # print('####################################') 166 | # print('####################################') 167 | # for k,v in state_dict.items(): 168 | # print(k) 169 | model_dict.update(pretrained_dict) 170 | self.load_state_dict(model_dict,strict=False) 171 | 172 | def forward(self, x, label, Iter, density): 173 | # See note [TorchScript super()] 174 | x = self.pad2d_3(x) #new padding 175 | x = self.conv1(x) 176 | x = self.bn1(x) 177 | x = self.relu(x) 178 | x = self.pad2d_1(x) 179 | x = self.maxpool(x) 180 | 181 | x = self.layer1(x) 182 | x = self.layer2(x) 183 | x = self.layer3(x) 184 | x = self.mask1[0](x, label, Iter, density) 185 | x = self.relu1(x) 186 | x = self.mask2[0](x, label, Iter, density) 187 | x = self.relu2(x) 188 | # f_map = x.detach() 189 | x = self.layer4(x) 190 | 191 | # f_map = x.detach() 192 | x = self.avgpool(x) 193 | 194 | x = torch.flatten(x, 1) 195 | x = self.fc(x) 196 | return x 197 | # def forward(self, x): 198 | # return self._forward_impl(x), None 199 | 200 | -------------------------------------------------------------------------------- /model/vgg_m/__pycache__/vgg_m.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/vgg_m/__pycache__/vgg_m.cpython-35.pyc -------------------------------------------------------------------------------- /model/vgg_m/conv_mask.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import math 3 | import copy 4 | import scipy.io as io 5 | from scipy.io import loadmat 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Function 10 | from torch.nn.parameter import Parameter 11 | 12 | 13 | 14 | 15 | class Divclass: 16 | def __init__(self, depthList, posList): 17 | self.depthList = depthList 18 | self.posList = posList 19 | 20 | 21 | def getMu(x): 22 | IsuseMax = 0 23 | bs = x.size()[0] 24 | depth = x.size()[1] 25 | h = x.size()[2] 26 | w = x.size()[3] 27 | x = x.transpose(2, 3).reshape([bs, depth, h * w]) 28 | if (IsuseMax): 29 | _, p = torch.max(x, dim=2) 30 | p = torch.reshape(p, (bs, depth, 1)).float() # index is long type 31 | mu_y = torch.ceil(p / h) 32 | mu_x = p - (mu_y - 1) * h 33 | sqrtvar = torch.Tensor([]) 34 | else: 35 | tmp_x = torch.Tensor(range(1, h + 1)).reshape(-1, 1).repeat([bs, depth, w, 1]).squeeze(3).cuda() 36 | tmp_y = torch.Tensor(range(1, w + 1)).repeat([bs, depth, h, 1]) 37 | tmp_y = tmp_y.transpose(2, 3).reshape([bs, depth, h * w]).cuda() 38 | sumXtmp = torch.sum(x, 2).unsqueeze(2) 39 | sumX = torch.max(sumXtmp, (torch.ones(sumXtmp.size()).cuda() * 0.000000001)) 40 | mu_xtmp = torch.round((torch.sum(tmp_x.mul(x), 2).unsqueeze(2)).div(sumX)) 41 | mu_x = torch.max(mu_xtmp, torch.ones(mu_xtmp.size()).cuda()) 42 | mu_ytmp = torch.round((torch.sum(tmp_y.mul(x), 2).unsqueeze(2)).div(sumX)) 43 | mu_y = torch.max(mu_ytmp, torch.ones(mu_ytmp.size()).cuda()) 44 | sqrtvartmp1 = mu_x.repeat([1, 1, h * w]) 45 | sqrtvartmp2 = mu_y.repeat([1, 1, h * w]) 46 | sqrtvar = torch.sqrt((torch.sum((tmp_x - sqrtvartmp1).mul(tmp_x - sqrtvartmp1).mul(x), 2).unsqueeze(2) + torch.sum((tmp_y - sqrtvartmp2).mul(tmp_y - sqrtvartmp2).mul(x), 2).unsqueeze(2)).div(sumX)) 47 | p = (mu_x + (mu_y - 1) * h).reshape([bs, depth, 1, 1]) 48 | tmp = torch.linspace(-1, 1, h).repeat(mu_x.size()).cuda() 49 | for i in range(bs): 50 | mu_x[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_x[i, :, :] - 1).long()) 51 | mu_y[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_y[i, :, :] - 1).long()) 52 | mu_x = mu_x.reshape([bs, depth, 1, 1]) 53 | mu_y = mu_y.reshape([bs, depth, 1, 1]) 54 | sqrtvar = sqrtvar.reshape([bs, depth]) 55 | return mu_x, mu_y, sqrtvar 56 | 57 | 58 | def getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w): 59 | mask = torch.abs(posTempX - mask_parameter['mu_x'].repeat([1, 1, h, w])) 60 | mask = mask + torch.abs(posTempY - mask_parameter['mu_y'].repeat([1, 1, h, w])) 61 | mask = 1 - mask.mul(mask_weight.reshape(depth, 1, 1).repeat([bs, 1, h, w])) 62 | mask = torch.max(mask, torch.ones(mask.size()).cuda() * (-1)) 63 | for i in range(depth): 64 | if not (mask_parameter['filter'][i].equal(torch.ones(1))): 65 | mask[:, i, :, :] = 1 66 | return mask 67 | 68 | 69 | def get_sliceMag(sliceMag,label,x): 70 | for lab in range(label.shape[1]): 71 | index = (label[:, lab, :, :] == 1).reshape(label.shape[0]) 72 | if torch.sum(index) != 0: 73 | (tmp, idx) = torch.max(x[index, :, :, :], dim=2) 74 | (tmp, idx) = torch.max(tmp, dim=2) 75 | tmp = tmp.reshape(tmp.size()[0], tmp.size()[1], 1, 1) 76 | meantmp = torch.mean(tmp, 0) 77 | if (torch.sum(sliceMag[:, lab]) == 0): 78 | 
sliceMag[:, lab] = torch.max(meantmp,(torch.ones(meantmp.size()) * 0.1).cuda()).reshape(meantmp.size()[0]) 79 | else: 80 | tmptmp = 0.9 81 | index = (meantmp == 0).reshape(meantmp.size()[0]) 82 | meantmp[index, 0, 0] = sliceMag[index, 0].cuda() 83 | sliceMag[:, lab] = (sliceMag[:,lab] * tmptmp).cuda()+meantmp.reshape(meantmp.size()[0])*(1-tmptmp) 84 | return sliceMag 85 | 86 | 87 | class conv_mask_F(Function): 88 | @staticmethod 89 | def forward(self, x, weight, bias, mask_weight, padding, label, Iter, density, mask_parameter): 90 | bs = x.shape[0] 91 | depth = x.shape[1] 92 | h = x.shape[2] 93 | w = x.shape[3] 94 | posTemp_x = torch.linspace(-1, 1, h).reshape(-1, 1).repeat([depth, 1, w]) 95 | posTemp_y = torch.linspace(-1, 1, w).repeat([depth, h, 1]) 96 | posTempX = posTemp_x.repeat([bs, 1, 1, 1]).cuda() 97 | posTempY = posTemp_y.repeat([bs, 1, 1, 1]).cuda() 98 | mask_parameter['mu_x'], mask_parameter['mu_y'], mask_parameter['sqrtvar'] = getMu(x) 99 | mask = getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w) 100 | input = x.mul(mask) 101 | x_relu = torch.max(input, torch.zeros(input.size()).cuda()) 102 | 103 | parameter_sliceMag = mask_parameter['sliceMag'].clone().data 104 | mask_parameter['sliceMag'] = get_sliceMag(mask_parameter['sliceMag'],label,x) 105 | 106 | self.save_for_backward(x, weight, bias, mask_weight, torch.Tensor([padding]), label, mask, Iter, density, 107 | mask_parameter['filter'], mask_parameter['mag'], mask_parameter['sqrtvar'], mask_parameter['strength'],parameter_sliceMag) 108 | 109 | return F.conv2d(x_relu, weight, bias, padding=padding) 110 | 111 | @staticmethod 112 | def backward(self, grad_output): 113 | x, weight, bias, mask_weight, padding, label, mask, Iter, density, parameter_filter, parameter_mag, parameter_sqrtvar, parameter_strength, parameter_sliceMag = self.saved_tensors 114 | 115 | input = x.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 116 | if self.needs_input_grad[0]: 117 | x_grad = torch.nn.grad.conv2d_input(input.shape, weight, grad_output, padding=int(padding.item())) 118 | if self.needs_input_grad[1]: 119 | weight_grad = torch.nn.grad.conv2d_weight(input, weight.shape, grad_output, padding=int(padding.item())) 120 | if bias is not None and self.needs_input_grad[2]: 121 | bias_grad = grad_output.sum(0).sum((1, 2)) 122 | 123 | depth = x.size()[1] 124 | h = x.size()[2] 125 | w = x.size()[3] 126 | depthList = (parameter_filter > 0).nonzero()[:, 0].reshape(-1, 1) 127 | labelNum = label.size()[1] 128 | Div_list = [] 129 | 130 | if (labelNum == 1): 131 | theClass = label 132 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 133 | Div = Divclass(depthList, posList) 134 | Div_list.append(Div) 135 | else: 136 | (theClass, indextmp) = torch.max(label, dim=1) 137 | theClass = theClass.unsqueeze(2) 138 | if (parameter_sliceMag.size()[0] == torch.Tensor([]).size()[0]): 139 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 140 | Div = Divclass(depthList, posList) 141 | Div_list.append(Div) 142 | else: 143 | sliceM = parameter_sliceMag 144 | for i in range(labelNum): 145 | Div = Divclass(depthList=torch.Tensor([]), posList=torch.Tensor([])) 146 | Div_list.append(Div) 147 | (val, index) = torch.max(sliceM[depthList, :].squeeze(1), dim=1) 148 | for lab in range(labelNum): 149 | (Div_list[lab].depthList, indextmp) = torch.sort(depthList[index == lab], dim=0) 150 | Div_list[lab].posList = (label[:, lab, :, :] == 1).nonzero()[:, 0].reshape(-1, 1) 151 | 152 | imgNum = label.size()[0] 153 | alpha = 0.5 154 | x_grad = 
x_grad.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 155 | 156 | if ((torch.sum(parameter_filter == 1)) > 0): 157 | parameter_strength = torch.mean(torch.mean(x.mul(mask), 2), 2).transpose(1, 0).cuda() 158 | mask_tmp = (torch.from_numpy(copy.deepcopy(mask.cpu().detach().numpy()[::-1, ::-1, :, :]))).cuda() 159 | alpha_logZ_pos = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(x.mul(mask_tmp), 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 160 | alpha_logZ_neg = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(-x, 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 161 | alpha_logZ_pos[torch.isinf(alpha_logZ_pos)] = torch.max(alpha_logZ_pos[torch.isinf(alpha_logZ_pos) == 0]) 162 | alpha_logZ_neg[torch.isinf(alpha_logZ_neg)] = torch.max(alpha_logZ_neg[torch.isinf(alpha_logZ_neg) == 0]) 163 | 164 | for lab in range(len(Div_list)): 165 | if (labelNum == 1): 166 | w_pos = 1 167 | w_neg = 1 168 | else: 169 | if (labelNum > 10): 170 | w_pos = 0.5 / (1 / labelNum) 171 | w_neg = 0.5 / (1 - 1 / labelNum) 172 | else: 173 | w_pos = 0.5 / density[lab] 174 | w_neg = 0.5 / (1 - density[lab]) 175 | 176 | mag = torch.ones([depth, imgNum]).div(1 / Iter).div(parameter_mag).cuda() 177 | dList = Div_list[lab].depthList 178 | dList = dList[(parameter_filter[dList] == 1).squeeze(1)].reshape(-1, 1) 179 | if (dList.size()[0] != torch.Tensor([]).size()[0]): 180 | List = Div_list[lab].posList.cuda() 181 | if (List.size()[0] != torch.Tensor([]).size()[0]): 182 | strength = torch.exp((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).div(alpha)).mul((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1) - alpha_logZ_pos[dList].squeeze(1).repeat(1, List.size()[0]) + alpha) 183 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 184 | strength[torch.isnan(strength)] = 0 185 | strength = (strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, List.size()[0])).mul((mag[:, List].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(List.size()[0],dList.size()[0], 1, 1) 186 | strength[torch.isnan(strength)] = 0 187 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 188 | index_dList = dList.repeat(List.size()[0], 1) 189 | index_List = List.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(List.size()[0] * dList.size()[0], 1) 190 | x_grad[index_List, index_dList, :, :] = ((x_grad[List, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) - (mask[List, :, :,:].squeeze(1))[:,dList,:,:].squeeze(2).mul(strength.repeat(1, 1, h, w) * (0.00001 * w_pos))).reshape(List.size()[0] * dList.size()[0],1, h, w) 191 | 192 | list_neg = (label != 1).nonzero()[:, 0].reshape(-1, 1) 193 | if (list_neg.size()[0] != torch.Tensor([]).size()[0]): 194 | strength = torch.mean((torch.mean((x[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2), 2).unsqueeze(2)),3).unsqueeze(2).transpose(0, 1).reshape(dList.size()[0], list_neg.size()[0]) 195 | strength = torch.exp(-strength.div(alpha)).mul(-strength - alpha_logZ_neg[dList].squeeze(2).repeat(1, list_neg.size()[0]) + alpha) 196 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 197 | strength[torch.isnan(strength)] = 0 198 | strength = (strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, list_neg.size()[0])).mul((mag[:, list_neg].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(list_neg.size()[0], dList.size()[0], 1, 1) 199 | strength[torch.isnan(strength)] = 0 200 | strength[torch.isinf(strength)] = 
torch.max(strength[torch.isinf(strength) == 0])
201 |                     index_dList = dList.repeat(list_neg.size()[0], 1)
202 |                     index_list_neg = list_neg.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(list_neg.size()[0] * dList.size()[0], 1)
203 |                     x_grad[index_list_neg, index_dList, :, :] = ((x_grad[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) + (strength.reshape(list_neg.size()[0], dList.size()[0], 1, 1).repeat(1, 1, h, w)) * (0.00001 * w_neg)).reshape(list_neg.size()[0] * dList.size()[0], 1, h, w)
204 | 
205 |         beta = 3.0
206 |         mask_weight_grad = torch.zeros(depth, 1).cuda()
207 |         parameter_sqrtvar = parameter_sqrtvar.transpose(0, 1)
208 |         # drive each mask_weight toward beta / (strength-weighted mean sqrtvar), clipped to [1.5, 3]
209 |         for lab in range(len(Div_list)):
210 |             dList = Div_list[lab].depthList.cuda()
211 |             List = Div_list[lab].posList
212 |             if ((dList.size()[0] != torch.Tensor([]).size()[0]) and (List.size()[0] != torch.Tensor([]).size()[0])):
213 |                 tmp = ((torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).mul((parameter_sqrtvar[:, List].squeeze(2))[dList, :].squeeze(1)), 1)).
214 |                     div(torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1), 1))).reshape(-1, 1)
215 |                 tmptmp = beta / tmp
216 |                 tmp = torch.max(torch.min(tmptmp, torch.ones(tmptmp.size()).cuda() * 3),torch.ones(tmptmp.size()).cuda() * 1.5)
217 |                 tmp = (tmp - mask_weight[dList].squeeze(2)) * (-10000)
218 |                 mask_weight_grad[dList] = tmp.unsqueeze(2)
219 | 
220 |         return x_grad, weight_grad, bias_grad, mask_weight_grad, None, None, None, None, None, None, None, None, None, None, None
221 | 
222 | 
223 | class conv_mask(nn.Module):
224 |     def __init__(self, in_channels, out_channels, kernel_size, stride, padding, labelnum, loss_type, ):
225 |         super(conv_mask, self).__init__()
226 |         self.in_channels = in_channels
227 |         self.out_channels = out_channels
228 |         self.kernel_size = kernel_size
229 |         self.stride = stride
230 |         self.padding = padding[0]
231 |         self.alphainit = 2.0
232 |         self.mask_parameter = None
233 |         self.label_num = labelnum
234 |         self.losstype = loss_type
235 | 
236 |         self.weight = Parameter(torch.Tensor(out_channels, in_channels, *kernel_size))
237 |         self.mask_weight = Parameter(torch.ones(in_channels, 1) * self.alphainit)
238 |         self.bias = Parameter(torch.zeros(out_channels))
239 |         self.init_mask_parameter()
240 | 
241 | 
242 |     def forward(self, x, label, Iter, density):
243 |         out = conv_mask_F.apply(x, self.weight, self.bias, self.mask_weight, self.padding, label, Iter, density, self.mask_parameter)
244 |         return out
245 | 
246 |     def init_mag(self):
247 |         mag = torch.Tensor([0.1])
248 |         # mag needs to be modified for multi-class classification
249 |         if self.losstype == 'softmax':
250 |             if self.label_num > 10:
251 |                 mag = mag / 50
252 |                 if getattr(self, 'model', None) == 'vgg_m':  # conv_mask never sets self.model; getattr avoids an AttributeError
253 |                     mag = mag / 1000000
254 |         else:
255 |             mag = mag * 0.2
256 |         return mag
257 | 
258 |     def init_mask_parameter(self):
259 |         mag = self.init_mag()
260 |         partRate = 1
261 |         textureRate = 0
262 |         partNum = round(partRate * self.in_channels)
263 |         textureNum = round((textureRate + partRate) * self.in_channels) - partNum
264 |         filtertype = torch.zeros(self.in_channels, 1)
265 |         filtertype[0:partNum] = 1
266 |         filtertype[partNum:partNum + textureNum] = 2
267 |         sliceMag = torch.zeros(self.in_channels, self.label_num)
268 |         self.mask_parameter = {'posTemp': {'posTemp_x': None, 'posTemp_y': None},
269 |                                'mu_x': None,
270 |                                'mu_y': None,
271 |                                'sqrtvar': None,
272 |                                'strength': None,
273 |                                'sliceMag': sliceMag,
274 |                                'filter': filtertype,
275 |                                'mag': mag}
276 | 
277 | 
278 | 
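Taken together, `getMu`, `getMask` and `conv_mask_F.forward` above implement the part template: estimate the activation-weighted center of each channel, lay an L1-shaped mask over that center, and multiply the mask into the feature map before the convolution. Below is a minimal CPU sketch of that arithmetic on a single feature map — `toy_mask` and `alpha` are illustrative names, not repo code; the real `getMu` additionally snaps the center to the nearest grid cell, and `alpha` stands in for the learned `mask_weight` (initialized to `alphainit = 2.0`):

```python
import torch

def toy_mask(x, alpha=2.0):
    # x: one feature map of shape (h, w)
    h, w = x.shape
    px = torch.linspace(-1, 1, h).reshape(-1, 1).expand(h, w)   # row coordinate of every cell
    py = torch.linspace(-1, 1, w).expand(h, w)                  # column coordinate of every cell
    s = x.sum().clamp(min=1e-9)                                 # same guard as sumX in getMu
    mu_x = (px * x).sum() / s                                   # activation-weighted center
    mu_y = (py * x).sum() / s
    mask = 1 - alpha * ((px - mu_x).abs() + (py - mu_y).abs())  # L1 part template
    return mask.clamp(min=-1)                                   # same floor as getMask

x = torch.zeros(7, 7); x[2, 3] = 1.0                            # a single activation peak
print(toy_mask(x).max(), toy_mask(x).argmax())                  # mask peaks (value 1) at cell (2, 3)
```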
--------------------------------------------------------------------------------
/model/vgg_m/vgg_m.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import h5py
4 | import math
5 | import copy
6 | import scipy.io as io
7 | from scipy.io import loadmat
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | from model.vgg_m.conv_mask import conv_mask  # use vgg_m's own conv_mask module
12 | 
13 | 
14 | 
15 | class vgg_m(nn.Module):
16 |     def __init__(self, pretrain_path, label_num, dropoutrate, losstype):
17 |         super(vgg_m, self).__init__()
18 |         self.pretrian_path = pretrain_path
19 |         self.dropoutrate = dropoutrate
20 |         self.label_num = label_num
21 |         self.losstype = losstype
22 |         self.conv1 = nn.Sequential(
23 |             nn.Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(0, 0)),
24 |             nn.ReLU(inplace=True),
25 |             nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2.0),)
26 |         self.maxpool1 = nn.Sequential(
27 |             nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), )
28 |         self.conv2 = nn.Sequential(
29 |             nn.Conv2d(96, 256, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1)),
30 |             nn.ReLU(inplace=True),
31 |             nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2.0),)
32 |         self.maxpool2 = nn.Sequential(
33 |             nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), )
34 |         self.conv3 = nn.Sequential(
35 |             nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
36 |             nn.ReLU(inplace=True),
37 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
38 |             nn.ReLU(inplace=True),
39 |             nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
40 |             nn.ReLU(inplace=True), )
41 |         self.mask1 = nn.Sequential(
42 |             conv_mask(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, loss_type = self.losstype, ), )
43 |         self.maxpool3 = nn.Sequential(
44 |             nn.ReLU(inplace=True),
45 |             nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), )
46 |         self.mask2 = nn.Sequential(
47 |             conv_mask(512, 4096, kernel_size=(6, 6), stride=(1, 1), padding=(0, 0), labelnum=self.label_num, loss_type = self.losstype, ), )
48 |         self.relu = nn.Sequential(
49 |             nn.ReLU(inplace=True), )
50 |         self.line = nn.Sequential(
51 |             nn.Dropout2d(p=self.dropoutrate),
52 |             nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
53 |             nn.ReLU(inplace=True),
54 |             nn.Dropout2d(p=self.dropoutrate),
55 |             nn.Conv2d(4096, self.label_num, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), )
56 |         self.init_weight()
57 | 
58 |     def init_weight(self):
59 |         data = loadmat(self.pretrian_path)
60 |         w, b = data['layers'][0][0][0]['weights'][0][0]
61 |         self.conv1[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1])))
62 |         self.conv1[0].bias.data.copy_(torch.from_numpy(b.reshape(-1)))
63 | 
64 |         w, b = data['layers'][0][4][0]['weights'][0][0]
65 |         self.conv2[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1])))
66 |         self.conv2[0].bias.data.copy_(torch.from_numpy(b.reshape(-1)))
67 | 
68 |         w, b = data['layers'][0][8][0]['weights'][0][0]
69 |         self.conv3[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1])))
70 |         self.conv3[0].bias.data.copy_(torch.from_numpy(b.reshape(-1)))
71 |         w, b = data['layers'][0][10][0]['weights'][0][0]
72 |         self.conv3[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1])))
73 |         
self.conv3[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 74 | w, b = data['layers'][0][12][0]['weights'][0][0] 75 | self.conv3[4].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 76 | self.conv3[4].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 77 | 78 | torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01) 79 | torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01) 80 | 81 | torch.nn.init.normal_(self.line[1].weight.data, mean=0, std=0.01) 82 | torch.nn.init.zeros_(self.line[1].bias.data) 83 | torch.nn.init.normal_(self.line[4].weight.data, mean=0, std=0.01) 84 | torch.nn.init.zeros_(self.line[4].bias.data) 85 | 86 | def forward(self, x, label, Iter, density): 87 | x = self.conv1(x) 88 | x = F.pad(x, (0, 1, 0, 1)) 89 | x = self.maxpool1(x) 90 | 91 | x = self.conv2(x) 92 | x = F.pad(x, (0, 1, 0, 1)) 93 | x = self.maxpool2(x) 94 | 95 | x = self.conv3(x) 96 | x = self.mask1[0](x, label, Iter, density) 97 | x = self.maxpool3(x) 98 | 99 | x = self.mask2[0](x, label, Iter, density) 100 | x = self.relu(x) 101 | x = self.line(x) 102 | return x 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /model/vgg_s/__pycache__/conv_mask.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/vgg_s/__pycache__/conv_mask.cpython-35.pyc -------------------------------------------------------------------------------- /model/vgg_s/__pycache__/vgg_s.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/vgg_s/__pycache__/vgg_s.cpython-35.pyc -------------------------------------------------------------------------------- /model/vgg_s/conv_mask.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import math 3 | import copy 4 | import scipy.io as io 5 | from scipy.io import loadmat 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Function 10 | from torch.nn.parameter import Parameter 11 | 12 | 13 | 14 | 15 | class Divclass: 16 | def __init__(self, depthList, posList): 17 | self.depthList = depthList 18 | self.posList = posList 19 | 20 | 21 | def getMu(x): 22 | IsuseMax = 0 23 | bs = x.size()[0] 24 | depth = x.size()[1] 25 | h = x.size()[2] 26 | w = x.size()[3] 27 | x = x.transpose(2, 3).reshape([bs, depth, h * w]) 28 | if (IsuseMax): 29 | _, p = torch.max(x, dim=2) 30 | p = torch.reshape(p, (bs, depth, 1)).float() # index is long type 31 | mu_y = torch.ceil(p / h) 32 | mu_x = p - (mu_y - 1) * h 33 | sqrtvar = torch.Tensor([]) 34 | else: 35 | tmp_x = torch.Tensor(range(1, h + 1)).reshape(-1, 1).repeat([bs, depth, w, 1]).squeeze(3).cuda() 36 | tmp_y = torch.Tensor(range(1, w + 1)).repeat([bs, depth, h, 1]) 37 | tmp_y = tmp_y.transpose(2, 3).reshape([bs, depth, h * w]).cuda() 38 | sumXtmp = torch.sum(x, 2).unsqueeze(2) 39 | sumX = torch.max(sumXtmp, (torch.ones(sumXtmp.size()).cuda() * 0.000000001)) 40 | mu_xtmp = torch.round((torch.sum(tmp_x.mul(x), 2).unsqueeze(2)).div(sumX)) 41 | mu_x = torch.max(mu_xtmp, torch.ones(mu_xtmp.size()).cuda()) 42 | mu_ytmp = torch.round((torch.sum(tmp_y.mul(x), 2).unsqueeze(2)).div(sumX)) 43 | mu_y = torch.max(mu_ytmp, torch.ones(mu_ytmp.size()).cuda()) 44 | sqrtvartmp1 = mu_x.repeat([1, 1, h 
* w]) 45 | sqrtvartmp2 = mu_y.repeat([1, 1, h * w]) 46 | sqrtvar = torch.sqrt((torch.sum((tmp_x - sqrtvartmp1).mul(tmp_x - sqrtvartmp1).mul(x), 2).unsqueeze(2) + torch.sum((tmp_y - sqrtvartmp2).mul(tmp_y - sqrtvartmp2).mul(x), 2).unsqueeze(2)).div(sumX)) 47 | p = (mu_x + (mu_y - 1) * h).reshape([bs, depth, 1, 1]) 48 | tmp = torch.linspace(-1, 1, h).repeat(mu_x.size()).cuda() 49 | for i in range(bs): 50 | mu_x[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_x[i, :, :] - 1).long()) 51 | mu_y[i, :, :] = torch.gather(tmp[i, :, :], 1, (mu_y[i, :, :] - 1).long()) 52 | mu_x = mu_x.reshape([bs, depth, 1, 1]) 53 | mu_y = mu_y.reshape([bs, depth, 1, 1]) 54 | sqrtvar = sqrtvar.reshape([bs, depth]) 55 | return mu_x, mu_y, sqrtvar 56 | 57 | 58 | def getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w): 59 | mask = torch.abs(posTempX - mask_parameter['mu_x'].repeat([1, 1, h, w])) 60 | mask = mask + torch.abs(posTempY - mask_parameter['mu_y'].repeat([1, 1, h, w])) 61 | mask = 1 - mask.mul(mask_weight.reshape(depth, 1, 1).repeat([bs, 1, h, w])) 62 | mask = torch.max(mask, torch.ones(mask.size()).cuda() * (-1)) 63 | for i in range(depth): 64 | if not (mask_parameter['filter'][i].equal(torch.ones(1))): 65 | mask[:, i, :, :] = 1 66 | return mask 67 | 68 | 69 | def get_sliceMag(sliceMag,label,x): 70 | for lab in range(label.shape[1]): 71 | index = (label[:, lab, :, :] == 1).reshape(label.shape[0]) 72 | if torch.sum(index) != 0: 73 | (tmp, idx) = torch.max(x[index, :, :, :], dim=2) 74 | (tmp, idx) = torch.max(tmp, dim=2) 75 | tmp = tmp.reshape(tmp.size()[0], tmp.size()[1], 1, 1) 76 | meantmp = torch.mean(tmp, 0) 77 | if (torch.sum(sliceMag[:, lab]) == 0): 78 | sliceMag[:, lab] = torch.max(meantmp,(torch.ones(meantmp.size()) * 0.1).cuda()).reshape(meantmp.size()[0]) 79 | else: 80 | tmptmp = 0.9 81 | index = (meantmp == 0).reshape(meantmp.size()[0]) 82 | meantmp[index, 0, 0] = sliceMag[index, 0].cuda() 83 | sliceMag[:, lab] = (sliceMag[:,lab] * tmptmp).cuda()+meantmp.reshape(meantmp.size()[0])*(1-tmptmp) 84 | return sliceMag 85 | 86 | 87 | class conv_mask_F(Function): 88 | @staticmethod 89 | def forward(self, x, weight, bias, mask_weight, padding, label, Iter, density, mask_parameter): 90 | bs = x.shape[0] 91 | depth = x.shape[1] 92 | h = x.shape[2] 93 | w = x.shape[3] 94 | posTemp_x = torch.linspace(-1, 1, h).reshape(-1, 1).repeat([depth, 1, w]) 95 | posTemp_y = torch.linspace(-1, 1, w).repeat([depth, h, 1]) 96 | posTempX = posTemp_x.repeat([bs, 1, 1, 1]).cuda() 97 | posTempY = posTemp_y.repeat([bs, 1, 1, 1]).cuda() 98 | mask_parameter['mu_x'], mask_parameter['mu_y'], mask_parameter['sqrtvar'] = getMu(x) 99 | mask = getMask(mask_parameter, mask_weight, posTempX, posTempY, bs, depth, h, w) 100 | input = x.mul(mask) 101 | x_relu = torch.max(input, torch.zeros(input.size()).cuda()) 102 | 103 | parameter_sliceMag = mask_parameter['sliceMag'].clone().data 104 | mask_parameter['sliceMag'] = get_sliceMag(mask_parameter['sliceMag'],label,x) 105 | 106 | self.save_for_backward(x, weight, bias, mask_weight, torch.Tensor([padding]), label, mask, Iter, density, 107 | mask_parameter['filter'], mask_parameter['mag'], mask_parameter['sqrtvar'], mask_parameter['strength'],parameter_sliceMag) 108 | 109 | return F.conv2d(x_relu, weight, bias, padding=padding) 110 | 111 | @staticmethod 112 | def backward(self, grad_output): 113 | x, weight, bias, mask_weight, padding, label, mask, Iter, density, parameter_filter, parameter_mag, parameter_sqrtvar, parameter_strength, parameter_sliceMag = self.saved_tensors 114 | 
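        # backward() reproduces the standard conv2d gradients for the input,
        # weights and bias, then adds the interpretability gradient on top of
        # x_grad: positive samples are pulled toward the part template (mask),
        # negative samples are pushed toward low activation, and mask_weight
        # is driven toward a clipped beta / sqrtvar target near the end.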
115 | input = x.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 116 | if self.needs_input_grad[0]: 117 | x_grad = torch.nn.grad.conv2d_input(input.shape, weight, grad_output, padding=int(padding.item())) 118 | if self.needs_input_grad[1]: 119 | weight_grad = torch.nn.grad.conv2d_weight(input, weight.shape, grad_output, padding=int(padding.item())) 120 | if bias is not None and self.needs_input_grad[2]: 121 | bias_grad = grad_output.sum(0).sum((1, 2)) 122 | 123 | depth = x.size()[1] 124 | h = x.size()[2] 125 | w = x.size()[3] 126 | depthList = (parameter_filter > 0).nonzero()[:, 0].reshape(-1, 1) 127 | labelNum = label.size()[1] 128 | Div_list = [] 129 | 130 | if (labelNum == 1): 131 | theClass = label 132 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 133 | Div = Divclass(depthList, posList) 134 | Div_list.append(Div) 135 | else: 136 | (theClass, indextmp) = torch.max(label, dim=1) 137 | theClass = theClass.unsqueeze(2) 138 | if (parameter_sliceMag.size()[0] == torch.Tensor([]).size()[0]): 139 | posList = (theClass == 1).nonzero()[:, 0].reshape(-1, 1) 140 | Div = Divclass(depthList, posList) 141 | Div_list.append(Div) 142 | else: 143 | sliceM = parameter_sliceMag 144 | for i in range(labelNum): 145 | Div = Divclass(depthList=torch.Tensor([]), posList=torch.Tensor([])) 146 | Div_list.append(Div) 147 | (val, index) = torch.max(sliceM[depthList, :].squeeze(1), dim=1) 148 | for lab in range(labelNum): 149 | (Div_list[lab].depthList, indextmp) = torch.sort(depthList[index == lab], dim=0) 150 | Div_list[lab].posList = (label[:, lab, :, :] == 1).nonzero()[:, 0].reshape(-1, 1) 151 | 152 | imgNum = label.size()[0] 153 | alpha = 0.5 154 | x_grad = x_grad.mul(torch.max(mask, torch.zeros(mask.size()).cuda())) 155 | 156 | if ((torch.sum(parameter_filter == 1)) > 0): 157 | parameter_strength = torch.mean(torch.mean(x.mul(mask), 2), 2).transpose(1, 0).cuda() 158 | mask_tmp = (torch.from_numpy(copy.deepcopy(mask.cpu().detach().numpy()[::-1, ::-1, :, :]))).cuda() 159 | alpha_logZ_pos = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(x.mul(mask_tmp), 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 160 | alpha_logZ_neg = (torch.log(torch.mean(torch.exp(torch.mean(torch.mean(-x, 2), 2).div(alpha)), 0)) * alpha).reshape(depth, 1) 161 | alpha_logZ_pos[torch.isinf(alpha_logZ_pos)] = torch.max(alpha_logZ_pos[torch.isinf(alpha_logZ_pos) == 0]) 162 | alpha_logZ_neg[torch.isinf(alpha_logZ_neg)] = torch.max(alpha_logZ_neg[torch.isinf(alpha_logZ_neg) == 0]) 163 | 164 | for lab in range(len(Div_list)): 165 | if (labelNum == 1): 166 | w_pos = 1 167 | w_neg = 1 168 | else: 169 | if (labelNum > 10): 170 | w_pos = 0.5 / (1 / labelNum) 171 | w_neg = 0.5 / (1 - 1 / labelNum) 172 | else: 173 | w_pos = 0.5 / density[lab] 174 | w_neg = 0.5 / (1 - density[lab]) 175 | 176 | mag = torch.ones([depth, imgNum]).div(1 / Iter).div(parameter_mag).cuda() 177 | dList = Div_list[lab].depthList 178 | dList = dList[(parameter_filter[dList] == 1).squeeze(1)].reshape(-1, 1) 179 | if (dList.size()[0] != torch.Tensor([]).size()[0]): 180 | List = Div_list[lab].posList.cuda() 181 | if (List.size()[0] != torch.Tensor([]).size()[0]): 182 | strength = torch.exp((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).div(alpha)).mul((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1) - alpha_logZ_pos[dList].squeeze(1).repeat(1, List.size()[0]) + alpha) 183 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 184 | strength[torch.isnan(strength)] = 0 185 | strength = 
(strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, List.size()[0])).mul((mag[:, List].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(List.size()[0],dList.size()[0], 1, 1) 186 | strength[torch.isnan(strength)] = 0 187 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 188 | index_dList = dList.repeat(List.size()[0], 1) 189 | index_List = List.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(List.size()[0] * dList.size()[0], 1) 190 | x_grad[index_List, index_dList, :, :] = ((x_grad[List, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) - (mask[List, :, :,:].squeeze(1))[:,dList,:,:].squeeze(2).mul(strength.repeat(1, 1, h, w) * (0.00001 * w_pos))).reshape(List.size()[0] * dList.size()[0],1, h, w) 191 | 192 | list_neg = (label != 1).nonzero()[:, 0].reshape(-1, 1) 193 | if (list_neg.size()[0] != torch.Tensor([]).size()[0]): 194 | strength = torch.mean((torch.mean((x[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2), 2).unsqueeze(2)),3).unsqueeze(2).transpose(0, 1).reshape(dList.size()[0], list_neg.size()[0]) 195 | strength = torch.exp(-strength.div(alpha)).mul(-strength - alpha_logZ_neg[dList].squeeze(2).repeat(1, list_neg.size()[0]) + alpha) 196 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 197 | strength[torch.isnan(strength)] = 0 198 | strength = (strength.div((torch.mean(strength, 1).reshape(-1, 1).repeat(1, list_neg.size()[0])).mul((mag[:, list_neg].squeeze(2))[dList, :].squeeze(1)))).transpose(0, 1).reshape(list_neg.size()[0], dList.size()[0], 1, 1) 199 | strength[torch.isnan(strength)] = 0 200 | strength[torch.isinf(strength)] = torch.max(strength[torch.isinf(strength) == 0]) 201 | index_dList = dList.repeat(list_neg.size()[0], 1) 202 | index_list_neg = list_neg.reshape(-1, 1).repeat(1, dList.size()[0]).reshape(list_neg.size()[0] * dList.size()[0], 1) 203 | x_grad[index_list_neg, index_dList, :, :] = ((x_grad[list_neg, :, :, :].squeeze(1))[:, dList, :, :].squeeze(2) + (strength.reshape(list_neg.size()[0], dList.size()[0], 1, 1).repeat(1, 1, h, w)) * (0.00001 * w_neg)).reshape(list_neg.size()[0] * dList.size()[0], 1, h, w) 204 | 205 | beta = 3.0 206 | mask_weight_grad = torch.zeros(depth, 1).cuda() 207 | parameter_sqrtvar = parameter_sqrtvar.transpose(0, 1) 208 | 209 | for lab in range(len(Div_list)): 210 | dList = Div_list[lab].depthList.cuda() 211 | List = Div_list[lab].posList 212 | if ((dList.size()[0] != torch.Tensor([]).size()[0]) and (List.size()[0] != torch.Tensor([]).size()[0])): 213 | tmp = ((torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1).mul((parameter_sqrtvar[:, List].squeeze(2))[dList, :].squeeze(1)), 1)). 
214 |                     div(torch.sum((parameter_strength[:, List].squeeze(2))[dList, :].squeeze(1), 1))).reshape(-1, 1)
215 |                 tmptmp = beta / tmp
216 |                 tmp = torch.max(torch.min(tmptmp, torch.ones(tmptmp.size()).cuda() * 3),torch.ones(tmptmp.size()).cuda() * 1.5)
217 |                 tmp = (tmp - mask_weight[dList].squeeze(2)) * (-10000)
218 |                 mask_weight_grad[dList] = tmp.unsqueeze(2)
219 | 
220 |         return x_grad, weight_grad, bias_grad, mask_weight_grad, None, None, None, None, None, None, None, None, None, None, None
221 | 
222 | 
223 | class conv_mask(nn.Module):
224 |     def __init__(self, in_channels, out_channels, kernel_size, stride, padding, labelnum, loss_type, ):
225 |         super(conv_mask, self).__init__()
226 |         self.in_channels = in_channels
227 |         self.out_channels = out_channels
228 |         self.kernel_size = kernel_size
229 |         self.stride = stride
230 |         self.padding = padding[0]
231 |         self.alphainit = 2.0
232 |         self.mask_parameter = None
233 |         self.label_num = labelnum
234 |         self.losstype = loss_type
235 | 
236 |         self.weight = Parameter(torch.Tensor(out_channels, in_channels, *kernel_size))
237 |         self.mask_weight = Parameter(torch.ones(in_channels, 1) * self.alphainit)
238 |         self.bias = Parameter(torch.zeros(out_channels))
239 |         self.init_mask_parameter()
240 | 
241 | 
242 |     def forward(self, x, label, Iter, density):
243 |         out = conv_mask_F.apply(x, self.weight, self.bias, self.mask_weight, self.padding, label, Iter, density, self.mask_parameter)
244 |         return out
245 | 
246 |     def init_mag(self):
247 |         mag = torch.Tensor([0.1])
248 |         # mag needs to be modified for multi-class classification
249 |         if self.losstype == 'softmax':
250 |             if self.label_num > 10:
251 |                 mag = mag / 50
252 |                 if getattr(self, 'model', None) == 'vgg_m':  # conv_mask never sets self.model; getattr avoids an AttributeError
253 |                     mag = mag / 1000000
254 |         else:
255 |             mag = mag * 0.2
256 |         return mag
257 | 
258 |     def init_mask_parameter(self):
259 |         mag = self.init_mag()
260 |         partRate = 1
261 |         textureRate = 0
262 |         partNum = round(partRate * self.in_channels)
263 |         textureNum = round((textureRate + partRate) * self.in_channels) - partNum
264 |         filtertype = torch.zeros(self.in_channels, 1)
265 |         filtertype[0:partNum] = 1
266 |         filtertype[partNum:partNum + textureNum] = 2
267 |         sliceMag = torch.zeros(self.in_channels, self.label_num)
268 |         self.mask_parameter = {'posTemp': {'posTemp_x': None, 'posTemp_y': None},
269 |                                'mu_x': None,
270 |                                'mu_y': None,
271 |                                'sqrtvar': None,
272 |                                'strength': None,
273 |                                'sliceMag': sliceMag,
274 |                                'filter': filtertype,
275 |                                'mag': mag}
276 | 
277 | 
278 | 
--------------------------------------------------------------------------------
/model/vgg_s/vgg_s.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import h5py
4 | import math
5 | import copy
6 | import scipy.io as io
7 | from scipy.io import loadmat
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | from model.vgg_s.conv_mask import conv_mask
12 | 
13 | 
14 | 
15 | class vgg_s(nn.Module):
16 |     def __init__(self, pretrain_path, label_num, dropoutrate, losstype):
17 |         super(vgg_s, self).__init__()
18 |         self.pretrian_path = pretrain_path
19 |         self.dropoutrate = dropoutrate
20 |         self.label_num = label_num
21 |         self.losstype = losstype
22 |         self.conv1 = nn.Sequential(
23 |             nn.Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(0, 0)),
24 |             nn.ReLU(inplace=True),
25 |             nn.LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2.0),)
26 |         self.maxpool1 = nn.Sequential(
27 |             nn.MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=(0, 0), dilation=(1, 1), ceil_mode=False), )
28 | self.conv2 = nn.Sequential( 29 | nn.Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(0, 0)), 30 | nn.ReLU(inplace=True),) 31 | self.maxpool2 = nn.Sequential( 32 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 33 | self.conv3 = nn.Sequential( 34 | nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 35 | nn.ReLU(inplace=True), 36 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 37 | nn.ReLU(inplace=True), 38 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 39 | nn.ReLU(inplace=True), ) 40 | self.mask1 = nn.Sequential( 41 | conv_mask(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, loss_type = self.losstype, ), ) 42 | self.maxpool3 = nn.Sequential( 43 | nn.ReLU(inplace=True), 44 | nn.MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 45 | self.mask2 = nn.Sequential( 46 | conv_mask(512, 4096, kernel_size=(6, 6), stride=(1, 1), padding=(0, 0), labelnum=self.label_num, loss_type = self.losstype, ), ) 47 | self.relu = nn.Sequential( 48 | nn.ReLU(inplace=True), ) 49 | self.line = nn.Sequential( 50 | nn.Dropout2d(p=self.dropoutrate), 51 | nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), 52 | nn.ReLU(inplace=True), 53 | nn.Dropout2d(p=self.dropoutrate), 54 | nn.Conv2d(4096, self.label_num, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), ) 55 | self.init_weight() 56 | 57 | def init_weight(self): 58 | data = loadmat(self.pretrian_path) 59 | w, b = data['layers'][0][0][0]['weights'][0][0] 60 | self.conv1[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 61 | self.conv1[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 62 | 63 | w, b = data['layers'][0][4][0]['weights'][0][0] 64 | self.conv2[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 65 | self.conv2[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 66 | 67 | w, b = data['layers'][0][7][0]['weights'][0][0] 68 | self.conv3[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 69 | self.conv3[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 70 | w, b = data['layers'][0][9][0]['weights'][0][0] 71 | self.conv3[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 72 | self.conv3[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 73 | w, b = data['layers'][0][11][0]['weights'][0][0] 74 | self.conv3[4].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 75 | self.conv3[4].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 76 | 77 | torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01) 78 | torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01) 79 | 80 | torch.nn.init.normal_(self.line[1].weight.data, mean=0, std=0.01) 81 | torch.nn.init.zeros_(self.line[1].bias.data) 82 | torch.nn.init.normal_(self.line[4].weight.data, mean=0, std=0.01) 83 | torch.nn.init.zeros_(self.line[4].bias.data) 84 | 85 | def forward(self, x, label, Iter, density): 86 | x = self.conv1(x) 87 | x = F.pad(x, (0, 2, 0, 2)) 88 | x = self.maxpool1(x) 89 | 90 | x = self.conv2(x) 91 | x = F.pad(x, (0, 1, 0, 1)) 92 | x = self.maxpool2(x) 93 | 94 | x = self.conv3(x) 95 | x = self.mask1[0](x, label, Iter, density) 96 | x = F.pad(x, (0, 1, 0, 1)) 97 | x = self.maxpool3(x) 98 | 99 | x = self.mask2[0](x, label, Iter, density) 100 | x = self.relu(x) 101 | x = self.line(x) 102 | return x 103 | 104 | 105 | 106 | 107 | 108 | 
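Note that vgg_s (like vgg_m above) pads only the right and bottom edges with `F.pad` before each pooling step instead of using symmetric padding inside `MaxPool2d`. This mirrors the asymmetric pooling of the original MatConvNet-style models whose .mat weights `init_weight` loads, so the pretrained filters keep seeing the same spatial grid; symmetric padding would shift that grid by one pixel. A quick shape check — a standalone sanity sketch, not repo code; the 224×224 input size matches what `getCNNFeature` feeds these nets:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 96, 109, 109)   # vgg_s conv1 output for a 224x224 input: (224 - 7) // 2 + 1 = 109
plain = F.max_pool2d(x, kernel_size=3, stride=3)                        # (109 - 3) // 3 + 1 = 36
padded = F.max_pool2d(F.pad(x, (0, 2, 0, 2)), kernel_size=3, stride=3)  # (111 - 3) // 3 + 1 = 37
print(plain.shape, padded.shape)   # bottom/right padding keeps the last rows and columns in play
```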
-------------------------------------------------------------------------------- /model/vgg_vd_16/__pycache__/conv_mask.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/vgg_vd_16/__pycache__/conv_mask.cpython-35.pyc -------------------------------------------------------------------------------- /model/vgg_vd_16/__pycache__/vgg_vd_16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ada-shen/ICNN/1012f6cf55779b6c20973ca42ddcb3ea6de1e49c/model/vgg_vd_16/__pycache__/vgg_vd_16.cpython-35.pyc -------------------------------------------------------------------------------- /model/vgg_vd_16/vgg_vd_16.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import h5py 4 | import math 5 | import copy 6 | import scipy.io as io 7 | from scipy.io import loadmat 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from model.vgg_vd_16.conv_mask import conv_mask 12 | 13 | 14 | 15 | class vgg_vd_16(nn.Module): 16 | def __init__(self, pretrain_path, label_num, dropoutrate, losstype): 17 | super(vgg_vd_16, self).__init__() 18 | self.pretrian_path = pretrain_path 19 | self.dropoutrate = dropoutrate 20 | self.label_num = label_num 21 | self.losstype = losstype 22 | self.conv1 = nn.Sequential( 23 | nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 26 | nn.ReLU(inplace=True), ) 27 | self.maxpool1 = nn.Sequential( 28 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 29 | self.conv2 = nn.Sequential( 30 | nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 31 | nn.ReLU(inplace=True), 32 | nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 33 | nn.ReLU(inplace=True), ) 34 | self.maxpool2 = nn.Sequential( 35 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 36 | self.conv3 = nn.Sequential( 37 | nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 40 | nn.ReLU(inplace=True), 41 | nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 42 | nn.ReLU(inplace=True), ) 43 | self.maxpool3 = nn.Sequential( 44 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 45 | self.conv4 = nn.Sequential( 46 | nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 51 | nn.ReLU(inplace=True), ) 52 | self.maxpool4 = nn.Sequential( 53 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 54 | self.conv5 = nn.Sequential( 55 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 58 | nn.ReLU(inplace=True), 59 | nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), 
60 | nn.ReLU(inplace=True), ) 61 | self.mask1 = nn.Sequential( 62 | conv_mask(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), labelnum=self.label_num, loss_type = self.losstype, ), ) 63 | self.maxpool5 = nn.Sequential( 64 | nn.ReLU(inplace=True), 65 | nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), dilation=(1, 1), ceil_mode=False), ) 66 | self.mask2 = nn.Sequential( 67 | conv_mask(512, 4096, kernel_size=(7, 7), stride=(1, 1), padding=(0, 0), labelnum=self.label_num, loss_type = self.losstype, ), ) 68 | self.relu = nn.Sequential( 69 | nn.ReLU(inplace=True), ) 70 | self.line = nn.Sequential( 71 | nn.Dropout2d(p=self.dropoutrate), 72 | nn.Conv2d(4096, 4096, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), 73 | nn.ReLU(inplace=True), 74 | nn.Dropout2d(p=self.dropoutrate), 75 | nn.Conv2d(4096, self.label_num, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)), ) 76 | self.init_weight() 77 | 78 | def init_weight(self): 79 | data = loadmat(self.pretrian_path) 80 | w, b = data['layers'][0][0][0]['weights'][0][0] 81 | self.conv1[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 82 | self.conv1[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 83 | w, b = data['layers'][0][2][0]['weights'][0][0] 84 | self.conv1[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 85 | self.conv1[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 86 | 87 | w, b = data['layers'][0][5][0]['weights'][0][0] 88 | self.conv2[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 89 | self.conv2[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 90 | w, b = data['layers'][0][7][0]['weights'][0][0] 91 | self.conv2[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 92 | self.conv2[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 93 | 94 | w, b = data['layers'][0][10][0]['weights'][0][0] 95 | self.conv3[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 96 | self.conv3[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 97 | w, b = data['layers'][0][12][0]['weights'][0][0] 98 | self.conv3[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 99 | self.conv3[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 100 | w, b = data['layers'][0][14][0]['weights'][0][0] 101 | self.conv3[4].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 102 | self.conv3[4].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 103 | 104 | w, b = data['layers'][0][17][0]['weights'][0][0] 105 | self.conv4[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 106 | self.conv4[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 107 | w, b = data['layers'][0][19][0]['weights'][0][0] 108 | self.conv4[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 109 | self.conv4[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 110 | w, b = data['layers'][0][21][0]['weights'][0][0] 111 | self.conv4[4].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 112 | self.conv4[4].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 113 | 114 | w, b = data['layers'][0][24][0]['weights'][0][0] 115 | self.conv5[0].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 116 | self.conv5[0].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 117 | w, b = data['layers'][0][26][0]['weights'][0][0] 118 | self.conv5[2].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1]))) 119 | self.conv5[2].bias.data.copy_(torch.from_numpy(b.reshape(-1))) 120 | w, b = data['layers'][0][28][0]['weights'][0][0] 121 | 
self.conv5[4].weight.data.copy_(torch.from_numpy(w.transpose([3, 2, 0, 1])))
122 |         self.conv5[4].bias.data.copy_(torch.from_numpy(b.reshape(-1)))
123 | 
124 |         torch.nn.init.normal_(self.mask1[0].weight.data, mean=0, std=0.01)
125 |         torch.nn.init.normal_(self.mask2[0].weight.data, mean=0, std=0.01)
126 | 
127 |         torch.nn.init.normal_(self.line[1].weight.data, mean=0, std=0.01)
128 |         torch.nn.init.zeros_(self.line[1].bias.data)
129 |         torch.nn.init.normal_(self.line[4].weight.data, mean=0, std=0.01)
130 |         torch.nn.init.zeros_(self.line[4].bias.data)
131 | 
132 |     def forward(self, x, label, Iter, density):
133 |         x = self.conv1(x)
134 |         x = F.pad(x, (0, 1, 0, 1))
135 |         x = self.maxpool1(x)
136 | 
137 |         x = self.conv2(x)
138 |         x = F.pad(x, (0, 1, 0, 1))
139 |         x = self.maxpool2(x)
140 | 
141 |         x = self.conv3(x)
142 |         x = F.pad(x, (0, 1, 0, 1))
143 |         x = self.maxpool3(x)
144 | 
145 |         x = self.conv4(x)
146 |         x = F.pad(x, (0, 1, 0, 1))
147 |         x = self.maxpool4(x)
148 | 
149 |         x = self.conv5(x)
150 | 
151 |         x = self.mask1[0](x, label, Iter, density)
152 |         x = self.maxpool5(x)
153 | 
154 |         x = self.mask2[0](x, label, Iter, density)
155 |         x = self.relu(x)
156 | 
157 |         #x = x.view(x.size(0),-1) ##
158 |         #x = x.unsqueeze(2)
159 |         #x = x.unsqueeze(3)
160 | 
161 |         x = self.line(x)
162 |         return x
163 | 
164 | 
165 | 
166 | 
167 | 
168 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip install -r requirements.txt
2 | 
3 | torch==1.2.0
4 | torchvision==0.4.0
5 | tensorboard
6 | scipy
7 | opencv-python
8 | matplotlib
9 | h5py
10 | 
--------------------------------------------------------------------------------
/tools/classification.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | from tools.load_data import load_data
4 | from tools.init_model import init_model
5 | from tools.train_model import train_model
6 | from tools.showresult import showresult
7 | from tools.lib import *
8 | 
9 | def classification(root_path,args):
10 |     # task/classification
11 |     task_path = os.path.join(root_path,'task',args.task_name)
12 |     make_dir(task_path)
13 |     # task/classification/vgg_vd_16
14 |     task_model_path = os.path.join(task_path,args.model)
15 |     make_dir(task_model_path)
16 |     task_model_dataset_path = os.path.join(task_model_path,args.dataset)
17 |     make_dir(task_model_dataset_path)
18 |     if args.dataset!='helen' and args.dataset!='celeba' and args.dataset!='cubsample':
19 |         task_model_dataset_labelname_path = os.path.join(task_model_dataset_path,args.label_name)
20 |         make_dir(task_model_dataset_labelname_path)
21 |     else:
22 |         task_model_dataset_labelname_path = task_model_dataset_path
23 |     task_model_dataset_labelname_taskid_path = os.path.join(task_model_dataset_labelname_path,str(args.task_id))
24 |     make_dir(task_model_dataset_labelname_taskid_path)
25 | 
26 |     train_dataloader, var_dataloader, density, dataset_length = load_data(root_path, task_model_dataset_labelname_path, args)
27 | 
28 |     net = init_model(root_path,args)
29 | 
30 |     max_acc,max_epoch = train_model(task_model_dataset_labelname_taskid_path, args, net, train_dataloader, var_dataloader, density, dataset_length)
31 |     # calculate stability
32 |     '''max_sta = showresult(max_epoch,task_model_dataset_labelname_taskid_path, task_model_dataset_labelname_path, root_path, args)
33 |     with open('train.log','a') as f:
34 |         f.writelines(args.label_name+" "+str(max_acc)+" "+str(max_epoch)+" 
"+str(max_sta)+"\n")''' 35 | print("\n") 36 | print(max_acc) 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /tools/classification_multi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from tools.load_data import load_data 4 | from tools.init_model import init_model 5 | from tools.train_model import train_model 6 | from tools.showresult import showresult 7 | from tools.lib import * 8 | from tools.load_data_multi import load_data_multi 9 | 10 | 11 | def classification_multi(root_path,args): 12 | 13 | task_path = os.path.join(root_path, 'task', args.task_name) 14 | make_dir(task_path) 15 | task_model_path = os.path.join(task_path, args.model) 16 | make_dir(task_model_path) 17 | task_model_dataset_path = os.path.join(task_model_path, args.dataset) 18 | make_dir(task_model_dataset_path) 19 | label_name = "" 20 | for i in range(args.label_num): 21 | label_name = label_name + args.label_name[i] +"_" 22 | label_name = label_name[:-1] 23 | task_model_dataset_labelname_path = os.path.join(task_model_dataset_path, label_name) 24 | make_dir(task_model_dataset_labelname_path) 25 | task_model_dataset_labelname_taskid_path = os.path.join(task_model_dataset_labelname_path, str(args.task_id)) 26 | make_dir(task_model_dataset_labelname_taskid_path) 27 | 28 | train_dataloader, var_dataloader, density, dataset_length = load_data_multi(root_path, task_model_dataset_labelname_path, args) 29 | 30 | net = init_model(root_path,args) 31 | 32 | train_model(task_model_dataset_labelname_taskid_path, args, net, train_dataloader, var_dataloader, density, dataset_length) 33 | # calculate stability 34 | #showresult(task_model_dataset_labelname_taskid_path, task_model_dataset_labelname_path, root_path, args) 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /tools/computeStability.py: -------------------------------------------------------------------------------- 1 | import os 2 | from scipy.io import loadmat 3 | import h5py 4 | import numpy as np 5 | from tools.getDistSqrtVar import getDistSqrtVar 6 | from tools.getCNNFeature import getCNNFeature 7 | from tools.get_ilsvrimdb import readAnnotation as ilsvr_readAnnotation 8 | from tools.get_cubimdb import readAnnotation as cub_readAnnotation 9 | from tools.get_vocimdb import readAnnotation as voc_readAnnotation 10 | 11 | def x2P(idx_h, idx_w, layerID, convnet): 12 | idx_h = idx_h[np.newaxis, :] 13 | idx_w = idx_w[np.newaxis, :] 14 | pHW = np.concatenate((idx_h, idx_w), axis=0) 15 | Stride = convnet['targetStride'][layerID-1] 16 | centerStart = convnet['targetCenter'][layerID-1] 17 | pHW = centerStart + (pHW-1) * Stride 18 | return pHW 19 | 20 | 21 | def computeStability(root_path,dataset,dataset_path, truthpart_path, label_name, net, model, convnet, layerID, epochnum, partList, partRate, imdb_mean, selectPatternRatio, patchNumPerPattern): 22 | 23 | if "ilsvrcanimalpart" in dataset_path: 24 | objset = ilsvr_readAnnotation(dataset_path, label_name) 25 | elif "vocpart" in dataset_path: 26 | objset = voc_readAnnotation(root_path, dataset, dataset_path, label_name) 27 | elif "cub200" in dataset_path: 28 | objset = cub_readAnnotation(dataset_path, label_name) 29 | 30 | imgNum = len(objset) 31 | partNum = len(partList) 32 | validImg = np.zeros(imgNum) 33 | for i in range(partNum): 34 | partID = partList[i] 35 | file_path = 
os.path.join(truthpart_path,label_name, "truth_part"+str(0) + str(partID)+'.mat') 36 | a = h5py.File(file_path,'r') 37 | truth_center = a['truth']['pHW_center'] 38 | for img in range(imgNum): 39 | if type(a[truth_center[img][0]][0]) is np.ndarray: 40 | validImg[img] = True 41 | 42 | patNum = round(512*partRate) 43 | selectedPatternNum = round(patNum*selectPatternRatio) 44 | pos = np.zeros((2,patNum,imgNum)) 45 | score = np.zeros((patNum, imgNum)) 46 | isFlip = False 47 | for imgID in range(imgNum): 48 | if(validImg[imgID]==0): 49 | continue 50 | x,I = getCNNFeature(dataset_path,objset[imgID],net,isFlip,imdb_mean, epochnum, model) # get after conv_mask feature 51 | x = x[:,0:patNum,:,:] 52 | x = np.squeeze(x,axis=0) 53 | xh = x.shape[1] 54 | v = np.max(x, axis=1) 55 | idx = np.argmax(x, axis=1) 56 | tmp = np.argmax(v, axis=1) 57 | v = np.max(v, axis=1) 58 | idx = idx.reshape(idx.shape[0] * idx.shape[1]) 59 | idx_h = idx[tmp + np.array(range(0, patNum)) * xh] # idx_h.shape=(patNum,) 60 | idx_w = tmp # idx_w.shape=(patNum,) 61 | theScore = v # v.shape=(patNum,) 62 | thePos = x2P(idx_h,idx_w,layerID,convnet) 63 | pos[:,:,imgID] = thePos 64 | score[:,imgID] = theScore 65 | ih = I.shape[0] 66 | iw = I.shape[1] 67 | distSqrtVar = getDistSqrtVar(truthpart_path, pos, score, patchNumPerPattern, partList, label_name) 68 | distSqrtVar = np.sort(distSqrtVar[np.isnan(distSqrtVar) == 0]) 69 | stability = np.mean(distSqrtVar[0:min(selectedPatternNum, len(distSqrtVar))])/np.sqrt(np.power(ih,2)+np.power(iw,2)) 70 | 71 | return stability 72 | 73 | -------------------------------------------------------------------------------- /tools/computeStability_multi.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from scipy.io import loadmat 4 | import h5py 5 | import numpy as np 6 | from tools.getDistSqrtVar import getDistSqrtVar 7 | from tools.getCNNFeature import getCNNFeature 8 | from tools.get_ilsvrimdb import readAnnotation as ilsvr_readAnnotation 9 | from tools.get_cubimdb import readAnnotation as cub_readAnnotation 10 | from tools.get_vocimdb import readAnnotation as voc_readAnnotation 11 | from tools.computeStability import x2P 12 | 13 | def computeStability_multi(patchNumPerPattern, root_path,dataset,dataset_path, truthpart_path, label_name, net, model, convnet, layerID, epochnum, partList, imdb_mean): 14 | 15 | if "ilsvrcanimalpart" in dataset_path: 16 | objset = ilsvr_readAnnotation(dataset_path, label_name) 17 | elif "vocpart" in dataset_path: 18 | objset = voc_readAnnotation(root_path, dataset, dataset_path, label_name) 19 | elif "cub200" in dataset_path: 20 | objset = cub_readAnnotation(dataset_path, label_name) 21 | 22 | imgNum = len(objset) 23 | partNum = len(partList) 24 | validImg = np.zeros(imgNum) 25 | for i in range(partNum): 26 | partID = partList[i] 27 | file_path = os.path.join(truthpart_path,label_name, "truth_part"+str(0) + str(partID)+'.mat') 28 | a = h5py.File(file_path,'r') 29 | truth_center = a['truth']['pHW_center'] 30 | for img in range(imgNum): 31 | if type(a[truth_center[img][0]][0]) is np.ndarray: 32 | validImg[img] = True 33 | 34 | patNum = 512 35 | pos = np.zeros((2,patNum,imgNum)) 36 | score = np.zeros((patNum, imgNum)) 37 | isFlip = False 38 | for imgID in range(imgNum): 39 | if(validImg[imgID]==0): 40 | continue 41 | x,I = getCNNFeature(dataset_path,objset[imgID],net,isFlip,imdb_mean, epochnum, model) # get after conv_mask feature 42 | x = x[:,0:patNum,:,:] 43 | x = np.squeeze(x,axis=0) 44 | xh = x.shape[1] 45 | v = 
np.max(x, axis=1) 46 | idx = np.argmax(x, axis=1) 47 | tmp = np.argmax(v, axis=1) 48 | v = np.max(v, axis=1) 49 | idx = idx.reshape(idx.shape[0] * idx.shape[1]) 50 | idx_h = idx[tmp + np.array(range(0, patNum)) * xh] # idx_h.shape=(patNum,) 51 | idx_w = tmp # idx_w.shape=(patNum,) 52 | theScore = v # v.shape=(patNum,) 53 | thePos = x2P(idx_h,idx_w,layerID,convnet) 54 | pos[:,:,imgID] = thePos 55 | score[:,imgID] = theScore 56 | ih = I.shape[0] 57 | iw = I.shape[1] 58 | distSqrtVar = getDistSqrtVar(truthpart_path, pos, score, patchNumPerPattern, partList, label_name) 59 | stability_filter = distSqrtVar / (np.sqrt(np.power(ih,2)+np.power(iw,2))) 60 | score_filter = np.mean(score, 1) 61 | 62 | return stability_filter,score_filter 63 | 64 | 65 | -------------------------------------------------------------------------------- /tools/getCNNFeature.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from tools.get_ilsvrimdb import getI 4 | 5 | def get_x(net, im, label, Iter, density,model): 6 | if model == "vgg_vd_16": 7 | x = net.conv1(im) 8 | x = F.pad(x, (0, 1, 0, 1)) 9 | x = net.maxpool1(x) 10 | x = net.conv2(x) 11 | x = F.pad(x, (0, 1, 0, 1)) 12 | x = net.maxpool2(x) 13 | x = net.conv3(x) 14 | x = F.pad(x, (0, 1, 0, 1)) 15 | x = net.maxpool3(x) 16 | x = net.conv4(x) 17 | x = F.pad(x, (0, 1, 0, 1)) 18 | x = net.maxpool4(x) 19 | x = net.conv5(x) 20 | x = net.mask1[0](x, label, Iter, density) 21 | x = net.maxpool5[0](x) 22 | x = x.cpu().clone().data.numpy() 23 | elif model == "alexnet": 24 | x = net.conv1(im) 25 | x = F.pad(x, (0, 1, 0, 1)) 26 | x = net.maxpool1(x) 27 | 28 | x = net.conv2(x) 29 | x = F.pad(x, (0, 1, 0, 1)) 30 | x = net.maxpool2(x) 31 | 32 | x = net.conv3(x) 33 | x = net.mask1[0](x, label, Iter, density) 34 | x = net.maxpool3(x) 35 | elif model == "vgg_s": 36 | x = net.conv1(im) 37 | x = F.pad(x, (0, 2, 0, 2)) 38 | x = net.maxpool1(x) 39 | 40 | x = net.conv2(x) 41 | x = F.pad(x, (0, 1, 0, 1)) 42 | x = net.maxpool2(x) 43 | 44 | x = net.conv3(x) 45 | x = net.mask1[0](x, label, Iter, density) 46 | x = F.pad(x, (0, 1, 0, 1)) 47 | x = net.maxpool3(x) 48 | elif model == "vgg_m": 49 | x = net.conv1(im) 50 | x = F.pad(x, (0, 1, 0, 1)) 51 | x = net.maxpool1(x) 52 | 53 | x = net.conv2(x) 54 | x = F.pad(x, (0, 1, 0, 1)) 55 | x = net.maxpool2(x) 56 | 57 | x = net.conv3(x) 58 | x = net.mask1[0](x, label, Iter, density) 59 | x = net.maxpool3(x) 60 | elif model == "resnet_18": 61 | x = net.pad2d_3(im) # new padding 62 | x = net.conv1(x) 63 | x = net.bn1(x) 64 | x = net.relu(x) 65 | x = net.pad2d_1(x) 66 | x = net.maxpool(x) 67 | 68 | x = net.layer1(x) 69 | x = net.layer2(x) 70 | x = net.layer3(x) 71 | x = net.mask1[0](x, label, Iter, density) 72 | # f_map = x.detach() 73 | 74 | elif model == "resnet_50": 75 | x = net.pad2d_3(im) # new padding 76 | x = net.conv1(x) 77 | x = net.bn1(x) 78 | x = net.relu(x) 79 | x = net.pad2d_1(x) 80 | x = net.maxpool(x) 81 | 82 | x = net.layer1(x) 83 | x = net.layer2(x) 84 | x = net.layer3(x) 85 | x = net.mask1[0](x, label, Iter, density) 86 | # f_map = x.detach() 87 | return x 88 | 89 | def getCNNFeature(dataset_path, obj, net, isFlip, dataMean,epochnum, model): 90 | 91 | if "ilsvrcanimalpart" in dataset_path: 92 | I = getI(obj, (224,224), isFlip) 93 | elif "vocpart" in dataset_path: 94 | I = getI(obj, (224,224), isFlip) 95 | elif "cub200" in dataset_path: 96 | I = getI(obj, (224,224), isFlip) 97 | 98 | im = I[0] - dataMean 99 | im = torch.from_numpy(im).float() 100 | im = 
im.unsqueeze(3) 101 | im = im.permute(3, 2, 0, 1) 102 | label = torch.ones((1, 1, 1, 1)) 103 | im = im.cuda() 104 | label = label.cuda() 105 | net = net.cuda() 106 | Iter = torch.Tensor([epochnum]) 107 | density = torch.Tensor([0]) 108 | x = get_x(net, im, label, Iter, density,model) # type numpy 109 | return x, I[0] 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /tools/getConvNetPara.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class Layer: 4 | def __init__(self, type, kernel_size, stride, padding): 5 | self.type = type 6 | self.kernel_size = kernel_size 7 | self.stride = stride 8 | self.padding = padding 9 | 10 | def vgg_16_vd(): 11 | net = [] 12 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 13 | net.append(layer) 14 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 15 | net.append(layer) 16 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 17 | net.append(layer) 18 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 19 | net.append(layer) 20 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 21 | net.append(layer) 22 | 23 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 24 | net.append(layer) 25 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 26 | net.append(layer) 27 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 28 | net.append(layer) 29 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 30 | net.append(layer) 31 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 32 | net.append(layer) 33 | 34 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 35 | net.append(layer) 36 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 37 | net.append(layer) 38 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 39 | net.append(layer) 40 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 41 | net.append(layer) 42 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 43 | net.append(layer) 44 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 45 | net.append(layer) 46 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 47 | net.append(layer) 48 | 49 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 50 | net.append(layer) 51 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 52 | net.append(layer) 53 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 54 | net.append(layer) 55 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 56 | net.append(layer) 57 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 58 | net.append(layer) 59 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 60 | net.append(layer) 61 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 62 | net.append(layer) 63 | 64 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 65 | net.append(layer) 66 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 67 | net.append(layer) 68 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 69 | net.append(layer) 70 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 71 | net.append(layer) 72 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 73 | net.append(layer) 74 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 75 | net.append(layer) 76 | layer = 
Layer(type='conv_mask', kernel_size=3, stride=1, padding=1) 77 | net.append(layer) 78 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 79 | net.append(layer) 80 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 81 | net.append(layer) 82 | 83 | layer = Layer(type='conv_mask', kernel_size=7, stride=1, padding=0) 84 | net.append(layer) 85 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 86 | net.append(layer) 87 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 88 | net.append(layer) 89 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 90 | net.append(layer) 91 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 92 | net.append(layer) 93 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 94 | net.append(layer) 95 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 96 | net.append(layer) 97 | 98 | return net 99 | 100 | def alexnet(): 101 | net = [] 102 | layer = Layer(type='conv', kernel_size=11, stride=4, padding=0) 103 | net.append(layer) 104 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 105 | net.append(layer) 106 | layer = Layer(type='lrn', kernel_size=0, stride=0, padding=0) 107 | net.append(layer) 108 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 109 | net.append(layer) 110 | 111 | layer = Layer(type='conv', kernel_size=5, stride=1, padding=2) 112 | net.append(layer) 113 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 114 | net.append(layer) 115 | layer = Layer(type='lrn', kernel_size=0, stride=0, padding=0) 116 | net.append(layer) 117 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 118 | net.append(layer) 119 | 120 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 121 | net.append(layer) 122 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 123 | net.append(layer) 124 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 125 | net.append(layer) 126 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 127 | net.append(layer) 128 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 129 | net.append(layer) 130 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 131 | net.append(layer) 132 | layer = Layer(type='conv_mask', kernel_size=3, stride=1, padding=1) 133 | net.append(layer) 134 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 135 | net.append(layer) 136 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 137 | net.append(layer) 138 | 139 | layer = Layer(type='conv_mask', kernel_size=6, stride=1, padding=0) 140 | net.append(layer) 141 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 142 | net.append(layer) 143 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 144 | net.append(layer) 145 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 146 | net.append(layer) 147 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 148 | net.append(layer) 149 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 150 | net.append(layer) 151 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 152 | net.append(layer) 153 | 154 | return net 155 | 156 | def vgg_m(): 157 | net = [] 158 | layer = Layer(type='conv', kernel_size=7, stride=2, padding=0) 159 | net.append(layer) 160 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 161 | net.append(layer) 162 | layer = Layer(type='lrn', kernel_size=0, stride=0, 
padding=1) 163 | net.append(layer) 164 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 165 | net.append(layer) 166 | 167 | layer = Layer(type='conv', kernel_size=5, stride=2, padding=1) 168 | net.append(layer) 169 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 170 | net.append(layer) 171 | layer = Layer(type='lrn', kernel_size=0, stride=0, padding=1) 172 | net.append(layer) 173 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 174 | net.append(layer) 175 | 176 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 177 | net.append(layer) 178 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 179 | net.append(layer) 180 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 181 | net.append(layer) 182 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 183 | net.append(layer) 184 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 185 | net.append(layer) 186 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 187 | net.append(layer) 188 | layer = Layer(type='conv_mask', kernel_size=3, stride=1, padding=1) 189 | net.append(layer) 190 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 191 | net.append(layer) 192 | layer = Layer(type='pool', kernel_size=3, stride=2, padding=0) 193 | net.append(layer) 194 | 195 | layer = Layer(type='conv_mask', kernel_size=6, stride=1, padding=0) 196 | net.append(layer) 197 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 198 | net.append(layer) 199 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 200 | net.append(layer) 201 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 202 | net.append(layer) 203 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 204 | net.append(layer) 205 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 206 | net.append(layer) 207 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 208 | net.append(layer) 209 | return net 210 | def vgg_s(): 211 | net = [] 212 | layer = Layer(type='conv', kernel_size=7, stride=2, padding=0) 213 | net.append(layer) 214 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 215 | net.append(layer) 216 | layer = Layer(type='lrn', kernel_size=0, stride=0, padding=1) 217 | net.append(layer) 218 | layer = Layer(type='pool', kernel_size=3, stride=3, padding=0) 219 | net.append(layer) 220 | 221 | layer = Layer(type='conv', kernel_size=5, stride=1, padding=0) 222 | net.append(layer) 223 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 224 | net.append(layer) 225 | layer = Layer(type='pool', kernel_size=2, stride=2, padding=0) 226 | net.append(layer) 227 | 228 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 229 | net.append(layer) 230 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 231 | net.append(layer) 232 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 233 | net.append(layer) 234 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 235 | net.append(layer) 236 | layer = Layer(type='conv', kernel_size=3, stride=1, padding=1) 237 | net.append(layer) 238 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 239 | net.append(layer) 240 | layer = Layer(type='conv_mask', kernel_size=3, stride=1, padding=1) 241 | net.append(layer) 242 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 243 | net.append(layer) 244 | layer = Layer(type='pool', kernel_size=3, stride=3, padding=0) 245 | 
net.append(layer) 246 | 247 | layer = Layer(type='conv_mask', kernel_size=6, stride=1, padding=0) 248 | net.append(layer) 249 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 250 | net.append(layer) 251 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 252 | net.append(layer) 253 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 254 | net.append(layer) 255 | layer = Layer(type='relu', kernel_size=0, stride=0, padding=0) 256 | net.append(layer) 257 | layer = Layer(type='dropout', kernel_size=0, stride=0, padding=0) 258 | net.append(layer) 259 | layer = Layer(type='conv', kernel_size=1, stride=1, padding=0) 260 | net.append(layer) 261 | return net 262 | def getConvNetPara(model): 263 | if model == 'alexnet': 264 | net = alexnet() 265 | elif model == 'vgg_m': 266 | net = vgg_m() 267 | elif model == 'vgg_s': 268 | net = vgg_s() 269 | elif model == 'vgg_vd_16': 270 | net = vgg_16_vd() 271 | else: 272 | raise ValueError('unknown model: ' + model) 273 | convnet = {'targetLayer': [], 274 | 'targetScale': [], 275 | 'targetStride': [], 276 | 'targetCenter': []} 277 | 278 | convLayers=[] 279 | for i in range(len(net)): 280 | if 'conv' in net[i].type: 281 | convLayers.append(i+1) 282 | convnet['targetLayer'].append(i+2) 283 | length = len(convLayers) 284 | 285 | for i in range(length): 286 | tarLay = convLayers[i] 287 | pad = net[tarLay-1].padding 288 | scale = net[tarLay-1].kernel_size 289 | stride = net[tarLay-1].stride 290 | if (i == 0): 291 | convnet['targetStride'].append(stride) 292 | convnet['targetScale'].append(scale) 293 | convnet['targetCenter'].append((1+scale-pad*2)/2) 294 | else: 295 | IsPool = False 296 | poolStride = 0 297 | poolSize = 0 298 | poolPad = 0 299 | for j in range(convLayers[i-1]+1,tarLay-1): 300 | if 'pool' in net[j].type: 301 | IsPool = True 302 | poolSize = net[j].kernel_size 303 | poolStride = net[j].stride 304 | poolPad = net[j].padding 305 | convnet['targetStride'].append((1 + IsPool * (poolStride - 1)) * stride * convnet['targetStride'][i-1]) 306 | convnet['targetScale'].append(convnet['targetScale'][i-1] + IsPool * (poolSize - 1) * convnet['targetStride'][i-1] + convnet['targetStride'][i] * (scale - 1)) 307 | if (IsPool): 308 | convnet['targetCenter'].append((scale - pad * 2 - 1) * poolStride * convnet['targetStride'][i-1] / 2 + (convnet['targetCenter'][i-1] + convnet['targetStride'][i-1] * (poolSize - 2 * poolPad - 1) / 2)) 309 | else: 310 | convnet['targetCenter'].append((scale - pad * 2 - 1) * convnet['targetStride'][i-1] / 2 + convnet['targetCenter'][i-1]) 311 | return convnet 312 | 313 | 314 | -------------------------------------------------------------------------------- /tools/getDistSqrtVar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | 5 | 6 | def getDiff(pos,truth,imgID,invalidNum): 7 | partNum = len(truth) 8 | patNum = pos.shape[1] 9 | diff = np.zeros((2,partNum,patNum)) 10 | for i in range(partNum): 11 | truth_center = truth[i]['truth']['pHW_center'] 12 | pos_truth = truth[i][truth_center[imgID][0]][0] 13 | if (type(pos_truth) is not np.ndarray or len(pos_truth)>2): 14 | diff[:,i,:] = invalidNum 15 | else: 16 | tmp = np.reshape(pos_truth, (2, 1)) 17 | pos_truth = np.tile(tmp, (1, patNum)) 18 | diff[:,i,:] = np.reshape((pos_truth - pos),(2,patNum)) 19 | return diff 20 | 21 | def new_var(array): 22 | array_mean = np.mean(array) 23 | res = 0 24 | for i in range(len(array)): 25 | res = res + (array[i]-array_mean)*(array[i]-array_mean) 26 | return res/(len(array)-1) 27 | 28 | 
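A quick aside on `new_var` above: it is the unbiased sample variance written out long-hand, dividing by n-1, so it can be cross-checked against NumPy's built-in. A minimal sanity check, with arbitrary values and assuming this repo's `tools` layout:

```python
import numpy as np
from tools.getDistSqrtVar import new_var

a = np.array([1.0, 2.0, 4.0, 8.0])
# np.var with ddof=1 also divides by n-1, so the two should agree
assert np.isclose(new_var(a), np.var(a, ddof=1))
```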
29 | def getAvgDistSqrtVar(diff,prob,patchNumPerPattern,invalidNum): 30 | partNum = diff.shape[1] 31 | patNum = diff.shape[2] 32 | distSqrtVar = np.zeros((patNum, 1)) 33 | for pat in range(patNum): 34 | idx = np.argsort(-1 * prob[pat,:]) #1dim 35 | for partID in range(partNum): 36 | tmp = np.where(diff[0,partID,pat,:]==invalidNum) 37 | tmp = np.setdiff1d(idx, tmp, True) 38 | tmp = tmp[0:min(patchNumPerPattern, len(tmp))] 39 | if(len(tmp)<2): 40 | distSqrtVar[pat,0] = np.nan 41 | else: 42 | dist = np.reshape(np.sqrt(np.sum((diff[:,partID, pat, tmp]*diff[:,partID, pat, tmp]),axis=0)),(1,len(tmp))) 43 | distSqrtVar[pat,0] = distSqrtVar[pat,0] + np.sqrt(new_var(dist[0,:])) 44 | distSqrtVar[pat,0] = distSqrtVar[pat,0]/partNum 45 | return distSqrtVar 46 | 47 | def getDistSqrtVar(truthpart_path, pos, prob, patchNumPerPattern, partList, label_name): 48 | invalidNum=100000 49 | partNum=len(partList) 50 | truth=[] 51 | for i in range(partNum): 52 | partID = partList[i] 53 | file_path = os.path.join(truthpart_path, label_name, "truth_part" + str(0) + str(partID) + '.mat') 54 | f = h5py.File(file_path, 'r') 55 | truth.append(f) 56 | patNum = pos.shape[1] 57 | imgNum = pos.shape[2] 58 | diff = np.zeros((2,partNum,patNum,imgNum)) 59 | for imgID in range(imgNum): 60 | diff[:,:,:,imgID] = getDiff(pos[:,:,imgID],truth,imgID,invalidNum) 61 | 62 | distSqrtVar = getAvgDistSqrtVar(diff,prob,patchNumPerPattern,invalidNum) 63 | return distSqrtVar -------------------------------------------------------------------------------- /tools/get_celebaimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | 8 | # each get_<dataset>imdb.py has its own readAnnotation, getNegObjSet and getI functions 9 | 10 | def getI(obj,image_size, IsFlip): 11 | I = cv2.imread(obj["filename"]) 12 | if(len(I.shape)==2): 13 | I = np.expand_dims(I,axis=2) 14 | I = np.repeat(I,3,axis=2) 15 | I = I.astype(np.float32) 16 | I = cv2.resize(I,image_size,interpolation=cv2.INTER_LINEAR) 17 | if(IsFlip==True): 18 | I = I[:,::-1,:] 19 | return I 20 | 21 | def readAnnotation(root_path, dataset, dataset_path): 22 | use_train = 162770 23 | use_test = 19868 24 | 25 | dataset_file = os.path.join(dataset_path,dataset, 'list_eval_partition.txt') 26 | label_file = os.path.join(dataset_path,dataset, 'Anno/list_attr_celeba.txt') 27 | dataset_img_path = os.path.join(dataset_path,dataset,'images') 28 | f = open(dataset_file,'r') 29 | g = open(label_file,'r') 30 | objset_train=[] 31 | objset_val = [] 32 | train_label = [] 33 | val_label = [] 34 | g.readline() 35 | g.readline() 36 | 37 | train_num = 0 38 | test_num = 0 39 | for line in f.readlines(): 40 | words = line.split() 41 | line1 = g.readline() 42 | words1 = line1.split() 43 | path = words[0] 44 | if path != words1[0]: 45 | print('filename error!') 46 | if int(words[1])==0 and train_num<use_train: 47 | objset_train.append({'filename':dataset_img_path+'/'+path}) 48 | train_label.append(list(1 if int(i)>0 else -1 for i in words1[1:])) 49 | train_num+=1 50 | if int(words[1])==1 and test_num<use_test: 51 | objset_val.append({'filename':dataset_img_path+'/'+path}) 52 | val_label.append(list(1 if int(i)>0 else -1 for i in words1[1:])) 53 | test_num += 1 54 | 55 | return objset_train,objset_val,train_label,val_label 56 | 57 | def get_celebaimdb(root_path, dataset, dataset_path,image_size): 58 | objset_train,objset_val,train_label,val_label = readAnnotation(root_path, dataset, dataset_path) 59 | train_l = len(objset_train) 60 | val_l = len(objset_val) 61 | 62 | train_data = np.zeros((image_size,image_size,3,train_l)) 63 | val_data = np.zeros((image_size,image_size,3,val_l)) 64 | 
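# Note: like the other imdb builders, images are staged MATLAB-style in
# (H, W, C, N) buffers below and only moved to PyTorch's (N, C, H, W)
# layout at the end of this function via transpose(3, 2, 0, 1).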
65 | for i in range(train_l): 66 | tar= i 67 | IsFlip = False 68 | I = getI(objset_train[i],(image_size,image_size), IsFlip) 69 | train_data[:,:,:,tar]=I 70 | 71 | for i in range(val_l): 72 | tar = i 73 | IsFlip = False 74 | I = getI(objset_val[i],(image_size,image_size), IsFlip) 75 | val_data[:,:,:,tar]=I 76 | 77 | train_label = np.array(train_label) 78 | val_label = np.array(val_label) 79 | 80 | 81 | dataMean = np.mean(train_data[:,:,:,:],axis=3) 82 | imdb_mean = {'mean': dataMean} 83 | dataMean = np.expand_dims(dataMean, axis=3) 84 | dataMean_train = np.tile(dataMean,(1, 1, 1, train_l)) 85 | dataMean_val = np.tile(dataMean, (1, 1, 1, val_l)) 86 | train_data = train_data-dataMean_train 87 | val_data = val_data-dataMean_val 88 | 89 | data_train = train_data.transpose(3,2,0,1) 90 | label_train = train_label[np.newaxis, :, :] 91 | label_train = label_train[np.newaxis,:,:,:] 92 | label_train = label_train.transpose(2,3,0,1) 93 | imdb_train = {'image': data_train, 'label': label_train} 94 | 95 | data_val = val_data.transpose(3,2,0,1) 96 | label_val = val_label[np.newaxis, :, :] 97 | label_val = label_val[np.newaxis, :, :, :] 98 | label_val = label_val.transpose(2,3,0,1) 99 | imdb_val = {'image': data_val, 'label': label_val} 100 | 101 | 102 | return imdb_train, imdb_val, imdb_mean 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /tools/get_cubimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | from tools.lib import cv2imread 8 | from tools.lib import load_txt 9 | from tools.get_ilsvrimdb import getNegObjSet 10 | from tools.get_ilsvrimdb import getI 11 | 12 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 13 | 14 | 15 | def readAnnotation(dataset_path,categoryName): 16 | minArea = 2500 17 | dataset_data_path = os.path.join(dataset_path, "CUB_200_2011") 18 | dataset_data_image_path = os.path.join(dataset_data_path, "images") 19 | dataset_data_labeltxt_path = dataset_data_path + "/image_class_labels.txt" 20 | dataset_data_train_test_txt_path = dataset_data_path + "/train_test_split.txt" 21 | dataset_data_classes_txt_path = dataset_data_path + "/classes.txt" 22 | dataset_data_images_txt_path = dataset_data_path + "/images.txt" 23 | dataset_data_boundingboxes_txt_path = dataset_data_path + "/bounding_boxes.txt" 24 | 25 | idClassPair = load_txt(dataset_data_labeltxt_path,"int_int") 26 | if categoryName in ['1','2','3','4','5','6','7','8','9']: 27 | imgIds = np.where(idClassPair[:,1] == int(categoryName)) 28 | else: 29 | imgIds = idClassPair[:,0]-1 30 | 31 | train_test_list = load_txt(dataset_data_train_test_txt_path,"int_int") 32 | train_list = np.where(train_test_list[:, 1] == 1) 33 | 34 | batchClassnamePair = load_txt(dataset_data_classes_txt_path,"int_str") 35 | 36 | idNamePair = load_txt(dataset_data_images_txt_path,"int_str") 37 | imgnames = [] 38 | for i in range(len(imgIds)): 39 | imgnames.append(idNamePair['id_name'][imgIds[i]]) 40 | 41 | idBndboxPair = load_txt(dataset_data_boundingboxes_txt_path,"int_int_int_int") 42 | x = idBndboxPair[imgIds,0] 43 | y = idBndboxPair[imgIds,1] 44 | width = idBndboxPair[imgIds,2] 45 | height = idBndboxPair[imgIds,3] 46 | 47 | objset = [] 48 | for i in range(len(imgIds)): 49 | xmin = int(x[i]) 50 | ymin = int(y[i]) 51 | xmax = 
int(x[i]+width[i]) 52 | ymax = int(y[i]+height[i]) 53 | if ((xmax - xmin + 1) * (ymax - ymin + 1) >= minArea) == False: 54 | continue 55 | imgnames[i] = imgnames[i].split('.') 56 | filename = imgnames[i][1] + '.' + imgnames[i][2] 57 | name = batchClassnamePair["id_name"][int(imgnames[i][0])-1] 58 | objset.append( 59 | {'filename': dataset_data_image_path + '/' + imgnames[i][0]+'.' + filename, 'name':name, 'bndbox': [xmin, xmax, ymin, ymax], 60 | 'id': i + 1}) 61 | return objset, train_list 62 | 63 | def get_cubimdb(dataset_path,neg_path,categoryName,image_size): 64 | 65 | objset,trainList = readAnnotation(dataset_path,categoryName) 66 | objset_neg = getNegObjSet(neg_path) 67 | objset_neg = [val for val in objset_neg for i in range(4)] 68 | 69 | num_pos = len(objset) 70 | num_neg = len(objset_neg) 71 | data = np.zeros((image_size, image_size, 3, num_pos)) 72 | data_neg = np.zeros((image_size, image_size, 3, num_neg)) 73 | 74 | 75 | for i in range(num_pos): 76 | tar=i 77 | IsFlip = False 78 | I_patch,I = getI(objset[i],(image_size,image_size), IsFlip) 79 | data[:,:,:,tar]=I_patch 80 | 81 | 82 | for i in range(num_neg): 83 | tar = i 84 | IsFlip = False 85 | I_patch,I = getI(objset_neg[i], (image_size,image_size), IsFlip) 86 | data_neg[:,:,:,tar]=I_patch 87 | 88 | total_images = num_pos + num_neg 89 | labels = np.ones((1,total_images))*(-1) 90 | labels[:,0:num_pos] = 1 91 | 92 | list_train = np.where(labels==-1) 93 | tmp = range(round(num_neg * 0.5)) 94 | list_train = list_train[1][tmp] 95 | list_train = np.append(trainList,list_train) 96 | 97 | set = np.ones((1,total_images))*2 98 | set[:, list_train] = 1 99 | data = np.concatenate((data,data_neg), axis=3) 100 | dataMean = np.mean(data[:,:,:,list_train.astype(int)],axis=3) #depend on training data not truth data 101 | imdb_mean = {'mean': dataMean} 102 | dataMean = np.expand_dims(dataMean,axis=3) 103 | dataMean = np.tile(dataMean,(1,1,1,total_images)) 104 | data = data-dataMean 105 | set = np.squeeze(set,axis=0) 106 | 107 | data_train = data[:, :, :, np.where(set == 1)] 108 | data_train = np.squeeze(data_train,axis=3) 109 | data_train = data_train.transpose(3,2,0,1) 110 | label_train = labels[:, np.where(set == 1)] 111 | label_train = label_train[np.newaxis,:,:,:] 112 | label_train = label_train.transpose(3,1,2,0) 113 | imdb_train = {'image': data_train, 'label': label_train} 114 | 115 | 116 | data_val = data[:, :, :, np.where(set == 2)] 117 | data_val = np.squeeze(data_val, axis=3) 118 | data_val = data_val.transpose(3,2,0,1) 119 | label_val = labels[:, np.where(set == 2)] 120 | label_val = label_val[np.newaxis, :, :, :] 121 | label_val = label_val.transpose(3, 1, 2, 0) 122 | imdb_val = {'image': data_val, 'label': label_val} 123 | return imdb_train, imdb_val, imdb_mean 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /tools/get_cubsampleimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | 8 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 9 | 10 | def getI(obj,image_size, IsFlip): 11 | I = cv2.imread(obj["filename"]) 12 | if(len(I.shape)==2): 13 | I = np.expand_dims(I,axis=2) 14 | I = np.repeat(I,3,axis=2) 15 | I = I.astype(np.float32) 16 | I = cv2.resize(I,image_size,interpolation=cv2.INTER_LINEAR) 17 | 
if(IsFlip==True): 18 | I = I[:,::-1,:] 19 | return I 20 | 21 | def readAnnotation(root_path, dataset, dataset_path, categoryName): 22 | dataset_file = os.path.join(dataset_path,dataset, categoryName+'_info.txt') 23 | 24 | dataset_img_path = os.path.join(dataset_path,dataset) 25 | print(dataset_file) 26 | print(dataset_img_path) 27 | f = open(dataset_file,'r') 28 | objset_train_true=[] 29 | objset_train_false = [] 30 | objset_val_true = [] 31 | objset_val_false = [] 32 | 33 | for line in f.readlines(): 34 | words = line.split(',') 35 | path = words[1] 36 | if int(words[2])==0 and int(words[3])==1: 37 | objset_train_true.append({'filename':dataset_img_path+'/'+path}) 38 | if int(words[2])==0 and int(words[3])==0: 39 | objset_val_true.append({'filename':dataset_img_path+'/'+path}) 40 | if int(words[2])==1 and int(words[3])==1: 41 | objset_train_false.append({'filename':dataset_img_path+'/'+path}) 42 | if int(words[2])==1 and int(words[3])==0: 43 | objset_val_false.append({'filename':dataset_img_path+'/'+path}) 44 | 45 | return objset_train_true,objset_train_false,objset_val_true,objset_val_false 46 | 47 | def get_cubsampleimdb(root_path, dataset, dataset_path,categoryName,image_size): 48 | objset_train_true,objset_train_false,objset_val_true,objset_val_false = readAnnotation(root_path, dataset, dataset_path,categoryName) 49 | train_true_l = len(objset_train_true) 50 | train_false_l = len(objset_train_false) 51 | val_true_l = len(objset_val_true) 52 | val_false_l = len(objset_val_false) 53 | 54 | 55 | data_train_true = np.zeros((image_size,image_size,3,train_true_l)) 56 | data_train_false = np.zeros((image_size,image_size,3,train_false_l)) 57 | data_val_true = np.zeros((image_size,image_size,3,val_true_l)) 58 | data_val_false = np.zeros((image_size,image_size,3,val_false_l)) 59 | 60 | for i in range(train_true_l): 61 | tar= i 62 | IsFlip = False 63 | I = getI(objset_train_true[i],(image_size,image_size), IsFlip) 64 | data_train_true[:,:,:,tar]=I 65 | 66 | for i in range(train_false_l): 67 | tar = i 68 | IsFlip = False 69 | I = getI(objset_train_false[i], (image_size,image_size), IsFlip) 70 | data_train_false[:,:,:,tar]=I 71 | 72 | for i in range(val_true_l): 73 | tar = i 74 | IsFlip = False 75 | I = getI(objset_val_true[i],(image_size,image_size), IsFlip) 76 | data_val_true[:,:,:,tar]=I 77 | 78 | for i in range(val_false_l): 79 | tar = i 80 | IsFlip = False 81 | I = getI(objset_val_false[i], (image_size,image_size), IsFlip) 82 | data_val_false[:,:,:,tar]=I 83 | 84 | train_label = np.ones((1,train_true_l+train_false_l))*(-1) 85 | train_label[:,0:train_true_l] = 1 86 | 87 | val_label = np.ones((1,val_true_l+val_false_l))*(-1) 88 | val_label[:,0:val_true_l] = 1 89 | 90 | train_data = np.concatenate((data_train_true,data_train_false), axis=3) 91 | val_data = np.concatenate((data_val_true, data_val_false), axis=3) 92 | 93 | dataMean = np.mean(train_data[:,:,:,:],axis=3) 94 | imdb_mean = {'mean': dataMean} 95 | dataMean = np.expand_dims(dataMean, axis=3) 96 | dataMean_train = np.tile(dataMean,(1,1,1,train_true_l+train_false_l)) 97 | dataMean_val = np.tile(dataMean, (1, 1, 1, val_true_l+val_false_l)) 98 | train_data = train_data-dataMean_train 99 | val_data = val_data-dataMean_val 100 | 101 | data_train = train_data.transpose(3,2,0,1) 102 | label_train = train_label[np.newaxis, :, :] 103 | label_train = label_train[np.newaxis,:,:,:] 104 | label_train = label_train.transpose(3,1,2,0) 105 | imdb_train = {'image': data_train, 'label': label_train} 106 | 107 | data_val = val_data.transpose(3,2,0,1) 108 | 
label_val = val_label[np.newaxis, :, :] 109 | label_val = label_val[np.newaxis, :,:, :] 110 | label_val = label_val.transpose(3, 1, 2, 0) 111 | imdb_val = {'image': data_val, 'label': label_val} 112 | 113 | 114 | return imdb_train, imdb_val, imdb_mean 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /tools/get_helenimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | 8 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 9 | 10 | def getI(obj,image_size, IsFlip): 11 | I = cv2.imread(obj["filename"]) 12 | if(len(I.shape)==2): 13 | I = np.expand_dims(I,axis=2) 14 | I = np.repeat(I,3,axis=2) 15 | I = I.astype(np.float32) 16 | I = cv2.resize(I,image_size,interpolation=cv2.INTER_LINEAR) 17 | if(IsFlip==True): 18 | I = I[:,::-1,:] 19 | return I 20 | 21 | def readAnnotation(root_path, dataset, dataset_path, categoryName): 22 | dataset_file = os.path.join(dataset_path,dataset, categoryName+'_info.txt') 23 | 24 | dataset_img_path = os.path.join(dataset_path,dataset) 25 | print(dataset_file) 26 | print(dataset_img_path) 27 | f = open(dataset_file,'r') 28 | objset_train_true=[] 29 | objset_train_false = [] 30 | objset_val_true = [] 31 | objset_val_false = [] 32 | 33 | for line in f.readlines(): 34 | words = line.split(',') 35 | path = words[1] 36 | if int(words[2])==0 and int(words[3])==1: 37 | objset_train_true.append({'filename':dataset_img_path+'/'+path}) 38 | if int(words[2])==0 and int(words[3])==0: 39 | objset_val_true.append({'filename':dataset_img_path+'/'+path}) 40 | if int(words[2])==1 and int(words[3])==1: 41 | objset_train_false.append({'filename':dataset_img_path+'/'+path}) 42 | if int(words[2])==1 and int(words[3])==0: 43 | objset_val_false.append({'filename':dataset_img_path+'/'+path}) 44 | 45 | return objset_train_true,objset_train_false,objset_val_true,objset_val_false 46 | 47 | def get_helenimdb(root_path, dataset, dataset_path,categoryName,image_size): 48 | objset_train_true,objset_train_false,objset_val_true,objset_val_false = readAnnotation(root_path, dataset, dataset_path,categoryName) 49 | train_true_l = len(objset_train_true) 50 | train_false_l = len(objset_train_false) 51 | val_true_l = len(objset_val_true) 52 | val_false_l = len(objset_val_false) 53 | 54 | 55 | data_train_true = np.zeros((image_size,image_size,3,train_true_l)) 56 | data_train_false = np.zeros((image_size,image_size,3,train_false_l)) 57 | data_val_true = np.zeros((image_size,image_size,3,val_true_l)) 58 | data_val_false = np.zeros((image_size,image_size,3,val_false_l)) 59 | 60 | for i in range(train_true_l): 61 | tar= i 62 | IsFlip = False 63 | I = getI(objset_train_true[i],(image_size,image_size), IsFlip) 64 | data_train_true[:,:,:,tar]=I 65 | 66 | for i in range(train_false_l): 67 | tar = i 68 | IsFlip = False 69 | I = getI(objset_train_false[i], (image_size,image_size), IsFlip) 70 | data_train_false[:,:,:,tar]=I 71 | 72 | for i in range(val_true_l): 73 | tar = i 74 | IsFlip = False 75 | I = getI(objset_val_true[i],(image_size,image_size), IsFlip) 76 | data_val_true[:,:,:,tar]=I 77 | 78 | for i in range(val_false_l): 79 | tar = i 80 | IsFlip = False 81 | I = getI(objset_val_false[i], (image_size,image_size), IsFlip) 82 | data_val_false[:,:,:,tar]=I 83 | 84 | train_label = 
np.ones((1,train_true_l+train_false_l))*(-1) 85 | train_label[:,0:train_true_l] = 1 86 | 87 | val_label = np.ones((1,val_true_l+val_false_l))*(-1) 88 | val_label[:,0:val_true_l] = 1 89 | 90 | train_data = np.concatenate((data_train_true,data_train_false), axis=3) 91 | val_data = np.concatenate((data_val_true, data_val_false), axis=3) 92 | 93 | dataMean = np.mean(train_data[:,:,:,:],axis=3) 94 | imdb_mean = {'mean': dataMean} 95 | dataMean = np.expand_dims(dataMean, axis=3) 96 | dataMean_train = np.tile(dataMean,(1,1,1,train_true_l+train_false_l)) 97 | dataMean_val = np.tile(dataMean, (1, 1, 1, val_true_l+val_false_l)) 98 | train_data = train_data-dataMean_train 99 | val_data = val_data-dataMean_val 100 | 101 | data_train = train_data.transpose(3,2,0,1) 102 | label_train = train_label[np.newaxis, :, :] 103 | label_train = label_train[np.newaxis,:,:,:] 104 | label_train = label_train.transpose(3,1,2,0) 105 | imdb_train = {'image': data_train, 'label': label_train} 106 | 107 | data_val = val_data.transpose(3,2,0,1) 108 | label_val = val_label[np.newaxis, :, :] 109 | label_val = label_val[np.newaxis, :,:, :] 110 | label_val = label_val.transpose(3, 1, 2, 0) 111 | imdb_val = {'image': data_val, 'label': label_val} 112 | 113 | 114 | return imdb_train, imdb_val, imdb_mean 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /tools/get_ilsvrimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | from tools.lib import cv2imread 8 | 9 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 10 | 11 | 12 | def readAnnotation(dataset_path,categoryName): 13 | minArea = 2500 14 | dataset_data_path = dataset_path 15 | dataset_data_namebatch_bndbox_path = os.path.join(dataset_data_path, categoryName + "_obj", "img", "data.mat") 16 | dataset_data_namebatch_img_path = os.path.join(dataset_data_path, categoryName + "_obj", "img", "img", "") 17 | mat = h5py.File(dataset_data_namebatch_bndbox_path, 'r') 18 | samples = mat['samples']["obj"] 19 | files = os.listdir(dataset_data_namebatch_img_path) 20 | objset = [] 21 | for i in range(len(files)): 22 | xmin = int(mat[(samples[i][0])]['bndbox']['xmin'].value) 23 | ymin = int(mat[(samples[i][0])]['bndbox']['ymin'].value) 24 | xmax = int(mat[(samples[i][0])]['bndbox']['xmax'].value) 25 | ymax = int(mat[(samples[i][0])]['bndbox']['ymax'].value) 26 | if ((xmax-xmin+1)*(ymax-ymin+1)>=minArea) == False: 27 | continue 28 | filename = ("%05d.jpg") % (i+1) 29 | objset.append({'filename':os.path.join(dataset_data_namebatch_img_path + filename),'bndbox':[xmin,xmax,ymin,ymax],'id':i+1}) 30 | return objset 31 | 32 | 33 | def getNegObjSet(neg_path): 34 | MaxObjNum = 1000 35 | objset_neg = [] 36 | for i in range(MaxObjNum): 37 | filename = ("%05d.JPEG") % (i+1) 38 | img = mpimg.imread(os.path.join(neg_path,filename)) 39 | h = img.shape[0] 40 | w = img.shape[1] 41 | xmin = 1 42 | ymin = 1 43 | xmax = w 44 | ymax = h 45 | objset_neg.append({'filename': os.path.join(neg_path,filename), 'bndbox':[xmin,xmax,ymin,ymax],'id': i + 1000000001}) 46 | return objset_neg 47 | 48 | 49 | def getI(obj,image_size, IsFlip): 50 | I = cv2imread(obj["filename"]) 51 | if(len(I.shape)==2): 52 | I = np.expand_dims(I,axis=2) 53 | I = np.repeat(I,3,axis=2) 54 | h = I.shape[0] 55 | w = I.shape[1] 
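# Note: obj['bndbox'] keeps the 1-based, inclusive coordinates of the
# MATLAB annotations; the max/min below clamp them to the image, and the
# [ymin-1:ymax, xmin-1:xmax] slice converts to 0-based, end-exclusive indexing.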
56 | d = I.shape[2] 57 | xmin = max(obj['bndbox'][0],1) 58 | xmax = min(obj['bndbox'][1],w) 59 | ymin = max(obj['bndbox'][2],1) 60 | ymax = min(obj['bndbox'][3],h) 61 | I_patch = I[ymin-1:ymax,xmin-1:xmax,:] # 1-based inclusive bndbox, so the slice ends at ymax/xmax 62 | I_patch = I_patch.astype(np.float32) 63 | I_patch = cv2.resize(I_patch,image_size,interpolation=cv2.INTER_LINEAR) 64 | if(IsFlip==True): 65 | I_patch = I_patch[:,::-1,:] 66 | return I_patch, I 67 | 68 | 69 | def get_ilsvrimdb(dataset_path,neg_path,categoryName,image_size): 70 | trainRate = 0.9 71 | posRate = 0.75 72 | objset = readAnnotation(dataset_path,categoryName) 73 | objset_neg = getNegObjSet(neg_path) 74 | num_pos = len(objset) 75 | num_neg = len(objset_neg) 76 | tarN = round(posRate/(1-posRate)*num_neg) 77 | repN = math.ceil(tarN/(num_pos*2)) 78 | list_train_pos = np.round(np.linspace(0,num_pos-1,round(num_pos*trainRate))) # 79 | list_train_pos = np.append(2*list_train_pos-1, 2*list_train_pos) 80 | if (repN > 1): 81 | repN_tmp = np.array(range(repN)) 82 | repN_tmp = np.expand_dims(repN_tmp,axis=1) 83 | list_train_pos = np.tile(list_train_pos,(repN, 1)) + np.tile(repN_tmp*(num_pos*2),(1,list_train_pos.size)) 84 | list_train_pos = np.reshape(list_train_pos,(1,list_train_pos.size)) 85 | list_train_pos = np.sort(list_train_pos,axis=1) 86 | list_train_pos = list_train_pos[np.where(list_train_pos<=tarN)] 87 | list_train_neg = np.round(np.linspace(0,num_neg-1,round(num_neg*trainRate))) 88 | list_train = np.sort(np.append(list_train_pos, list_train_neg + tarN)) # 1dim 89 | list_train = list_train-1 # start_index is 0 in python, is 1 in matlab 90 | 91 | data = np.zeros((image_size,image_size,3,num_pos*2)) 92 | data_neg = np.zeros((image_size,image_size,3,num_neg)) 93 | for i in range(num_pos): 94 | tar=(i+1)*2-2 95 | IsFlip = False 96 | I_patch,I = getI(objset[i],(image_size,image_size), IsFlip) 97 | data[:,:,:,tar]=I_patch 98 | tar = (i+1)*2-1 99 | IsFlip = True 100 | I_patch,I = getI(objset[i],(image_size,image_size), IsFlip) 101 | data[:,:,:,tar]=I_patch 102 | data = np.tile(data, (1, 1, 1, repN)) 103 | data = data[:,:,:, 0:tarN] 104 | for i in range(num_neg): 105 | tar = i 106 | IsFlip = False 107 | I_patch,I = getI(objset_neg[i], (image_size,image_size), IsFlip) 108 | data_neg[:,:,:,tar]=I_patch 109 | 110 | total_images = tarN + num_neg 111 | labels = np.ones((1,total_images))*(-1) 112 | labels[:,0:tarN] = 1 113 | set = np.ones((1,total_images))*2 114 | set[:, list_train.astype(int)] = 1 115 | data = np.concatenate((data,data_neg), axis=3) 116 | dataMean = np.mean(data[:,:,:,list_train.astype(int)],axis=3) #depend on training data not truth data 117 | imdb_mean = {'mean': dataMean} 118 | dataMean = np.expand_dims(dataMean,axis=3) 119 | dataMean = np.tile(dataMean,(1,1,1,total_images)) 120 | data = data-dataMean 121 | set = np.squeeze(set,axis=0) 122 | 123 | data_train = data[:, :, :, np.where(set == 1)] 124 | data_train = np.squeeze(data_train,axis=3) 125 | data_train = data_train.transpose(3,2,0,1) 126 | label_train = labels[:, np.where(set == 1)] 127 | label_train = label_train[np.newaxis,:,:,:] 128 | label_train = label_train.transpose(3,1,2,0) 129 | imdb_train = {'image': data_train, 'label': label_train} 130 | 131 | 132 | data_val = data[:, :, :, np.where(set == 2)] 133 | data_val = np.squeeze(data_val, axis=3) 134 | data_val = data_val.transpose(3,2,0,1) 135 | label_val = labels[:, np.where(set == 2)] 136 | label_val = label_val[np.newaxis, :, :, :] 137 | label_val = label_val.transpose(3, 1, 2, 0) 138 | imdb_val = {'image': data_val, 'label': label_val} 139 | 140 | 
return imdb_train, imdb_val, imdb_mean 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /tools/get_voc2010imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | 8 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 9 | 10 | 11 | def getI(obj,image_size, IsFlip): 12 | I = cv2.imread(obj["filename"]) 13 | if(len(I.shape)==2): 14 | I = np.expand_dims(I,axis=2) 15 | I = np.repeat(I,3,axis=2) 16 | I = I.astype(np.float32) 17 | I = cv2.resize(I,image_size,interpolation=cv2.INTER_LINEAR) 18 | if(IsFlip==True): 19 | I = I[:,::-1,:] 20 | return I 21 | 22 | def readAnnotation(root_path, dataset, dataset_path, categoryName): 23 | dataset_file = os.path.join(dataset_path,dataset, categoryName+'_info.txt') 24 | 25 | dataset_img_path = os.path.join(dataset_path,dataset) 26 | f = open(dataset_file,'r') 27 | objset_train_true=[] 28 | objset_train_false = [] 29 | objset_val_true = [] 30 | objset_val_false = [] 31 | 32 | for line in f.readlines(): 33 | words = line.split(',') 34 | path = words[1] 35 | if int(words[2])==0 and int(words[3])==1: 36 | objset_train_true.append({'filename':dataset_img_path+'/'+path}) 37 | if int(words[2])==0 and int(words[3])==0: 38 | objset_val_true.append({'filename':dataset_img_path+'/'+path}) 39 | if int(words[2])==1 and int(words[3])==1: 40 | objset_train_false.append({'filename':dataset_img_path+'/'+path}) 41 | if int(words[2])==1 and int(words[3])==0: 42 | objset_val_false.append({'filename':dataset_img_path+'/'+path}) 43 | 44 | return objset_train_true,objset_train_false,objset_val_true,objset_val_false 45 | 46 | def get_voc2010imdb(root_path, dataset, dataset_path,categoryName,image_size): 47 | objset_train_true,objset_train_false,objset_val_true,objset_val_false = readAnnotation(root_path, dataset, dataset_path,categoryName) 48 | train_true_l = len(objset_train_true) 49 | train_false_l = len(objset_train_false) 50 | val_true_l = len(objset_val_true) 51 | val_false_l = len(objset_val_false) 52 | 53 | 54 | data_train_true = np.zeros((image_size,image_size,3,train_true_l)) 55 | data_train_false = np.zeros((image_size,image_size,3,train_false_l)) 56 | data_val_true = np.zeros((image_size,image_size,3,val_true_l)) 57 | data_val_false = np.zeros((image_size,image_size,3,val_false_l)) 58 | 59 | for i in range(train_true_l): 60 | tar= i 61 | IsFlip = False 62 | I = getI(objset_train_true[i],(image_size,image_size), IsFlip) 63 | data_train_true[:,:,:,tar]=I 64 | 65 | for i in range(train_false_l): 66 | tar = i 67 | IsFlip = False 68 | I = getI(objset_train_false[i], (image_size,image_size), IsFlip) 69 | data_train_false[:,:,:,tar]=I 70 | 71 | for i in range(val_true_l): 72 | tar = i 73 | IsFlip = False 74 | I = getI(objset_val_true[i],(image_size,image_size), IsFlip) 75 | data_val_true[:,:,:,tar]=I 76 | 77 | for i in range(val_false_l): 78 | tar = i 79 | IsFlip = False 80 | I = getI(objset_val_false[i], (image_size,image_size), IsFlip) 81 | data_val_false[:,:,:,tar]=I 82 | 83 | train_label = np.ones((1,train_true_l+train_false_l))*(-1) 84 | train_label[:,0:train_true_l] = 1 85 | 86 | val_label = np.ones((1,val_true_l+val_false_l))*(-1) 87 | val_label[:,0:val_true_l] = 1 88 | 89 | train_data = np.concatenate((data_train_true,data_train_false), axis=3) 90 | 
val_data = np.concatenate((data_val_true, data_val_false), axis=3) 91 | 92 | dataMean = np.mean(train_data[:,:,:,:],axis=3) 93 | imdb_mean = {'mean': dataMean} 94 | dataMean = np.expand_dims(dataMean, axis=3) 95 | dataMean_train = np.tile(dataMean,(1,1,1,train_true_l+train_false_l)) 96 | dataMean_val = np.tile(dataMean, (1, 1, 1, val_true_l+val_false_l)) 97 | train_data = train_data-dataMean_train 98 | val_data = val_data-dataMean_val 99 | 100 | data_train = train_data.transpose(3,2,0,1) 101 | label_train = train_label[np.newaxis, :, :] 102 | label_train = label_train[np.newaxis,:,:,:] 103 | label_train = label_train.transpose(3,1,2,0) 104 | imdb_train = {'image': data_train, 'label': label_train} 105 | 106 | data_val = val_data.transpose(3,2,0,1) 107 | label_val = val_label[np.newaxis, :, :] 108 | label_val = label_val[np.newaxis, :,:, :] 109 | label_val = label_val.transpose(3, 1, 2, 0) 110 | imdb_val = {'image': data_val, 'label': label_val} 111 | 112 | 113 | return imdb_train, imdb_val, imdb_mean 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /tools/get_vocimdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import h5py 4 | import matplotlib.image as mpimg 5 | import math 6 | import numpy as np 7 | from tools.lib import cv2imread 8 | from tools.get_ilsvrimdb import getNegObjSet 9 | from tools.get_ilsvrimdb import getI 10 | 11 | # different get+dataset+imdb.py has deffernet readAnnotation getNegObjSet and getI functions 12 | 13 | 14 | def readAnnotation(root_path, dataset, dataset_path, categoryName): 15 | minArea = 2500 16 | dataset_truth_path = os.path.join(root_path, "data_input", dataset) 17 | dataset_truth_file = os.path.join(dataset_truth_path, 'truth_'+categoryName+'.mat') 18 | dataset_data_namebatch_img_path = os.path.join(dataset_path, "VOCdevkit","VOC2010","JPEGImages") 19 | mat = h5py.File(dataset_truth_file, 'r') 20 | truth = mat['truth']["obj"] 21 | objset = [] 22 | for i in range(len(truth)): 23 | xmin = int(mat[(truth[i][0])]['bndbox']['Wmin'].value) 24 | ymin = int(mat[(truth[i][0])]['bndbox']['Hmin'].value) 25 | xmax = int(mat[(truth[i][0])]['bndbox']['Wmax'].value) 26 | ymax = int(mat[(truth[i][0])]['bndbox']['Hmax'].value) 27 | if ((xmax-xmin+1)*(ymax-ymin+1)>=minArea) == False: 28 | continue 29 | filename = mat[(truth[i][0])]['filename'] 30 | objset.append({'filename':dataset_data_namebatch_img_path+'/'+"".join(chr(j) for j in filename[:]),'bndbox':[xmin,xmax,ymin,ymax],'id':i+1}) 31 | return objset 32 | 33 | def get_vocimdb(root_path, dataset, dataset_path,neg_path,categoryName,image_size): 34 | trainRate = 0.9 35 | posRate = 0.75 36 | objset = readAnnotation(root_path, dataset, dataset_path,categoryName) 37 | objset_neg = getNegObjSet(neg_path) 38 | num_pos = len(objset) 39 | num_neg = len(objset_neg) 40 | tarN = round(posRate/(1-posRate)*num_neg) 41 | repN = math.ceil(tarN/(num_pos*2)) 42 | list_train_pos = np.round(np.linspace(0,num_pos-1,round(num_pos*trainRate))) # 43 | list_train_pos = np.append(2*list_train_pos-1, 2*list_train_pos) 44 | if (repN > 1): 45 | repN_tmp = np.array(range(repN)) 46 | repN_tmp = np.expand_dims(repN_tmp,axis=1) 47 | list_train_pos = np.tile(list_train_pos,(repN, 1)) + np.tile(repN_tmp*(num_pos*2),(1,list_train_pos.size)) 48 | list_train_pos = np.reshape(list_train_pos,(1,list_train_pos.size)) 49 | list_train_pos = np.sort(list_train_pos,axis=1) 
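# Carried over from the MATLAB implementation, where indices are 1-based:
# object i occupies slots 2i-1 (original) and 2i (horizontal flip), tiled
# repN times; the index list is truncated to tarN here and shifted to
# 0-based below via list_train = list_train - 1.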
50 | list_train_pos = list_train_pos[np.where(list_train_pos<=tarN)] 51 | list_train_neg = np.round(np.linspace(0,num_neg-1,round(num_neg*trainRate))) 52 | list_train = np.sort(np.append(list_train_pos, list_train_neg + tarN)) # 1dim 53 | list_train = list_train-1 # start_index is 0 in python, is 1 in matlab 54 | 55 | data = np.zeros((image_size,image_size,3,num_pos*2)) 56 | data_neg = np.zeros((image_size,image_size,3,num_neg)) 57 | for i in range(num_pos): 58 | tar=(i+1)*2-2 59 | IsFlip = False 60 | I_patch,I = getI(objset[i],(image_size,image_size), IsFlip) 61 | data[:,:,:,tar]=I_patch 62 | tar = (i+1)*2-1 63 | IsFlip = True 64 | I_patch,I = getI(objset[i],(image_size,image_size), IsFlip) 65 | data[:,:,:,tar]=I_patch 66 | data = np.tile(data, (1, 1, 1, repN)) 67 | data = data[:,:,:, 0:tarN] 68 | for i in range(num_neg): 69 | tar = i 70 | IsFlip = False 71 | I_patch,I = getI(objset_neg[i], (image_size,image_size), IsFlip) 72 | data_neg[:,:,:,tar]=I_patch 73 | 74 | total_images = tarN + num_neg 75 | labels = np.ones((1,total_images))*(-1) 76 | labels[:,0:tarN] = 1 77 | set = np.ones((1,total_images))*2 78 | set[:, list_train.astype(int)] = 1 79 | data = np.concatenate((data,data_neg), axis=3) 80 | dataMean = np.mean(data[:,:,:,list_train.astype(int)],axis=3) #depend on training data not truth data 81 | imdb_mean = {'mean': dataMean} 82 | dataMean = np.expand_dims(dataMean,axis=3) 83 | dataMean = np.tile(dataMean,(1,1,1,total_images)) 84 | data = data-dataMean 85 | set = np.squeeze(set,axis=0) 86 | 87 | data_train = data[:, :, :, np.where(set == 1)] 88 | data_train = np.squeeze(data_train,axis=3) 89 | data_train = data_train.transpose(3,2,0,1) 90 | label_train = labels[:, np.where(set == 1)] 91 | label_train = label_train[np.newaxis,:,:,:] 92 | label_train = label_train.transpose(3,1,2,0) 93 | imdb_train = {'image': data_train, 'label': label_train} 94 | 95 | 96 | data_val = data[:, :, :, np.where(set == 2)] 97 | data_val = np.squeeze(data_val, axis=3) 98 | data_val = data_val.transpose(3,2,0,1) 99 | label_val = labels[:, np.where(set == 2)] 100 | label_val = label_val[np.newaxis, :, :, :] 101 | label_val = label_val.transpose(3, 1, 2, 0) 102 | imdb_val = {'image': data_val, 'label': label_val} 103 | 104 | return imdb_train, imdb_val, imdb_mean 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /tools/init_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from model.vgg_vd_16.vgg_vd_16 import vgg_vd_16 4 | from model.alexnet.alexnet import alexnet 5 | from model.vgg_m.vgg_m import vgg_m 6 | from model.vgg_s.vgg_s import vgg_s 7 | from model.resnet_18.resnet_18 import resnet_18 8 | from model.resnet_50.resnet_50 import resnet_50 9 | from model.densenet_121.densenet_121 import densenet_121 10 | 11 | def get_net(model, model_path, label_num, dropoutrate, losstype): 12 | if(model == "vgg_vd_16"): 13 | pretrain_path = os.path.join(model_path,model+".mat") 14 | net = vgg_vd_16(pretrain_path, label_num, dropoutrate, losstype) 15 | elif(model == 'alexnet'): 16 | pretrain_path = os.path.join(model_path, model + ".mat") 17 | net = alexnet(pretrain_path, label_num, dropoutrate, losstype) 18 | elif(model == 'vgg_m'): 19 | pretrain_path = os.path.join(model_path, model + ".mat") 20 | net = vgg_m(pretrain_path, label_num, dropoutrate, losstype) 21 | elif(model == 'vgg_s'): 22 | pretrain_path = 
os.path.join(model_path, model + ".mat") 23 | net = vgg_s(pretrain_path, label_num, dropoutrate, losstype) 24 | elif (model == 'resnet_18'): 25 | pretrain_path = os.path.join(model_path, model + ".pth") 26 | net = resnet_18(pretrain_path, label_num, dropoutrate, losstype) 27 | elif (model == 'resnet_50'): 28 | pretrain_path = os.path.join(model_path, model + ".pth") 29 | net = resnet_50(pretrain_path, label_num, dropoutrate, losstype) 30 | elif (model == 'densenet_121'): 31 | pretrain_path = os.path.join(model_path, model + ".pth") 32 | net = densenet_121(pretrain_path, label_num, dropoutrate, losstype) 33 | return net 34 | 35 | 36 | def download_pretrain(model,model_path): 37 | download_vgg_vd_16_path = "http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-16.mat" 38 | download_vgg_m_path = "http://www.vlfeat.org/matconvnet/models/imagenet-vgg-m.mat" 39 | download_vgg_s_path = "http://www.vlfeat.org/matconvnet/models/imagenet-vgg-s.mat" 40 | download_alexnet_path = "http://www.vlfeat.org/matconvnet/models/imagenet-caffe-alex.mat" 41 | download_resnet_18_path = "https://download.pytorch.org/models/resnet18-5c106cde.pth" 42 | download_resnet_50_path = "https://download.pytorch.org/models/resnet50-19c8e357.pth" 43 | download_densenet_121_path = "https://download.pytorch.org/models/densenet121-a639ec97.pth" 44 | 45 | if (model == "vgg_vd_16"): 46 | pretrain_path = os.path.join(model_path, model+".mat") 47 | if os.path.exists(pretrain_path) == False: 48 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_vgg_vd_16_path) 49 | elif model == "alexnet": 50 | pretrain_path = os.path.join(model_path, model + ".mat") 51 | if os.path.exists(pretrain_path) == False: 52 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_alexnet_path) 53 | elif model == "vgg_m": 54 | pretrain_path = os.path.join(model_path, model + ".mat") 55 | if os.path.exists(pretrain_path) == False: 56 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_vgg_m_path) 57 | elif model == "vgg_s": 58 | pretrain_path = os.path.join(model_path, model + ".mat") 59 | if os.path.exists(pretrain_path) == False: 60 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_vgg_s_path) 61 | elif model == "resnet_18": 62 | pretrain_path = os.path.join(model_path, model + ".pth") 63 | if os.path.exists(pretrain_path) == False: 64 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_resnet_18_path) 65 | elif model == "resnet_50": 66 | pretrain_path = os.path.join(model_path, model + ".pth") 67 | if os.path.exists(pretrain_path) == False: 68 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_resnet_50_path) 69 | elif model == "densenet_121": 70 | pretrain_path = os.path.join(model_path, model + ".pth") 71 | if os.path.exists(pretrain_path) == False: 72 | os.system(" wget -O " + pretrain_path + " --no-check-certificate " + download_densenet_121_path) 73 | else: 74 | print("error: no target model!") 75 | os.exit(0) 76 | 77 | 78 | def init_model(root_path,args): 79 | model_path = os.path.join(root_path, 'model', args.model) 80 | download_pretrain(args.model,model_path) 81 | net = get_net(args.model, model_path, args.label_num, args.dropoutrate, args.losstype) 82 | return net 83 | 84 | 85 | -------------------------------------------------------------------------------- /tools/lib.py: -------------------------------------------------------------------------------- 1 | import os 2 | 
import cv2 3 | import torch 4 | import h5py 5 | import pandas as pd 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def init_lr(model,label_num,losstype): 11 | if model == "alexnet": 12 | lrMag = 30 * label_num 13 | elif model == "vgg_vd_16": 14 | lrMag = label_num 15 | elif model == "vgg_m": 16 | lrMag = 30 * label_num 17 | elif model == "vgg_s": 18 | lrMag = 10 * label_num 19 | elif model == "resnet_18": 20 | lrMag = 10*label_num 21 | elif model == "resnet_50": 22 | lrMag = 10*label_num 23 | elif model == "densenet_121": 24 | lrMag = 10*label_num 25 | else: 26 | print("error: no target model!") 27 | sys.exit(0) 28 | 29 | if label_num == 1: 30 | lr = np.logspace(-4, -4, 1000) 31 | lr = lr[0:500]*lrMag 32 | else: 33 | lr = np.logspace(-4, -5, 80)*lrMag 34 | # mag needs to be modified for multi-class classification 35 | if losstype == 'softmax': 36 | if label_num > 10: 37 | lr = lr/50 38 | if model == 'vgg_m': 39 | lr = lr/10 40 | else: 41 | lr = lr/5 42 | else: 43 | if label_num > 10: 44 | lr = lr/10 45 | if model == 'vgg_m': 46 | lr = lr*2 47 | epochnum = len(lr) 48 | return lr, epochnum 49 | 50 | # cv2.imread returns BGR; swap channels 0 and 2 to get RGB 51 | def cv2imread(path): 52 | I = cv2.imread(path) 53 | channel_1 = I[:,:,0] 54 | channel_3 = I[:,:,2] 55 | I[:,:,2] = channel_1 56 | I[:,:,0] = channel_3 57 | return I 58 | 59 | 60 | def make_dir(dir_path): 61 | if os.path.exists(dir_path) == False: 62 | os.mkdir(dir_path) 63 | 64 | 65 | def save_imdb(imdb_path,imdb,type): 66 | f = h5py.File(imdb_path, "w") 67 | if type == 'mean': 68 | f.create_dataset('mean', data=imdb['mean']) 69 | else: 70 | f.create_dataset('image', data=imdb['image']) 71 | f.create_dataset('label', data=imdb['label']) 72 | f.close() 73 | 74 | 75 | def load_imdb(mean_path,type): 76 | f = h5py.File(mean_path, "r") 77 | if type == 'mean': 78 | imdb = f['mean'] 79 | else: 80 | imdb = f 81 | return imdb 82 | 83 | # load trained model 84 | def load_model(model_path): 85 | model = torch.load(model_path) 86 | return model 87 | 88 | 89 | def load_csv(csv_path): 90 | data = pd.read_csv(csv_path) 91 | vector = np.array(data) 92 | vector = vector.squeeze(1) 93 | return vector 94 | 95 | def load_txt(txt_path,type): 96 | if type == "int_int": 97 | res = [] 98 | with open(txt_path,'r') as data: 99 | for each_line in data: 100 | temp = each_line.split() 101 | temp[0] = int(temp[0]) 102 | temp[1] = int(temp[1]) 103 | res.append(temp) 104 | res = np.array(res) 105 | elif type == "int_str": 106 | res = {'id':[],'id_name':[]} 107 | with open(txt_path,'r') as data: 108 | for each_line in data: 109 | temp = each_line.split() 110 | temp[0] = int(temp[0]) 111 | res['id'].append(temp[0]) 112 | res['id_name'].append(temp[1]) 113 | else: 114 | res = [] 115 | with open(txt_path, 'r') as data: 116 | for each_line in data: 117 | temp = each_line.split() 118 | temp[0] = float(temp[1]) 119 | temp[1] = float(temp[2]) 120 | temp[2] = float(temp[3]) 121 | temp[3] = float(temp[4]) 122 | res.append(temp[0:4]) 123 | res = np.array(res) 124 | return res 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /tools/load_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from tools.get_ilsvrimdb import get_ilsvrimdb 7 | from tools.get_vocimdb import get_vocimdb 8 | from tools.get_cubimdb import get_cubimdb 9 | from tools.get_voc2010imdb import get_voc2010imdb 10 | from tools.get_helenimdb import get_helenimdb 
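# The get_*imdb helpers imported here each return (imdb_train, imdb_val,
# imdb_mean) dicts; get_imdb below caches them as train.mat / val.mat /
# mean.mat under imdb_path, so the (potentially slow) preprocessing runs once.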
11 | from tools.get_celebaimdb import get_celebaimdb 12 | from tools.get_cubsampleimdb import get_cubsampleimdb 13 | from tools.lib import * 14 | 15 | 16 | class MyDataset(torch.utils.data.Dataset): 17 | def __init__(self, data, transform=None): 18 | self.transform = transform 19 | self.data = data 20 | 21 | def __getitem__(self,index): 22 | img = torch.from_numpy(self.data['image'][index,:,:,:]).float() 23 | label = torch.from_numpy(self.data['label'][index,:,:,:]).float() 24 | 25 | if self.transform is not None: 26 | img = self.transform(img) 27 | return img,label 28 | 29 | def __len__(self): 30 | return self.data['label'].shape[0] 31 | 32 | 33 | def get_density(label): 34 | if label.shape[1]>1: 35 | label = torch.from_numpy(label[:,:,0,0]) 36 | density = torch.mean((label>0).float(),0) 37 | else: 38 | density = torch.Tensor([0]) 39 | return density 40 | 41 | 42 | def download_dataset(datasets_path,dataset_path,dataset): 43 | downloadpath_ilsvrcanimalpart = "https://github.com/zqs1022/detanimalpart.git" 44 | downloadpath1_vocpart = "http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz" 45 | downloadpath2_vocpart = "http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar" 46 | downloadpath_cub200 = "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz" 47 | 48 | 49 | if os.path.exists(dataset_path) == False: 50 | if dataset == "ilsvrcanimalpart": 51 | os.system(" git clone " + downloadpath_ilsvrcanimalpart + " " + dataset_path) 52 | os.system(" unzip " + os.path.join(dataset_path, 'detanimalpart-master.zip') + ' -d ' + dataset_path) 53 | elif dataset == "vocpart": 54 | os.system(" wget -O " + dataset_path + " --no-check-certificate " + downloadpath1_vocpart) 55 | os.system(" wget -O " + dataset_path + " --no-check-certificate " + downloadpath2_vocpart) 56 | os.system(" tar -xvzf "+ dataset_path + '/trainval.tar.gz') 57 | os.system(" tar -xvf " + dataset_path + '/VOCtrainval_03-May-2010.tar') 58 | elif dataset == "cub200": 59 | os.system(" wget -O " + dataset_path + " --no-check-certificate " + downloadpath_cub200) 60 | os.system(" tar -xvzf "+ dataset_path + '/CUB_200_2011.tgz') 61 | else: 62 | print("error: no target dataset!") 63 | os.exit(0) 64 | 65 | 66 | def get_imdb(root_path,imdb_path,dataset_path,dataset,imagesize,label_name): 67 | neg_path = os.path.join(root_path, 'datasets', 'neg') 68 | imdb_train_path = os.path.join(imdb_path, 'train.mat') 69 | imdb_val_path = os.path.join(imdb_path, 'val.mat') 70 | imdb_mean_path = os.path.join(imdb_path, 'mean.mat') 71 | if os.path.exists(imdb_train_path) == False: 72 | if (dataset == "ilsvrcanimalpart"): 73 | imdb_train, imdb_val, imdb_mean = get_ilsvrimdb(dataset_path, neg_path, label_name,imagesize) 74 | save_imdb(imdb_train_path, imdb_train ,'train') # image: type:numpy size(3596, 3, 224, 224) ; label type:numpy size(3596, 1, 1, 1) 75 | save_imdb(imdb_val_path, imdb_val, 'val') # image: type:numpy size(404, 3, 224, 224) ; label type:numpy size(404, 1, 1, 1) 76 | save_imdb(imdb_mean_path, imdb_mean, 'mean') #mean type:numpy size:(224,224,3) ; 77 | elif dataset == "vocpart": 78 | imdb_train, imdb_val, imdb_mean = get_vocimdb(root_path, dataset, dataset_path, neg_path, label_name,imagesize) 79 | save_imdb(imdb_train_path, imdb_train,'train') 80 | save_imdb(imdb_val_path, imdb_val,'val') 81 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 82 | elif dataset == "cub200": 83 | imdb_train, imdb_val, imdb_mean = get_cubimdb(dataset_path, neg_path, label_name,imagesize) 84 | 
save_imdb(imdb_train_path, imdb_train,'train') 85 | save_imdb(imdb_val_path, imdb_val,'val') 86 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 87 | elif dataset == "voc2010_crop": 88 | imdb_train, imdb_val, imdb_mean = get_voc2010imdb(root_path, dataset, dataset_path, label_name, imagesize) 89 | save_imdb(imdb_train_path, imdb_train,'train') 90 | save_imdb(imdb_val_path, imdb_val,'val') 91 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 92 | elif dataset == "cubsample": 93 | imdb_train, imdb_val, imdb_mean = get_cubsampleimdb(root_path, dataset, dataset_path, 'cubsample', imagesize) 94 | save_imdb(imdb_train_path, imdb_train,'train') 95 | save_imdb(imdb_val_path, imdb_val,'val') 96 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 97 | elif dataset == "helen": 98 | imdb_train, imdb_val, imdb_mean = get_helenimdb(root_path, dataset, dataset_path, 'helen', imagesize) 99 | save_imdb(imdb_train_path, imdb_train,'train') 100 | save_imdb(imdb_val_path, imdb_val,'val') 101 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 102 | elif dataset == "celeba": 103 | imdb_train, imdb_val, imdb_mean = get_celebaimdb(root_path, dataset, dataset_path, imagesize) 104 | save_imdb(imdb_train_path, imdb_train,'train') 105 | save_imdb(imdb_val_path, imdb_val,'val') 106 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 107 | else: 108 | imdb_train = load_imdb(imdb_train_path, 'train') 109 | imdb_val = load_imdb(imdb_val_path, 'val') 110 | 111 | return imdb_train, imdb_val 112 | 113 | 114 | def load_data(root_path, imdb_path, args): 115 | datasets_path = os.path.join(root_path,'datasets') 116 | # Check if you need to download the dataset 117 | download_dataset(datasets_path,datasets_path,args.dataset) 118 | # Check if you need to generate the imdb 119 | imdb_train, imdb_val = get_imdb(root_path,imdb_path,datasets_path,args.dataset,args.imagesize,args.label_name) 120 | density = get_density(np.concatenate((imdb_train['label'], imdb_val['label']), axis=0)) 121 | train_dataset = MyDataset(imdb_train, transform=None) 122 | val_dataset = MyDataset(imdb_val, transform=None) 123 | train_dataloader = DataLoader(train_dataset, args.batchsize, shuffle=True) 124 | val_dataloader = DataLoader(val_dataset, args.batchsize, shuffle=True) 125 | dataset_length = {'train': len(train_dataset), 'val': len(val_dataset)} 126 | return train_dataloader, val_dataloader, density, dataset_length -------------------------------------------------------------------------------- /tools/load_data_multi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import DataLoader 6 | from tools.get_ilsvrimdb import get_ilsvrimdb 7 | from tools.get_vocimdb import get_vocimdb 8 | from tools.get_cubimdb import get_cubimdb 9 | from tools.get_ilsvrimdb import readAnnotation as ilsvr_readAnnotation 10 | from tools.get_cubimdb import readAnnotation as cub_readAnnotation 11 | from tools.get_vocimdb import readAnnotation as voc_readAnnotation 12 | from tools.lib import * 13 | from tools.load_data import download_dataset 14 | from tools.load_data import get_density 15 | from tools.load_data import MyDataset 16 | 17 | def get_imdb(root_path,imdb_path,dataset_path,dataset,imagesize,label_name): 18 | neg_path = os.path.join(root_path, 'datasets', 'neg') 19 | imdb_train_path = os.path.join(imdb_path, label_name + '_train.mat') 20 | imdb_val_path = os.path.join(imdb_path, label_name +'_val.mat') 21 | imdb_mean_path = os.path.join(imdb_path, label_name+ 
'_mean.mat') 22 | if os.path.exists(imdb_train_path) == False: 23 | if (dataset == "ilsvrcanimalpart"): 24 | imdb_train, imdb_val, imdb_mean = get_ilsvrimdb(dataset_path, neg_path, label_name,imagesize) 25 | save_imdb(imdb_train_path, imdb_train ,'train') # image: type:numpy size(3596, 3, 224, 224) ; label type:numpy size(3596, 1, 1, 1) 26 | save_imdb(imdb_val_path, imdb_val, 'val') # image: type:numpy size(404, 3, 224, 224) ; label type:numpy size(404, 1, 1, 1) 27 | save_imdb(imdb_mean_path, imdb_mean, 'mean') #mean type:numpy size:(224,224,3) ; 28 | elif dataset == "vocpart": 29 | imdb_train, imdb_val, imdb_mean = get_vocimdb(root_path, dataset, dataset_path, neg_path, label_name,imagesize) 30 | save_imdb(imdb_train_path, imdb_train,'train') 31 | save_imdb(imdb_val_path, imdb_val,'val') 32 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 33 | elif dataset == "cub200": 34 | imdb_train, imdb_val, imdb_mean = get_cubimdb(dataset_path, neg_path, label_name, imagesize) 35 | save_imdb(imdb_train_path, imdb_train, 'train') 36 | save_imdb(imdb_val_path, imdb_val, 'val') 37 | save_imdb(imdb_mean_path, imdb_mean, 'mean') 38 | 39 | def get_imdb_multi(root_path,dataset_path,imdb_path,dataset,label_num,label_name): 40 | imdb_train_path = os.path.join(imdb_path, 'train.mat') 41 | imdb_val_path = os.path.join(imdb_path, 'val.mat') 42 | imdb_mean_path = os.path.join(imdb_path, 'mean.mat') 43 | if os.path.exists(imdb_train_path) == False: 44 | trainRate = 0.9 45 | if (label_num > 10): 46 | maxSampleNum = 400 47 | minSampleNum = 100 48 | else: 49 | maxSampleNum = 1000000 50 | minSampleNum = 1500 51 | for i in range(label_num): 52 | tempimdb_train_path = os.path.join(imdb_path, label_name[i] + '_train.mat') 53 | tempimdb_val_path = os.path.join(imdb_path, label_name[i] + '_val.mat') 54 | tempimdb_mean_path = os.path.join(imdb_path, label_name[i] + '_mean.mat') 55 | tempimdb_train = load_imdb(tempimdb_train_path, 'train') 56 | tempimdb_val = load_imdb(tempimdb_val_path, 'val') 57 | tempimdb_img = np.concatenate((tempimdb_train['image'],tempimdb_val['image']),axis=0) 58 | tempimdb_label = np.concatenate((tempimdb_train['label'],tempimdb_val['label']),axis=0) 59 | tempimdb_mean = load_imdb(tempimdb_mean_path, 'mean') 60 | tempimdb_mean = np.transpose(tempimdb_mean,(2,1,0)) 61 | tempimdb_mean = tempimdb_mean[np.newaxis,:,:,:] 62 | if (dataset == "ilsvrcanimalpart"): 63 | objset = ilsvr_readAnnotation(dataset_path, label_name[i]) 64 | elif dataset == "vocpart": 65 | objset = voc_readAnnotation(root_path, dataset, dataset_path, label_name) 66 | elif dataset == "cub200": 67 | objset = cub_readAnnotation(dataset_path, label_name) 68 | List = np.where(tempimdb_label == 1) 69 | List = List[0] 70 | List = List[0:(len(objset)*2)] 71 | if(len(List) < minSampleNum): 72 | List = np.tile(List,int(np.ceil(minSampleNum/len(List)))) 73 | List = List[0:minSampleNum] 74 | List = List[0:min(len(List),maxSampleNum)] 75 | tempimdb_img = tempimdb_img[List,:,:,:] 76 | tempimdb_label = tempimdb_label[List,:,:,:] 77 | img_num = len(List) 78 | tempimdb_mean = np.repeat(tempimdb_mean,img_num,axis=0) 79 | tempimdb_img = tempimdb_img - tempimdb_mean 80 | if(i==0): 81 | imdb_img = tempimdb_img 82 | imdb_label = np.ones((img_num,label_num,1,1)) * (-1) 83 | imdb_label[:,0,0,0] = tempimdb_label[:,0,0,0] 84 | else: 85 | imdb_img = np.concatenate((imdb_img,tempimdb_img),axis=0) 86 | imdb_label = np.concatenate((imdb_label,np.ones((img_num,label_num,1,1)) * (-1)),axis=0) 87 | 
87 |                 imdb_label[(imdb_label.shape[0] - tempimdb_label.shape[0]):imdb_label.shape[0], i, 0, 0] = tempimdb_label[:, 0, 0, 0]
88 |         num = imdb_img.shape[0]
89 |         List_train = np.round(np.linspace(0, num - 1, round(num * trainRate)))
90 |         split = np.ones((1, num)) * 2  # 2 marks validation samples
91 |         split[:, List_train.astype(int)] = 1  # 1 marks training samples
92 |         dataMean = np.mean(imdb_img[List_train.astype(int), :, :, :], axis=0)  # shape: (3, 224, 224)
93 |         imdb_mean = {'mean': np.transpose(dataMean, (2, 1, 0))}
94 |         dataMean = dataMean[np.newaxis, :, :, :]
95 |         dataMean = np.repeat(dataMean, num, axis=0)
96 |         imdb_img = imdb_img - dataMean  # note: the per-label means were already subtracted above; this removes the global mean as well
97 |         split = np.squeeze(split, axis=0)
98 |
99 |         data_train = imdb_img[np.where(split == 1), :, :, :]
100 |         data_train = np.squeeze(data_train, axis=0)
101 |         label_train = imdb_label[np.where(split == 1), :, :, :]
102 |         label_train = np.squeeze(label_train, axis=0)
103 |         imdb_train = {'image': data_train, 'label': label_train}
104 |
105 |         data_val = imdb_img[np.where(split == 2), :, :, :]
106 |         data_val = np.squeeze(data_val, axis=0)
107 |         label_val = imdb_label[np.where(split == 2), :, :, :]
108 |         label_val = np.squeeze(label_val, axis=0)
109 |         imdb_val = {'image': data_val, 'label': label_val}
110 |
111 |         save_imdb(imdb_train_path, imdb_train, 'train')  # image: numpy array, e.g. (3596, 3, 224, 224); label: numpy array, (3596, label_num, 1, 1)
112 |         save_imdb(imdb_val_path, imdb_val, 'val')  # image: numpy array, e.g. (404, 3, 224, 224); label: numpy array, (404, label_num, 1, 1)
113 |         save_imdb(imdb_mean_path, imdb_mean, 'mean')  # mean: numpy array of shape (224, 224, 3)
114 |     else:
115 |         imdb_train = load_imdb(imdb_train_path, 'train')
116 |         imdb_val = load_imdb(imdb_val_path, 'val')
117 |     return imdb_train, imdb_val
118 |
119 |
120 | def load_data_multi(root_path, imdb_path, args):
121 |     datasets_path = os.path.join(root_path, 'datasets')
122 |     dataset_path = os.path.join(datasets_path, args.dataset)
123 |     # Download the dataset if it is not already on disk
124 |     download_dataset(datasets_path, dataset_path, args.dataset)
125 |     # Build the per-label imdb files, then merge them into one multi-label imdb
126 |     for i in range(args.label_num):
127 |         get_imdb(root_path, imdb_path, dataset_path, args.dataset, args.imagesize, args.label_name[i])
128 |     imdb_train, imdb_val = get_imdb_multi(root_path, dataset_path, imdb_path, args.dataset, args.label_num, args.label_name)
129 |     density = get_density(np.concatenate((imdb_train['label'], imdb_val['label']), axis=0))
130 |     train_dataset = MyDataset(imdb_train, transform=None)
131 |     val_dataset = MyDataset(imdb_val, transform=None)
132 |     train_dataloader = DataLoader(train_dataset, args.batchsize, shuffle=True)
133 |     val_dataloader = DataLoader(val_dataset, args.batchsize, shuffle=False)
134 |     dataset_length = {'train': len(train_dataset), 'val': len(val_dataset)}
135 |     return train_dataloader, val_dataloader, density, dataset_length
--------------------------------------------------------------------------------
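The label layout that `get_imdb_multi` assembles is worth making explicit: every sample carries a `(label_num, 1, 1)` vector filled with -1, and only the column of its own category receives the value from the per-label imdb (+1 positive / -1 negative). A standalone numpy sketch of that step:

```python
import numpy as np

label_num, img_num, i = 3, 4, 1                       # toy sizes; i is the current category
block = np.ones((img_num, label_num, 1, 1)) * (-1)    # all categories start at -1
per_label = np.ones((img_num, 1, 1, 1))               # labels from category i's own imdb
block[:, i, 0, 0] = per_label[:, 0, 0, 0]             # only column i is overwritten
print(block[:, :, 0, 0])                              # column 1 is +1, the others stay -1
```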
/tools/logistic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 |
4 |
5 | class logistic_F(Function):
6 |     @staticmethod
7 |     def forward(ctx, x, c):
8 |         # numerically stable logistic loss: sum over the batch of log(1 + exp(-c*x))
9 |         a = -c.mul(x)
10 |         b = torch.max(a, torch.zeros(a.size()).cuda())
11 |         #b = torch.max(a, torch.zeros(a.size()))  # CPU variant
12 |         t = b + torch.log(torch.exp(-b) + torch.exp(a - b))
13 |         t = torch.sum(t)
14 |         # save the inputs for the backward pass
15 |         ctx.save_for_backward(x, c)
16 |         return t
17 |
18 |     @staticmethod
19 |     def backward(ctx, grad_output):
20 |         # d/dx log(1 + exp(-c*x)) = -c / (1 + exp(c*x))
21 |         x, c = ctx.saved_tensors
22 |         x_grad = c_grad = None
23 |         x_grad = -grad_output * c.div(1 + torch.exp(c.mul(x)))
24 |         return x_grad, c_grad
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
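`logistic_F` computes sum(log(1 + exp(-c*x))) with the usual max-shift for numerical stability, so it should agree with `softplus(-c*x)` summed over the batch. A minimal sanity check (a sketch assuming a CUDA device, since the loss allocates `.cuda()` tensors internally):

```python
import torch
import torch.nn.functional as F
from tools.logistic import logistic_F

x = torch.randn(8, 1, 1, 1, device='cuda', requires_grad=True)         # raw scores
c = torch.randint(0, 2, (8, 1, 1, 1), device='cuda').float() * 2 - 1   # labels in {-1, +1}
loss = logistic_F.apply(x, c)
loss.backward()                                   # exercises the hand-written backward
assert torch.allclose(loss, F.softplus(-c * x).sum())
```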
76 | """ 77 | loss = None 78 | if closure is not None: 79 | loss = closure() 80 | 81 | 82 | for group in self.param_groups: 83 | 84 | weight_decay = group['weight_decay'] 85 | momentum = group['momentum'] 86 | dampening = group['dampening'] 87 | nesterov = group['nesterov'] 88 | 89 | for p in group['params']: 90 | if p.grad is None: 91 | continue 92 | d_p = p.grad.data 93 | #normalize 94 | batchsize=8 95 | d_p.mul_(1/batchsize) 96 | 97 | if weight_decay != 0: 98 | d_p.add_(weight_decay, p.data) 99 | if momentum != 0: 100 | param_state = self.state[p] 101 | if 'momentum_buffer' not in param_state: 102 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data) 103 | buf.mul_(momentum).add_(-d_p) 104 | else: 105 | buf = param_state['momentum_buffer'] 106 | buf.mul_(momentum).add_(-d_p) 107 | if nesterov: 108 | d_p = d_p.add(momentum, buf) 109 | else: 110 | d_p = buf 111 | 112 | p.data.add_(group['lr'], d_p) 113 | 114 | return loss 115 | -------------------------------------------------------------------------------- /tools/showresult.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import h5py 4 | import numpy as np 5 | from tools.getConvNetPara import getConvNetPara 6 | from tools.computeStability import computeStability 7 | from tools.computeStability_multi import computeStability_multi 8 | from tools.lib import* 9 | 10 | 11 | def step_computeStability(root_path,dataset,dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, partRate, imdb_mean): 12 | selectPatternRatio = 1.0 13 | patchNumPerPattern = 100 14 | if(label_name=='n01443537'): 15 | partList=[1] 16 | convnet = getConvNetPara(model) 17 | stability = computeStability(root_path,dataset,dataset_path, truthpart_path, label_name, net, model, convnet, layerID, epochnum, partList, partRate, imdb_mean, selectPatternRatio, patchNumPerPattern) 18 | return stability 19 | 20 | def step_computeStability_multi(root_path,dataset,dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, imdb_mean): 21 | selectPatternRatio = 1.0 22 | patchNumPerPattern = 100 23 | 24 | convnet = getConvNetPara(model) 25 | for i in range(len(label_name)): 26 | if (label_name[i] == 'n01443537'): 27 | partList = [1] 28 | tmp,tmp_score = computeStability_multi(patchNumPerPattern, root_path,dataset,dataset_path, truthpart_path, label_name[i], net, model, convnet, layerID, epochnum,partList, imdb_mean) 29 | if i == 0: 30 | stability = np.zeros((len(tmp),len(label_name))) 31 | score = np.zeros((len(tmp),len(label_name))) 32 | stability[:,i] = np.squeeze(tmp,1) 33 | score[:,i] = tmp_score 34 | for i in range(len(tmp)): 35 | idx = np.argmax(score[i,:]) 36 | stability[i, 0] = stability[i,idx] 37 | stability = stability[:,0] 38 | selectedPatternNum = round(len(stability) * selectPatternRatio) 39 | stability = np.sort(stability[np.isnan(stability) == 0]) 40 | stability = np.mean(stability[0:min(selectedPatternNum, len(stability))]) 41 | return stability 42 | 43 | 44 | 45 | def getresult(root_path,dataset,taskid_path, imdb_path, dataset_path, truthpart_path, label_name, model, layerID, epochnum, partList): 46 | partRate = 1 47 | imdb_mean_path = os.path.join(imdb_path, 'mean.mat') 48 | imdb_mean = load_imdb(imdb_mean_path,'mean') 49 | sta = [] 50 | 51 | net_path = os.path.join(taskid_path,"net-" + str(epochnum) +".pkl") 52 | net = load_model(net_path) 53 | stability = step_computeStability(root_path,dataset,dataset_path, truthpart_path, label_name, 
/tools/showresult.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import h5py
4 | import numpy as np
5 | from tools.getConvNetPara import getConvNetPara
6 | from tools.computeStability import computeStability
7 | from tools.computeStability_multi import computeStability_multi
8 | from tools.lib import *
9 |
10 |
11 | def step_computeStability(root_path, dataset, dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, partRate, imdb_mean):
12 |     selectPatternRatio = 1.0
13 |     patchNumPerPattern = 100
14 |     if label_name == 'n01443537':  # special case: evaluate only the first part for this ILSVRC category
15 |         partList = [1]
16 |     convnet = getConvNetPara(model)
17 |     stability = computeStability(root_path, dataset, dataset_path, truthpart_path, label_name, net, model, convnet, layerID, epochnum, partList, partRate, imdb_mean, selectPatternRatio, patchNumPerPattern)
18 |     return stability
19 |
20 | def step_computeStability_multi(root_path, dataset, dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, imdb_mean):
21 |     selectPatternRatio = 1.0
22 |     patchNumPerPattern = 100
23 |
24 |     convnet = getConvNetPara(model)
25 |     for i in range(len(label_name)):
26 |         if label_name[i] == 'n01443537':
27 |             partList = [1]
28 |         tmp, tmp_score = computeStability_multi(patchNumPerPattern, root_path, dataset, dataset_path, truthpart_path, label_name[i], net, model, convnet, layerID, epochnum, partList, imdb_mean)
29 |         if i == 0:
30 |             stability = np.zeros((len(tmp), len(label_name)))
31 |             score = np.zeros((len(tmp), len(label_name)))
32 |         stability[:, i] = np.squeeze(tmp, 1)
33 |         score[:, i] = tmp_score
34 |     for i in range(len(tmp)):
35 |         idx = np.argmax(score[i, :])
36 |         stability[i, 0] = stability[i, idx]  # keep, for each pattern, the stability under its best-scoring category
37 |     stability = stability[:, 0]
38 |     selectedPatternNum = round(len(stability) * selectPatternRatio)
39 |     stability = np.sort(stability[np.isnan(stability) == 0])
40 |     stability = np.mean(stability[0:min(selectedPatternNum, len(stability))])
41 |     return stability
42 |
43 |
44 |
45 | def getresult(root_path, dataset, taskid_path, imdb_path, dataset_path, truthpart_path, label_name, model, layerID, epochnum, partList):
46 |     partRate = 1
47 |     imdb_mean_path = os.path.join(imdb_path, 'mean.mat')
48 |     imdb_mean = load_imdb(imdb_mean_path, 'mean')
49 |     sta = []
50 |
51 |     net_path = os.path.join(taskid_path, "net-" + str(epochnum) + ".pkl")
52 |     net = load_model(net_path)
53 |     stability = step_computeStability(root_path, dataset, dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, partRate, imdb_mean)
54 |     return stability
55 |
56 | def getresult_multi(root_path, dataset, taskid_path, imdb_path, dataset_path, truthpart_path, label_name, model, layerID, epochnum, partList):
57 |     imdb_mean_path = os.path.join(imdb_path, 'mean.mat')
58 |     imdb_mean = load_imdb(imdb_mean_path, 'mean')
59 |     net_path = os.path.join(taskid_path, "net-" + str(epochnum) + ".pkl")
60 |     net = load_model(net_path)
61 |     stability = step_computeStability_multi(root_path, dataset, dataset_path, truthpart_path, label_name, net, model, layerID, epochnum, partList, imdb_mean)
62 |     return stability
63 |
64 | def showresult(epoch_num, taskid_path, imdb_path, root_path, args):
65 |     if args.model in ['alexnet', 'vgg_m', 'vgg_s']:
66 |         layerID = 6
67 |     elif args.model in ['vgg_vd_16']:
68 |         layerID = 14
69 |     else:
70 |         print('invalid model name: ' + args.model)
71 |         os._exit(1)
72 |
73 |     if args.dataset == 'cub200':
74 |         partList = [1, 6, 14]
75 |     elif args.dataset == 'vocpart':
76 |         partList = [1, 2, 3]
77 |     elif args.dataset == 'ilsvrcanimalpart':
78 |         partList = [1, 2]
79 |     else:
80 |         print('invalid dataset name: ' + args.dataset)
81 |         os._exit(1)
82 |
83 |     datasets_path = os.path.join(root_path, 'datasets')
84 |     dataset_path = os.path.join(datasets_path, args.dataset)
85 |
86 |     truthpart_path = os.path.join(root_path, "data_input", args.dataset)
87 |     if args.task_name == 'classification_multi':
88 |         stability = getresult_multi(root_path, args.dataset, taskid_path, imdb_path, dataset_path, truthpart_path, args.label_name, args.model, layerID, epoch_num, partList)
89 |     else:
90 |         stability = getresult(root_path, args.dataset, taskid_path, imdb_path, dataset_path, truthpart_path, args.label_name, args.model, layerID, epoch_num, partList)
91 |     print(stability)
92 |     return stability
93 |
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
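A hypothetical call to `showresult` after training (the epoch number and paths below are placeholders, not values fixed by the repo; `args` only needs the fields the function reads):

```python
from argparse import Namespace
from tools.showresult import showresult

args = Namespace(model='vgg_vd_16', dataset='vocpart',
                 task_name='classification', label_name='bird')
# Loads task/.../0/net-500.pkl and prints the part-stability score.
stability = showresult(epoch_num=500,
                       taskid_path='task/classification/vgg_vd_16/vocpart/bird/0',
                       imdb_path='task/classification/vgg_vd_16/vocpart/bird',
                       root_path='.', args=args)
```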
/tools/softmax.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import h5py
3 | import numpy as np
4 | from torch.autograd import Function
5 | from torch.autograd import Variable
6 |
7 | class softmax_F(Function):
8 |     @staticmethod
9 |     def forward(ctx, x, c):
10 |         # numerically stable cross-entropy: log-sum-exp of x minus the score of the target channel (the argmax of c)
11 |         (a, tmp) = torch.max(c, 1)
12 |         tmp = tmp.unsqueeze(1).float()
13 |         inputSize = np.array([x.size()[0], x.size()[1], x.size()[2], x.size()[3]])
14 |         numPixelsPerImage = np.prod(inputSize[2:4])
15 |         numPixels = numPixelsPerImage * inputSize[0]
16 |         imageVolume = numPixelsPerImage * inputSize[1]
17 |         n = np.array(range(numPixels))
18 |         n = np.reshape(n, (tmp.size()[0], tmp.size()[1], tmp.size()[2], tmp.size()[3]))
19 |         offset = np.mod(n, numPixelsPerImage) + imageVolume * np.fix(n / numPixelsPerImage)
20 |         ci = torch.from_numpy(offset).float().cuda() + numPixelsPerImage * torch.max(tmp, torch.zeros(tmp.size()).cuda())  # flat index of each sample's target score
21 |         (Xmax, b) = torch.max(x, 1)
22 |         Xmax = Xmax.unsqueeze(1).float()
23 |         ex = torch.exp(x - Xmax)
24 |         x_line = x.reshape(x.size()[0] * x.size()[1])
25 |         x_ci = torch.zeros(ci.size()).cuda()
26 |         for i in range(x.size()[0]):
27 |             x_ci[i, 0, 0, 0] = x_line[ci[i, 0, 0, 0].long()]
28 |         t = Xmax + torch.log(torch.sum(ex, 1).unsqueeze(1)) - x_ci
29 |         t = torch.sum(t)
30 |         ctx.save_for_backward(x, tmp)
31 |         return t
32 |
33 |     @staticmethod
34 |     def backward(ctx, grad_output):
35 |         # gradient of cross-entropy: softmax(x), minus 1 at each sample's target index
36 |         x, tmp = ctx.saved_tensors
37 |         x_grad = tmp_grad = None
38 |         inputSize = np.array([x.size()[0], x.size()[1], x.size()[2], x.size()[3]])
39 |         numPixelsPerImage = np.prod(inputSize[2:4])
40 |         numPixels = numPixelsPerImage * inputSize[0]
41 |         imageVolume = numPixelsPerImage * inputSize[1]
42 |
43 |         n = np.array(range(numPixels))
44 |         n = np.reshape(n, (tmp.size()[0], tmp.size()[1], tmp.size()[2], tmp.size()[3]))
45 |         offset = np.mod(n, numPixelsPerImage) + imageVolume * np.fix(n / numPixelsPerImage)
46 |         ci = torch.from_numpy(offset).float().cuda() + numPixelsPerImage * torch.max(tmp, torch.zeros(tmp.size()).cuda())
47 |         (Xmax, b) = torch.max(x, 1)
48 |         Xmax = Xmax.unsqueeze(1).float()
49 |         ex = torch.exp(x - Xmax)
50 |         x_grad = ex.div(torch.sum(ex, 1).unsqueeze(1))  # softmax probabilities
51 |
52 |         x_grad_line = x_grad.reshape(x_grad.size()[0] * x_grad.size()[1])
53 |         for i in range(ci.size()[0]):
54 |             index = ci[i, 0, 0, 0].long()
55 |             x_grad_line[index] = x_grad_line[index] - 1
56 |         x_grad = x_grad_line.reshape(x_grad.size()[0], x_grad.size()[1], x_grad.size()[2], x_grad.size()[3])
57 |         x_grad = grad_output.mul(x_grad)
58 |         x_grad = x_grad * x.size()[1]  # note: the gradient is rescaled by the number of channels (classes)
59 |         #print('x_grad', x_grad[0, :, :, :])
60 |         return x_grad, tmp_grad
61 |
62 |
63 |
64 | '''
65 | Data = h5py.File('imdb_train_32_mutil.mat')
66 | label = Data['label'][0:8]
67 | label = torch.from_numpy(label)
68 | label = label.reshape(label.size()[0], label.size()[1], 1, 1).cuda()
69 |
70 | data = h5py.File('m_fmap_finally.mat')
71 | x = data['fmap'][:, :, :, :]
72 | x = torch.from_numpy(x).cuda()
73 | x = Variable(x, requires_grad=True)
74 |
75 | loss = softmax_F.apply(x, label)
76 | loss.backward()
77 | '''
78 |
--------------------------------------------------------------------------------
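With 1x1 spatial maps, the indexing gymnastics in `softmax_F` reduce to ordinary cross-entropy, summed over the batch. A minimal check (a sketch assuming a CUDA device, matching the `.cuda()` calls above):

```python
import torch
import torch.nn.functional as F
from tools.softmax import softmax_F

x = torch.randn(4, 6, 1, 1, device='cuda', requires_grad=True)  # scores for 6 classes
target = torch.randint(0, 6, (4,), device='cuda')
c = F.one_hot(target, 6).float().view(4, 6, 1, 1)               # one-hot labels
loss = softmax_F.apply(x, c)
ref = F.cross_entropy(x.view(4, 6), target, reduction='sum')
assert torch.allclose(loss, ref, atol=1e-4)
```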
/tools/train_model.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import torch
4 | import numpy as np
5 | from tools.sgd import SGD
6 | from torch.autograd import Variable
7 | from tools.logistic import logistic_F
8 | from tools.softmax import softmax_F
9 | from tensorboardX import SummaryWriter
10 | from tools.lib import *
11 | from torch import nn
12 |
13 |
14 | def train_model(taskid_path, args, net, train_dataloader, val_dataloader, density, dataset_length):
15 |
16 |     log_path = os.path.join(taskid_path, "log")
17 |     make_dir(log_path)
18 |     writer = SummaryWriter(log_path)
19 |
20 |     torch.cuda.set_device(args.gpu_id)
21 |     net = net.cuda()
22 |
23 |     max_acc = 0
24 |     max_epoch = 0
25 |     judge = 0
26 |
27 |
28 |     for epoch in range(args.epochnum):
29 |         paras = dict(net.named_parameters())  # rebuild parameter groups each epoch so the per-layer learning rates track args.lr[epoch]
30 |         paras_new = []
31 |         for k, v in paras.items():
32 |             if 'mask' in k:
33 |                 if 'bias' in k:
34 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 2, 'weight_decay': args.weightdecay * 0}]
35 |                 if 'mask_weight' in k:
36 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 0.05, 'weight_decay': args.weightdecay * 0}]
37 |                 if '.weight' in k:
38 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 1, 'weight_decay': args.weightdecay * 1}]
39 |             if 'line' in k:
40 |                 if 'bias' in k:
41 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 2, 'weight_decay': args.weightdecay * 0}]
42 |                 if 'weight' in k:
43 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 1, 'weight_decay': args.weightdecay * 1}]
44 |             if 'conv' in k:
45 |                 if 'bias' in k:
46 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 1, 'weight_decay': args.weightdecay * 1}]
47 |                 if 'weight' in k:
48 |                     paras_new += [{'params': [v], 'lr': args.lr[epoch] * 1, 'weight_decay': args.weightdecay * 1}]
49 |         optimizer = SGD(paras_new, lr=args.lr[epoch], momentum=args.momentum, weight_decay=args.weightdecay)
50 |
51 |         # train
52 |         net.train()
53 |         train_loss = []
54 |         train_acc = []
55 |         print('Train: ' + "\n" + 'epoch:{}'.format(epoch + 1))
56 |         for index, (image, label) in enumerate(train_dataloader):
57 |             batch_size = image.shape[0]
58 |
59 |             image = Variable(image)
60 |             image = image.cuda()
61 |             label = label.cuda()
62 |
63 |             out = net(image, label, torch.Tensor([epoch + 1]), density)  # the conv_mask layers take the extra label/epoch/density inputs
64 |
65 |             if args.model in ["resnet_18", "resnet_50", "densenet_121"]:
66 |                 out = torch.unsqueeze(out, 2)
67 |                 out = torch.unsqueeze(out, 3)
68 |             label = Variable(label)
69 |             if args.losstype == 'logistic':
70 |                 loss = logistic_F.apply(out, label)
71 |                 train_loss.append(loss.cpu().clone().data.numpy())
72 |                 train_correct = label.mul(out)
73 |                 train_correct = torch.max(train_correct, torch.zeros(train_correct.size()).cuda())
74 |                 train_correct = torch.sum((train_correct > 0))  # a prediction is correct when label and output share the same sign
75 |                 train_acc.append(train_correct.cpu().data.numpy())
76 |             if args.losstype == 'softmax':
77 |                 loss = softmax_F.apply(out, label)
78 |                 train_loss.append(loss.cpu().clone().data.numpy())
79 |                 (tmp, out) = torch.sort(out, dim=1, descending=True)
80 |                 (tmp, label) = torch.max(label, dim=1)
81 |                 label = label.unsqueeze(2)
82 |                 error = ~(out == label)
83 |                 train_correct = batch_size - torch.sum(error[:, 0, 0, 0])  # use the actual batch size, which may be smaller for the last batch
84 |                 train_acc.append(train_correct.cpu().data.numpy())
85 |             optimizer.zero_grad()
86 |             loss.backward()
87 |             optimizer.step()
88 |
89 |             print('batch:{}/{}'.format(index + 1, len(train_dataloader)) + " " +
90 |                   'loss:{:.6f}'.format(loss / batch_size) + " " +
91 |                   'acc:{:.6f}'.format(train_correct.cpu().data.numpy() / (batch_size * args.label_num)))
92 |
93 |             length = dataset_length['train'] if index + 1 == len(train_dataloader) else args.batchsize * (index + 1)
94 |             if (index + 1) % 10 == 0:  # log running averages every 10 batches
95 |                 writer.add_scalar('Train/Loss', sum(train_loss) / length, epoch)
96 |                 writer.add_scalar('Train/acc', sum(train_acc) / (length * args.label_num), epoch)
97 |
98 |
99 |         # eval
100 |
101 |         net.eval()
102 |         with torch.no_grad():
103 |             eval_loss = []
104 |             eval_acc = []
105 |             for index, (image, label) in enumerate(val_dataloader):
106 |                 print('Val: ' + "\n" + 'epoch:{}'.format(epoch + 1))
107 |                 batch_size = image.shape[0]
108 |                 image = Variable(image)
109 |                 image = image.cuda()
110 |                 label = label.cuda()
111 |
112 |                 out = net(image, label, torch.Tensor([epoch + 1]), density)
113 |                 if args.model in ["resnet_18", "resnet_50", "densenet_121"]:
114 |                     out = torch.unsqueeze(out, 2)
115 |                     out = torch.unsqueeze(out, 3)
116 |                 label = Variable(label)
117 |                 if args.losstype == 'logistic':
118 |                     loss = logistic_F.apply(out, label)
119 |                     eval_loss.append(loss.cpu().data.numpy())
120 |                     eval_correct = label.mul(out)
121 |                     eval_correct = torch.max(eval_correct, torch.zeros(eval_correct.size()).cuda())
122 |                     eval_correct = torch.sum((eval_correct > 0))
123 |                     eval_acc.append(eval_correct.cpu().data.numpy())
124 |                 if args.losstype == 'softmax':
125 |                     loss = softmax_F.apply(out, label)
126 |                     eval_loss.append(loss.cpu().data.numpy())
127 |                     (tmp, out) = torch.sort(out, dim=1, descending=True)
128 |                     (tmp, label) = torch.max(label, dim=1)
129 |                     label = label.unsqueeze(2)
130 |                     error = ~(out == label)
131 |                     eval_correct = batch_size - torch.sum(error[:, 0, 0, 0])
132 |                     eval_acc.append(eval_correct.cpu().data.numpy())
133 |                 length = dataset_length['val'] if index + 1 == len(val_dataloader) else args.batchsize * (index + 1)
134 |                 print('batch:{}/{}'.format(index + 1, len(val_dataloader)) + " " +
135 |                       'loss:{:.6f}'.format(loss / batch_size) + " " +
136 |                       'acc:{:.6f}'.format(eval_correct.cpu().data.numpy() / (batch_size * args.label_num)))
137 |                 print("max_acc:" + str(max_acc))
138 |
139 |             if sum(eval_acc) / (length * args.label_num) > max_acc:
140 |                 judge = 1
141 |                 max_acc = sum(eval_acc) / (length * args.label_num)
142 |                 print("current max_acc:" + str(max_acc))
143 |                 max_epoch = epoch
144 |
145 |             writer.add_scalar('Eval/Loss', sum(eval_loss) / length, epoch)
146 |             writer.add_scalar('Eval/acc', sum(eval_acc) / (length * args.label_num), epoch)
147 |         if judge == 1 or (epoch + 1) % 50 == 0:
148 |             # save a full checkpoint whenever val accuracy improves, plus every 50 epochs
149 |             torch.save(net, taskid_path + '/net-' + str(epoch + 1) + '.pkl')
150 |             #torch.save(net.state_dict(), taskid_path + '/net-params-' + str(epoch + 1) + '.pkl')
151 |             judge = 0
152 |
153 |     return max_acc, max_epoch + 1
--------------------------------------------------------------------------------
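For completeness, a sketch of the argument surface `train_model` consumes (the field names follow the code above; the values are placeholders rather than the repo's defaults, which `demo.py` sets):

```python
from argparse import Namespace

args = Namespace(gpu_id=0, epochnum=200, lr=[0.01] * 200, momentum=0.9,
                 weightdecay=5e-4, batchsize=8, label_num=1,
                 model='vgg_vd_16', losstype='logistic')
# net, the dataloaders, density and dataset_length come from init_model/load_data
max_acc, best_epoch = train_model(taskid_path, args, net, train_dataloader,
                                  val_dataloader, density, dataset_length)
```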