├── README.md
├── data_process.py
├── spp_layer.py
├── ssw.py
├── data_pre.py
├── model_wsddn.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
# WSDDN

This is a PyTorch implementation of the WSDDN model proposed in the following paper:

Bilen, H. and Vedaldi, A. Weakly Supervised Deep Detection Networks. In CVPR, 2016.
--------------------------------------------------------------------------------
/data_process.py:
--------------------------------------------------------------------------------
import ssw
import os
import cv2
import numpy

data_path = "./JPEGImages"
f = open('./ssw.txt', 'w')
data_txt = open('annotations.txt', 'r')
c = 0
for line in data_txt:
    line = line.rstrip()
    words = line.split()
    # Image names starting with 2007/2008 are held out for testing; run
    # selective search only on the remaining (training) images.
    if not (words[0][0:4] == '2007' or words[0][0:4] == '2008'):
        img = cv2.imread(os.path.join(data_path, str(words[0]) + ".jpg"))
        img = cv2.resize(img, (480, 480))
        a = ssw.ssw(img)
        a = ssw.feature_mapping(a)
        a = list(numpy.array(a).flat)
        string = str(words[0]) + " " + " ".join(str(i) for i in a) + '\n'
        f.write(string)
        print(c)
        c = c + 1
f.close()
data_txt.close()
--------------------------------------------------------------------------------
/spp_layer.py:
--------------------------------------------------------------------------------
import math
import torch.nn as nn
import torch

def spatial_pyramid_pool(previous_conv, num_sample, previous_conv_size, out_pool_size):
    '''
    previous_conv: a tensor holding the output of the previous convolution layer
    num_sample: the number of images in the batch
    previous_conv_size: an int vector [height, width], the spatial size of the previous conv feature map
    out_pool_size: an int vector of expected output sizes of the max-pooling levels

    returns: a tensor of shape [num_sample x n], the concatenation of the multi-level pooling results
    '''
    for i in range(len(out_pool_size)):
        # Choose kernel, stride and padding so the pooled output is exactly
        # out_pool_size[i] x out_pool_size[i].
        h_wid = math.ceil(previous_conv_size[0] / out_pool_size[i])
        w_wid = math.ceil(previous_conv_size[1] / out_pool_size[i])
        h_pad = min(math.floor((h_wid*out_pool_size[i] - previous_conv_size[0] + 1)/2), math.floor(h_wid/2))
        w_pad = min(math.floor((w_wid*out_pool_size[i] - previous_conv_size[1] + 1)/2), math.floor(w_wid/2))
        maxpool = nn.MaxPool2d((h_wid, w_wid), stride=(h_wid, w_wid), padding=(h_pad, w_pad))
        x = maxpool(previous_conv)
        if i == 0:
            spp = x.view(num_sample, -1)
        else:
            spp = torch.cat((spp, x.view(num_sample, -1)), 1)
    return spp
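
# A minimal sanity check (added; the shapes below are illustrative assumptions):
# with out_pool_size [2, 2], as model_wsddn.py uses, a 512-channel map is pooled
# twice to 2x2, giving 512*4 + 512*4 = 4096 features -- the input size of fc6.
if __name__ == '__main__':
    feat = torch.randn(1, 512, 14, 14)
    pooled = spatial_pyramid_pool(feat, 1, [14, 14], [2, 2])
    print(pooled.shape)  # expected: torch.Size([1, 4096])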
--------------------------------------------------------------------------------
/ssw.py:
--------------------------------------------------------------------------------
import cv2
import selectivesearch
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import math
import torch

def ssw(img, scale=500, sigma=0.7, min_size=20):
    img_lbl, regions = selectivesearch.selective_search(img, scale=scale, sigma=sigma, min_size=min_size)
    candidates = set()
    for r in regions:
        # skip duplicates
        if r['rect'] in candidates:
            continue
        # skip regions that are too small or too large
        if r['size'] < 2000:
            continue
        #x, y, w, h = r['rect']
        # skip regions that are too far from square
        #if w > 2*h or h > 2*w:
        #    continue
        candidates.add(r['rect'])
    ## e.g. ('len(candidates)', 34): 34 windows remain after the first filtering pass
    # 2) second filtering pass: for nested windows, keep only the outer one
    '''
    num_array = []
    for i in candidates:
        if len(num_array) == 0:
            num_array.append(i)
        else:
            content = False
            replace = -1
            index = 0
            for j in num_array:
                ## the new window lies inside an existing window: filter it out
                if i[0] >= j[0] and i[0]+i[2] <= j[0]+j[2] and i[1] >= j[1] and i[1]+i[3] <= j[1]+j[3]:
                    content = True
                    break
                ## the new window encloses an existing window: replace the old one
                elif i[0] <= j[0] and i[0]+i[2] >= j[0]+j[2] and i[1] <= j[1] and i[1]+i[3] >= j[1]+j[3]:
                    replace = index
                    break
                index += 1
            if not content:
                if replace >= 0:
                    num_array[replace] = i
                else:
                    num_array.append(i)
    # number of windows left after filtering
    num_array = set(num_array)
    '''
    return candidates

def feature_mapping(regions):
    # if pooling5 is kept, regions map onto a 7*7 feature map (stride 32)
    mapping = []
    #for ele in regions:
    #    mapping.append((math.floor(ele[0]/32)+1, math.floor(ele[1]/32)+1,
    #                    max(math.ceil((ele[0]+ele[2])/32)-1-(math.floor(ele[0]/32)+1), 0),
    #                    max(0, math.ceil((ele[1]+ele[3])/32)-1-(math.floor(ele[1]/32)+1))))
    # if pooling5 is dropped, the stride is 16 (14*14 for a 224 input; the
    # 480x480 input used here gives a 30x30 map, matching the clamping in data_pre.py)
    for ele in regions:
        mapping.append((math.floor(ele[0]/16)+1, math.floor(ele[1]/16)+1,
                        math.ceil((ele[0]+ele[2])/16)-1-(math.floor(ele[0]/16)+1),
                        math.ceil((ele[1]+ele[3])/16)-1-(math.floor(ele[1]/16)+1)))
    mapping = list(set(mapping))
    return mapping

'''
img = cv2.imread('./JPEGImages/2009_004858.jpg')
print(img.size)
a = ssw(img)
b = feature_mapping(a)
tensor = torch.from_numpy(np.array(b))
print(tensor)
print(tensor.shape)
'''
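
# Added sketch (not in the original): a quick check of the stride-16 mapping.
# An (x, y, w, h) proposal of (100, 60, 200, 150) on the 480x480 image lands at
# floor(100/16)+1 = 7, floor(60/16)+1 = 4, with mapped extents 11 and 9.
if __name__ == '__main__':
    print(feature_mapping([(100, 60, 200, 150)]))  # expected: [(7, 4, 11, 9)]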
--------------------------------------------------------------------------------
/data_pre.py:
--------------------------------------------------------------------------------
from torch.utils import data
from PIL import Image
import torchvision.transforms as transforms
import torch
import numpy as np
from math import floor

Transform = transforms.Compose([
    transforms.Resize([480, 480]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

class myDataSet(data.Dataset):
    def __init__(self, root, istest, transform):
        self.root = root
        self.data_txt = open('annotations.txt', 'r')
        self.ssw_txt = open('ssw.txt', 'r')
        self.ssw_test_txt = open('ssw_test.txt', 'r')
        self.istest = istest
        self.transform = transform
        self.imgs = []
        # The proposal files are assumed to list images in the same order as
        # annotations.txt: each inner loop resumes where the previous search
        # stopped, and the for/else branch supplies a dummy box when no entry
        # is found.
        for line in self.data_txt:
            line = line.rstrip()
            words = line.split()
            if self.istest:
                # test split: images named 2007_* / 2008_*
                if words[0][0:4] == '2007' or words[0][0:4] == '2008':
                    label_cur = [0 for i in range(20)]  # 20-class multi-hot label
                    for i in range(1, len(words)):
                        label_cur[int(words[i])] = 1
                    for linee in self.ssw_test_txt:
                        linee = linee.rstrip()
                        wordss = linee.split()
                        if wordss[0] == words[0]:
                            # each proposal is an (x, y, w, h) 4-tuple on the
                            # 30x30 conv feature map; clamp boxes so they stay
                            # inside the map and are at least 2 cells wide/high
                            ssw_block = torch.Tensor(floor((len(wordss) - 1) / 4), 4)
                            for i in range(floor((len(wordss) - 1) / 4)):
                                w = max(int(wordss[i * 4 + 3]), 2)
                                h = max(int(wordss[i * 4 + 4]), 2)
                                ssw_block[i, 0] = (30 - w if (int(wordss[i * 4 + 1]) + w >= 31) else int(wordss[i * 4 + 1]))
                                ssw_block[i, 2] = w
                                ssw_block[i, 1] = (30 - h if (int(wordss[i * 4 + 2]) + h >= 31) else int(wordss[i * 4 + 2]))
                                ssw_block[i, 3] = h
                            break
                    else:
                        ssw_block = torch.tensor([[0, 0, 2, 2]])
                    self.imgs.append([words[0], ssw_block, label_cur])
            else:
                # training split: everything else
                if not (words[0][0:4] == '2007' or words[0][0:4] == '2008'):
                    label_cur = [0 for i in range(20)]
                    for i in range(1, len(words)):
                        label_cur[int(words[i])] = 1
                    for linee in self.ssw_txt:
                        linee = linee.rstrip()
                        wordss = linee.split()
                        if wordss[0] == words[0]:
                            ssw_block = torch.Tensor(floor((len(wordss) - 1) / 4), 4)
                            for i in range(floor((len(wordss) - 1) / 4)):
                                w = max(int(wordss[i * 4 + 3]), 2)
                                h = max(int(wordss[i * 4 + 4]), 2)
                                ssw_block[i, 0] = (30 - w if (int(wordss[i * 4 + 1]) + w >= 31) else int(wordss[i * 4 + 1]))
                                ssw_block[i, 2] = w
                                ssw_block[i, 1] = (30 - h if (int(wordss[i * 4 + 2]) + h >= 31) else int(wordss[i * 4 + 2]))
                                ssw_block[i, 3] = h
                            break
                    else:
                        ssw_block = torch.tensor([[0, 0, 2, 2]])
                    self.imgs.append([words[0], ssw_block, label_cur])
        self.data_txt.close()
        self.ssw_txt.close()
        self.ssw_test_txt.close()

    def __getitem__(self, index):
        cur_img = Image.open(self.root + self.imgs[index][0] + '.jpg')
        data_once = self.transform(cur_img)
        label_once = self.imgs[index][2]
        ssw_block = self.imgs[index][1]
        return data_once, ssw_block, torch.Tensor(label_once)

    def __len__(self):
        return len(self.imgs)

if __name__ == '__main__':
    trainData = myDataSet('JPEGImages/', 0, Transform)
    testData = myDataSet('JPEGImages/', 1, Transform)
    print('trainData', len(trainData))
    print('testData', len(testData))
    print(trainData[1][1])
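    # Added sketch: each sample is (image tensor, proposal boxes, multi-hot label);
    # boxes are (x, y, w, h) rows on the 30x30 conv feature map (480 input, stride 16).
    img0, boxes0, label0 = trainData[0]
    print(img0.shape, boxes0.shape, label0.shape)  # e.g. [3, 480, 480], [R, 4], [20]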
--------------------------------------------------------------------------------
/model_wsddn.py:
--------------------------------------------------------------------------------
import torch
import torchvision.models as v_models
import torch.nn as nn
import torch.nn.functional as F
from math import floor

from spp_layer import spatial_pyramid_pool
#from data_pre import myDataSet

BATCH_SIZE = 1
R = 10

cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}

class WSDDN(nn.Module):
    def __init__(self, vgg_name):
        super(WSDDN, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.fc6 = nn.Linear(4096, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8c = nn.Linear(4096, 20)  # classification stream
        self.fc8d = nn.Linear(4096, 20)  # detection stream

    def forward(self, x, ssw_get):
        # x.shape = [BATCH_SIZE, 3, h, w]; ssw_get.shape = [BATCH_SIZE, R, 4];
        # returns image-level scores of shape [BATCH_SIZE, 20]
        x = self.features(x)
        x = self.through_spp_new(x, ssw_get)
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x_c = F.relu(self.fc8c(x))
        x_d = F.relu(self.fc8d(x))
        sigma_c = F.softmax(x_c, dim=2)  # normalize over classes
        sigma_d = F.softmax(x_d, dim=1)  # normalize over regions
        x = sigma_c * sigma_d
        x = torch.sum(x, dim=1)  # sum region scores into image-level scores
        return x, sigma_d, sigma_c

    def _make_layers(self, cfg):  # build the VGG feature extractor
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)
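    # Added note: through_spp_new crops each proposal (x, y, w, h) out of the
    # conv feature map and pools the crop with spatial_pyramid_pool using
    # out_pool_size [2, 2], i.e. two 2x2 levels, so every region yields a fixed
    # 4096-dim vector (512 channels * 4 bins * 2 levels) to match fc6,
    # regardless of the region's size.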
    def through_spp_new(self, x, ssw):
        # x.shape = [BATCH_SIZE, 512, 14, 14]; ssw.shape = [BATCH_SIZE, R, 4];
        # y.shape = [BATCH_SIZE, R, 4096]
        for i in range(BATCH_SIZE):
            for j in range(ssw.size(1)):
                fmap_piece = torch.unsqueeze(x[i, :, floor(ssw[i, j, 0]) : floor(ssw[i, j, 0] + ssw[i, j, 2]),
                                                   floor(ssw[i, j, 1]) : floor(ssw[i, j, 1] + ssw[i, j, 3])], 0)
                fmap_piece = spatial_pyramid_pool(previous_conv=fmap_piece, num_sample=1,
                                                  previous_conv_size=[fmap_piece.size(2), fmap_piece.size(3)],
                                                  out_pool_size=[2, 2])
                if j == 0:
                    y_piece = fmap_piece
                else:
                    y_piece = torch.cat((y_piece, fmap_piece))
            if i == 0:
                y = torch.unsqueeze(y_piece, 0)
            else:
                y = torch.cat((y, torch.unsqueeze(y_piece, 0)))
        return y

    def through_spp(self, x):
        # legacy helper: SPP over pre-cropped 5-D input [BATCH_SIZE, R, 512, h, w]
        for i in range(BATCH_SIZE):
            y_piece = torch.unsqueeze(spatial_pyramid_pool(previous_conv=x[i, :], num_sample=R,
                                                           previous_conv_size=[x.size(3), x.size(4)],
                                                           out_pool_size=[2, 2]), 0)
            if i == 0:
                y = y_piece
            else:
                y = torch.cat((y, y_piece))
        return y

    def select_fmap(self, fmap, ssw):
        # crop the regions of interest; fmap.shape = [BATCH_SIZE, 512, 14, 14],
        # ssw.shape = [BATCH_SIZE, R, 4]
        for i in range(BATCH_SIZE):
            for j in range(ssw.size(1)):
                fmap_piece = torch.unsqueeze(fmap[i, :, floor(ssw[i, j, 0]) : floor(ssw[i, j, 0] + ssw[i, j, 2]),
                                                      floor(ssw[i, j, 1]) : floor(ssw[i, j, 1] + ssw[i, j, 3])], 0)
                if j == 0:
                    y_piece = fmap_piece
                else:
                    y_piece = torch.cat((y_piece, fmap_piece), 0)
            if i == 0:
                y = torch.unsqueeze(y_piece, 0)
            else:
                y = torch.cat((y, torch.unsqueeze(y_piece, 0)), 0)
        return y

if __name__ == '__main__':
    net_test = WSDDN('VGG11')
    x_test = torch.randn(BATCH_SIZE, 3, 224, 224)
    ssw_spp = torch.zeros(BATCH_SIZE, R, 4)
    for i in range(BATCH_SIZE):
        for j in range(R):
            ssw_spp[i, j, 0] = 0
            ssw_spp[i, j, 1] = 0
            ssw_spp[i, j, 2] = 4
            ssw_spp[i, j, 3] = 4
    out_test = net_test(x_test, ssw_spp)
    print(out_test[0].shape)
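    # Added sketch: sigma_d is softmax-normalized over regions (dim 1) and
    # sigma_c over classes (dim 2); their product, summed over regions, gives
    # the per-class image score returned first.
    print(out_test[1].shape)  # expected: torch.Size([BATCH_SIZE, R, 20])
    print(out_test[2].shape)  # expected: torch.Size([BATCH_SIZE, R, 20])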
'''
ssw_spp = torch.zeros(BATCH_SIZE, R, 4)
for i in range(BATCH_SIZE):
    for j in range(R):
        ssw_spp[i, j, 0] = 0
        ssw_spp[i, j, 1] = 0
        ssw_spp[i, j, 2] = 4
        ssw_spp[i, j, 3] = 4
map_test = torch.randn(BATCH_SIZE, 512, 14, 14)
y_test = select_fmap(map_test, ssw_spp)
print(y_test.shape)
'''

'''
spp_test = torch.randn(BATCH_SIZE, R, 512, 14, 14)
out_test = through_spp(spp_test)
print(out_test.shape)
'''
#pretrained_model_path =
#net_wsddn = WSDDN('VGG11')
#state_dict = torch.load(pretrained_model_path)
#net_wsddn.load_state_dict({k: v for k, v in state_dict.items() if k in net_wsddn.state_dict()})
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchsummary import summary
from torch.autograd import Variable
import torch.optim as optim
import argparse
from model_wsddn import WSDDN
from data_pre import myDataSet
import os
from tensorboardX import SummaryWriter
import ssw

Transform = transforms.Compose([
    transforms.Resize([480, 480]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

parser = argparse.ArgumentParser(description='wsddn Input: BatchSize, initial LR, EPOCH')
parser.add_argument('--test', '-t', action='store_true',
                    help='set test mode')
parser.add_argument('--model_path', type=str, default='./model_para',
                    help='directory for saving model parameters')
parser.add_argument('--BATCH_SIZE', type=int, default=1,
                    help='batch size')
parser.add_argument('--LR', type=float, default=0.00001,
                    help='learning rate')
parser.add_argument('--EPOCH', type=int, default=40,
                    help='number of epochs')
parser.add_argument('--GPU', type=int, default=0,
                    help='GPU id')
args = parser.parse_args()
model_path = args.model_path
BATCH_SIZE = args.BATCH_SIZE
LR = args.LR
EPOCH = args.EPOCH
print('model_path:', model_path)
print('batch_size:', BATCH_SIZE)
print('initial LR:', LR)
print('epoch:', EPOCH)

torch.cuda.set_device(args.GPU)
net_wsddn = WSDDN('VGG11')
if os.path.exists(os.path.join(model_path, 'wsddn.pkl')):
    net_wsddn.load_state_dict(torch.load(os.path.join(model_path, 'wsddn.pkl')))
else:
    # initialize from an ImageNet-pretrained vgg11_bn checkpoint, copying only
    # the weights whose names also exist in this model
    pretrained_dict = torch.load('vgg11_bn-6002323d.pth.1')
    modified_dict = net_wsddn.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in modified_dict}
    modified_dict.update(pretrained_dict)
    net_wsddn.load_state_dict(modified_dict)
net_wsddn.cuda()

criterion = nn.BCELoss()  # size_average=True is the (deprecated) default, i.e. reduction='mean'
optimizer1 = optim.SGD(net_wsddn.parameters(), lr=LR, momentum=0.9)
optimizer2 = optim.SGD(net_wsddn.parameters(), lr=0.1 * LR, momentum=0.9)
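# Added note: the two optimizers above implement a manual LR drop -- epochs 0-9
# run at LR, epoch 10 onward at 0.1 * LR. A single optimizer stepped once per
# epoch with torch.optim.lr_scheduler.MultiStepLR(optimizer1, milestones=[10],
# gamma=0.1) would be the idiomatic equivalent.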
writer = SummaryWriter('WSDDN')
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
trainData = myDataSet('JPEGImages/', 0, Transform)
testData = myDataSet('JPEGImages/', 1, Transform)

trainLoader = torch.utils.data.DataLoader(dataset=trainData, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)
testLoader = torch.utils.data.DataLoader(dataset=testData, batch_size=BATCH_SIZE, shuffle=False)
if not args.test:
    net_wsddn.train()
    for epoch in range(EPOCH):
        #scheduler.step(epoch)
        running_loss = 0.0
        print(epoch)
        # kuang ("box") holds the proposal boxes for the batch
        for i, (images, kuang, labels) in enumerate(trainLoader):
            images = Variable(images).cuda()  # Variable is a no-op wrapper in PyTorch >= 0.4
            labels = Variable(labels).cuda()
            kuang = Variable(kuang).cuda()
            if epoch < 10:
                optimizer1.zero_grad()
            else:
                optimizer2.zero_grad()
            # forward + backward + optimize
            outputs_1, output_2, output_3 = net_wsddn(images, kuang)
            outputs_1 = torch.sigmoid(outputs_1)
            loss = criterion(outputs_1, labels)
            loss.backward()
            if epoch < 10:
                optimizer1.step()
            else:
                optimizer2.step()

            running_loss += loss.item()

            if i % 500 == 499:
                print('[%d , %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
                running_loss = 0.0
                writer.add_scalar('Train/loss', loss.item(), epoch)
        torch.save(net_wsddn.state_dict(), os.path.join(model_path, 'wsddn.pkl'))
    print('Finished Training')
    writer.close()
    torch.save(net_wsddn.state_dict(), os.path.join(model_path, 'wsddn.pkl'))
else:
    ## UNFINISHED
    net_wsddn.eval()
    result_name = 'box_result.txt'
    f = open(result_name, 'w')
    for i, (images, kuang, labels) in enumerate(testLoader):
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        kuang = Variable(kuang).cuda()
        outputs_1, output_2, output_3 = net_wsddn(images, kuang)
        # write one line per surviving (image, class, region) triple: image
        # index, class index, region score, and the box scaled back up
        for j in range(outputs_1.size(1)):
            if outputs_1[0, j] > 0.05:
                for k in range(output_2.size(1)):  # iterate over regions (size(0) is the batch dim)
                    if output_2[0, k, j] > 0.1:
                        new_line = [i, j, float('%.3f' % output_3[0, k, j].item()), 8 * kuang[0, k, 0].item(),
                                    8 * kuang[0, k, 1].item(), 8 * kuang[0, k, 2].item(), 8 * kuang[0, k, 3].item()]
                        for line_mem in new_line:
                            f.write(str(line_mem) + ' ')
                        f.write('\r\n')
        if (i % 500) == 0:
            print(i)
    f.close()
    # replace each numeric image index in box_result.txt with the image name
    # from annotations.txt, producing for_map.txt for mAP evaluation
    data1 = open('box_result.txt', 'r')
    data2 = open('bonus_ground_truth.txt', 'r')
    f = open('for_map.txt', 'w')
    for line in data1:
        c = 0
        line = line.rstrip()
        words = line.split()
        data3 = open('annotations.txt', 'r')
        for line1 in data3:
            if c == int(words[0]):
                line1 = line1.rstrip()
                words1 = line1.split()
                new_line = [words1[0], words[1], words[2], words[3], words[4], words[5], words[6]]
                for line_mem in new_line:
                    f.write(str(line_mem) + ' ')
                f.write('\r\n')
                data3.close()
                break
            c += 1
    data1.close()
    data2.close()
    f.close()
    '''
    for images, labels in testLoader:
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        outputs_1, output_2 = net_wsddn(images, kuang)
        predicted = outputs_1.data >= 0.5
        vec_1 += (predicted.float() == labels).cpu().float().sum(0)  # correct_num
        vec_2 += labels.cpu().sum(0)  # appear_num
        # equal to predicted = outputs.data >= 0
        total += labels.size(0) * labels.size(1)
        correct += (predicted.float() == labels).sum()
    print('Classification Accuracy of the model on the test images(mAcc): %.4f %%' % (100 * float(correct) / float(total)))
    print('Localization Accuracy of the model on the test images(mAP): %.4f %%' % (100 * (vec_1 * vec_2).sum()))
    '''
--------------------------------------------------------------------------------
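Usage sketch (added for orientation; assumes PASCAL VOC images in ./JPEGImages,
an annotations.txt index, proposal files ssw.txt / ssw_test.txt, and a vgg11_bn
checkpoint next to the scripts, as the code above expects):

    python data_process.py                     # cache selective-search proposals for training images to ssw.txt
    python train.py --LR 0.00001 --EPOCH 40    # train; saves model_para/wsddn.pkl
    python train.py -t                         # test mode; writes box_result.txt and for_map.txt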