├── README.md
├── data_process.py
├── spp_layer.py
├── ssw.py
├── data_pre.py
├── model_wsddn.py
└── train.py
/README.md:
--------------------------------------------------------------------------------
# WSDDN

This is a PyTorch implementation of the WSDDN model proposed in the following paper:

Bilen, H. and Vedaldi, A. Weakly Supervised Deep Detection Networks. In CVPR, 2016.
--------------------------------------------------------------------------------
/data_process.py:
--------------------------------------------------------------------------------
import ssw
import os
import cv2
import numpy

data_path = "./JPEGImages"
f = open('./ssw.txt', 'w')
data_txt = open('annotations.txt', 'r')
c = 0
for line in data_txt:
    line = line.rstrip()
    words = line.split()
    # Image names starting with 2007/2008 are held out for testing; run
    # selective search only on the remaining (training) images.
    if not (words[0][0:4] == '2007' or words[0][0:4] == '2008'):
        img = cv2.imread(os.path.join(data_path, str(words[0]) + ".jpg"))
        img = cv2.resize(img, (480, 480))
        a = ssw.ssw(img)
        a = ssw.feature_mapping(a)
        a = list(numpy.array(a).flat)
        string = str(words[0]) + " " + " ".join(str(i) for i in a) + '\n'
        f.write(string)
        print(c)
        c = c + 1
f.close()
data_txt.close()
--------------------------------------------------------------------------------
/spp_layer.py:
--------------------------------------------------------------------------------
import math
import torch.nn as nn
import torch

def spatial_pyramid_pool(previous_conv, num_sample, previous_conv_size, out_pool_size):
    '''
    previous_conv: a tensor holding the output of the previous convolution layer
    num_sample: the number of images in the batch
    previous_conv_size: an int vector [height, width], the spatial size of the previous conv feature map
    out_pool_size: an int vector of expected output sizes of the max-pooling levels

    returns: a tensor of shape [num_sample x n], the concatenation of the multi-level pooling results
    '''
    for i in range(len(out_pool_size)):
        # Choose kernel, stride and padding so the pooled output is exactly
        # out_pool_size[i] x out_pool_size[i].
        h_wid = math.ceil(previous_conv_size[0] / out_pool_size[i])
        w_wid = math.ceil(previous_conv_size[1] / out_pool_size[i])
        h_pad = min(math.floor((h_wid*out_pool_size[i] - previous_conv_size[0] + 1)/2), math.floor(h_wid/2))
        w_pad = min(math.floor((w_wid*out_pool_size[i] - previous_conv_size[1] + 1)/2), math.floor(w_wid/2))
        maxpool = nn.MaxPool2d((h_wid, w_wid), stride=(h_wid, w_wid), padding=(h_pad, w_pad))
        x = maxpool(previous_conv)
        if i == 0:
            spp = x.view(num_sample, -1)
        else:
            spp = torch.cat((spp, x.view(num_sample, -1)), 1)
    return spp
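
# A minimal sanity check (added; the shapes below are illustrative assumptions):
# with out_pool_size [2, 2], as model_wsddn.py uses, a 512-channel map is pooled
# twice to 2x2, giving 512*4 + 512*4 = 4096 features -- the input size of fc6.
if __name__ == '__main__':
    feat = torch.randn(1, 512, 14, 14)
    pooled = spatial_pyramid_pool(feat, 1, [14, 14], [2, 2])
    print(pooled.shape)  # expected: torch.Size([1, 4096])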
--------------------------------------------------------------------------------
/ssw.py:
--------------------------------------------------------------------------------
import cv2
import selectivesearch
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import math
import torch

def ssw(img, scale=500, sigma=0.7, min_size=20):
    img_lbl, regions = selectivesearch.selective_search(img, scale=scale, sigma=sigma, min_size=min_size)
    candidates = set()
    for r in regions:
        # skip duplicates
        if r['rect'] in candidates:
            continue
        # skip regions that are too small or too large
        if r['size'] < 2000:
            continue
        #x, y, w, h = r['rect']
        # skip regions that are too far from square
        #if w > 2*h or h > 2*w:
        #    continue
        candidates.add(r['rect'])
    ## e.g. ('len(candidates)', 34): 34 windows remain after the first filtering pass
    # 2) second filtering pass: for nested windows, keep only the outer one
    '''
    num_array = []
    for i in candidates:
        if len(num_array) == 0:
            num_array.append(i)
        else:
            content = False
            replace = -1
            index = 0
            for j in num_array:
                ## the new window lies inside an existing window: filter it out
                if i[0] >= j[0] and i[0]+i[2] <= j[0]+j[2] and i[1] >= j[1] and i[1]+i[3] <= j[1]+j[3]:
                    content = True
                    break
                ## the new window encloses an existing window: replace the old one
                elif i[0] <= j[0] and i[0]+i[2] >= j[0]+j[2] and i[1] <= j[1] and i[1]+i[3] >= j[1]+j[3]:
                    replace = index
                    break
                index += 1
            if not content:
                if replace >= 0:
                    num_array[replace] = i
                else:
                    num_array.append(i)
    # number of windows left after filtering
    num_array = set(num_array)
    '''
    return candidates

def feature_mapping(regions):
    # if pooling5 is kept, regions map onto a 7*7 feature map (stride 32)
    mapping = []
    #for ele in regions:
    #    mapping.append((math.floor(ele[0]/32)+1, math.floor(ele[1]/32)+1,
    #                    max(math.ceil((ele[0]+ele[2])/32)-1-(math.floor(ele[0]/32)+1), 0),
    #                    max(0, math.ceil((ele[1]+ele[3])/32)-1-(math.floor(ele[1]/32)+1))))
    # if pooling5 is dropped, the stride is 16 (14*14 for a 224 input; the
    # 480x480 input used here gives a 30x30 map, matching the clamping in data_pre.py)
    for ele in regions:
        mapping.append((math.floor(ele[0]/16)+1, math.floor(ele[1]/16)+1,
                        math.ceil((ele[0]+ele[2])/16)-1-(math.floor(ele[0]/16)+1),
                        math.ceil((ele[1]+ele[3])/16)-1-(math.floor(ele[1]/16)+1)))
    mapping = list(set(mapping))
    return mapping

'''
img = cv2.imread('./JPEGImages/2009_004858.jpg')
print(img.size)
a = ssw(img)
b = feature_mapping(a)
tensor = torch.from_numpy(np.array(b))
print(tensor)
print(tensor.shape)
'''
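
# Added sketch (not in the original): a quick check of the stride-16 mapping.
# An (x, y, w, h) proposal of (100, 60, 200, 150) on the 480x480 image lands at
# floor(100/16)+1 = 7, floor(60/16)+1 = 4, with mapped extents 11 and 9.
if __name__ == '__main__':
    print(feature_mapping([(100, 60, 200, 150)]))  # expected: [(7, 4, 11, 9)]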
--------------------------------------------------------------------------------
/data_pre.py:
--------------------------------------------------------------------------------
from torch.utils import data
from PIL import Image
import torchvision.transforms as transforms
import torch
import numpy as np
from math import floor

Transform = transforms.Compose([
    transforms.Resize([480, 480]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

class myDataSet(data.Dataset):
    def __init__(self, root, istest, transform):
        self.root = root
        self.data_txt = open('annotations.txt', 'r')
        self.ssw_txt = open('ssw.txt', 'r')
        self.ssw_test_txt = open('ssw_test.txt', 'r')
        self.istest = istest
        self.transform = transform
        self.imgs = []
        # The proposal files are assumed to list images in the same order as
        # annotations.txt: each inner loop resumes where the previous search
        # stopped, and the for/else branch supplies a dummy box when no entry
        # is found.
        for line in self.data_txt:
            line = line.rstrip()
            words = line.split()
            if self.istest:
                # test split: images named 2007_* / 2008_*
                if words[0][0:4] == '2007' or words[0][0:4] == '2008':
                    label_cur = [0 for i in range(20)]  # 20-class multi-hot label
                    for i in range(1, len(words)):
                        label_cur[int(words[i])] = 1
                    for linee in self.ssw_test_txt:
                        linee = linee.rstrip()
                        wordss = linee.split()
                        if wordss[0] == words[0]:
                            # each proposal is an (x, y, w, h) 4-tuple on the
                            # 30x30 conv feature map; clamp boxes so they stay
                            # inside the map and are at least 2 cells wide/high
                            ssw_block = torch.Tensor(floor((len(wordss) - 1) / 4), 4)
                            for i in range(floor((len(wordss) - 1) / 4)):
                                w = max(int(wordss[i * 4 + 3]), 2)
                                h = max(int(wordss[i * 4 + 4]), 2)
                                ssw_block[i, 0] = (30 - w if (int(wordss[i * 4 + 1]) + w >= 31) else int(wordss[i * 4 + 1]))
                                ssw_block[i, 2] = w
                                ssw_block[i, 1] = (30 - h if (int(wordss[i * 4 + 2]) + h >= 31) else int(wordss[i * 4 + 2]))
                                ssw_block[i, 3] = h
                            break
                    else:
                        ssw_block = torch.tensor([[0, 0, 2, 2]])
                    self.imgs.append([words[0], ssw_block, label_cur])
            else:
                # training split: everything else
                if not (words[0][0:4] == '2007' or words[0][0:4] == '2008'):
                    label_cur = [0 for i in range(20)]
                    for i in range(1, len(words)):
                        label_cur[int(words[i])] = 1
                    for linee in self.ssw_txt:
                        linee = linee.rstrip()
                        wordss = linee.split()
                        if wordss[0] == words[0]:
                            ssw_block = torch.Tensor(floor((len(wordss) - 1) / 4), 4)
                            for i in range(floor((len(wordss) - 1) / 4)):
                                w = max(int(wordss[i * 4 + 3]), 2)
                                h = max(int(wordss[i * 4 + 4]), 2)
                                ssw_block[i, 0] = (30 - w if (int(wordss[i * 4 + 1]) + w >= 31) else int(wordss[i * 4 + 1]))
                                ssw_block[i, 2] = w
                                ssw_block[i, 1] = (30 - h if (int(wordss[i * 4 + 2]) + h >= 31) else int(wordss[i * 4 + 2]))
                                ssw_block[i, 3] = h
                            break
                    else:
                        ssw_block = torch.tensor([[0, 0, 2, 2]])
                    self.imgs.append([words[0], ssw_block, label_cur])
        self.data_txt.close()
        self.ssw_txt.close()
        self.ssw_test_txt.close()

    def __getitem__(self, index):
        cur_img = Image.open(self.root + self.imgs[index][0] + '.jpg')
        data_once = self.transform(cur_img)
        label_once = self.imgs[index][2]
        ssw_block = self.imgs[index][1]
        return data_once, ssw_block, torch.Tensor(label_once)

    def __len__(self):
        return len(self.imgs)

if __name__ == '__main__':
    trainData = myDataSet('JPEGImages/', 0, Transform)
    testData = myDataSet('JPEGImages/', 1, Transform)
    print('trainData', len(trainData))
    print('testData', len(testData))
    print(trainData[1][1])
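    # Added sketch: each sample is (image tensor, proposal boxes, multi-hot label);
    # boxes are (x, y, w, h) rows on the 30x30 conv feature map (480 input, stride 16).
    img0, boxes0, label0 = trainData[0]
    print(img0.shape, boxes0.shape, label0.shape)  # e.g. [3, 480, 480], [R, 4], [20]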
--------------------------------------------------------------------------------
/model_wsddn.py:
--------------------------------------------------------------------------------
import torch
import torchvision.models as v_models
import torch.nn as nn
import torch.nn.functional as F
from math import floor

from spp_layer import spatial_pyramid_pool
#from data_pre import myDataSet

BATCH_SIZE = 1
R = 10

cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}

class WSDDN(nn.Module):
    def __init__(self, vgg_name):
        super(WSDDN, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.fc6 = nn.Linear(4096, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8c = nn.Linear(4096, 20)  # classification stream
        self.fc8d = nn.Linear(4096, 20)  # detection stream

    def forward(self, x, ssw_get):
        # x.shape = [BATCH_SIZE, 3, h, w]; ssw_get.shape = [BATCH_SIZE, R, 4];
        # returns image-level scores of shape [BATCH_SIZE, 20]
        x = self.features(x)
        x = self.through_spp_new(x, ssw_get)
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x_c = F.relu(self.fc8c(x))
        x_d = F.relu(self.fc8d(x))
        sigma_c = F.softmax(x_c, dim=2)  # normalize over classes
        sigma_d = F.softmax(x_d, dim=1)  # normalize over regions
        x = sigma_c * sigma_d
        x = torch.sum(x, dim=1)  # sum region scores into image-level scores
        return x, sigma_d, sigma_c

    def _make_layers(self, cfg):  # build the VGG feature extractor
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        return nn.Sequential(*layers)
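    # Added note: through_spp_new crops each proposal (x, y, w, h) out of the
    # conv feature map and pools the crop with spatial_pyramid_pool using
    # out_pool_size [2, 2], i.e. two 2x2 levels, so every region yields a fixed
    # 4096-dim vector (512 channels * 4 bins * 2 levels) to match fc6,
    # regardless of the region's size.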
    def through_spp_new(self, x, ssw):
        # x.shape = [BATCH_SIZE, 512, 14, 14]; ssw.shape = [BATCH_SIZE, R, 4];
        # y.shape = [BATCH_SIZE, R, 4096]
        for i in range(BATCH_SIZE):
            for j in range(ssw.size(1)):
                fmap_piece = torch.unsqueeze(x[i, :, floor(ssw[i, j, 0]) : floor(ssw[i, j, 0] + ssw[i, j, 2]),
                                                   floor(ssw[i, j, 1]) : floor(ssw[i, j, 1] + ssw[i, j, 3])], 0)
                fmap_piece = spatial_pyramid_pool(previous_conv=fmap_piece, num_sample=1,
                                                  previous_conv_size=[fmap_piece.size(2), fmap_piece.size(3)],
                                                  out_pool_size=[2, 2])
                if j == 0:
                    y_piece = fmap_piece
                else:
                    y_piece = torch.cat((y_piece, fmap_piece))
            if i == 0:
                y = torch.unsqueeze(y_piece, 0)
            else:
                y = torch.cat((y, torch.unsqueeze(y_piece, 0)))
        return y

    def through_spp(self, x):
        # legacy helper: SPP over pre-cropped 5-D input [BATCH_SIZE, R, 512, h, w]
        for i in range(BATCH_SIZE):
            y_piece = torch.unsqueeze(spatial_pyramid_pool(previous_conv=x[i, :], num_sample=R,
                                                           previous_conv_size=[x.size(3), x.size(4)],
                                                           out_pool_size=[2, 2]), 0)
            if i == 0:
                y = y_piece
            else:
                y = torch.cat((y, y_piece))
        return y

    def select_fmap(self, fmap, ssw):
        # crop the regions of interest; fmap.shape = [BATCH_SIZE, 512, 14, 14],
        # ssw.shape = [BATCH_SIZE, R, 4]
        for i in range(BATCH_SIZE):
            for j in range(ssw.size(1)):
                fmap_piece = torch.unsqueeze(fmap[i, :, floor(ssw[i, j, 0]) : floor(ssw[i, j, 0] + ssw[i, j, 2]),
                                                      floor(ssw[i, j, 1]) : floor(ssw[i, j, 1] + ssw[i, j, 3])], 0)
                if j == 0:
                    y_piece = fmap_piece
                else:
                    y_piece = torch.cat((y_piece, fmap_piece), 0)
            if i == 0:
                y = torch.unsqueeze(y_piece, 0)
            else:
                y = torch.cat((y, torch.unsqueeze(y_piece, 0)), 0)
        return y

if __name__ == '__main__':
    net_test = WSDDN('VGG11')
    x_test = torch.randn(BATCH_SIZE, 3, 224, 224)
    ssw_spp = torch.zeros(BATCH_SIZE, R, 4)
    for i in range(BATCH_SIZE):
        for j in range(R):
            ssw_spp[i, j, 0] = 0
            ssw_spp[i, j, 1] = 0
            ssw_spp[i, j, 2] = 4
            ssw_spp[i, j, 3] = 4
    out_test = net_test(x_test, ssw_spp)
    print(out_test[0].shape)
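    # Added sketch: sigma_d is softmax-normalized over regions (dim 1) and
    # sigma_c over classes (dim 2); their product, summed over regions, gives
    # the per-class image score returned first.
    print(out_test[1].shape)  # expected: torch.Size([BATCH_SIZE, R, 20])
    print(out_test[2].shape)  # expected: torch.Size([BATCH_SIZE, R, 20])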
'''
ssw_spp = torch.zeros(BATCH_SIZE, R, 4)
for i in range(BATCH_SIZE):
    for j in range(R):
        ssw_spp[i, j, 0] = 0
        ssw_spp[i, j, 1] = 0
        ssw_spp[i, j, 2] = 4
        ssw_spp[i, j, 3] = 4
map_test = torch.randn(BATCH_SIZE, 512, 14, 14)
y_test = select_fmap(map_test, ssw_spp)
print(y_test.shape)
'''

'''
spp_test = torch.randn(BATCH_SIZE, R, 512, 14, 14)
out_test = through_spp(spp_test)
print(out_test.shape)
'''
#pretrained_model_path =
#net_wsddn = WSDDN('VGG11')
#state_dict = torch.load(pretrained_model_path)
#net_wsddn.load_state_dict({k: v for k, v in state_dict.items() if k in net_wsddn.state_dict()})
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchsummary import summary
from torch.autograd import Variable
import torch.optim as optim
import argparse
from model_wsddn import WSDDN
from data_pre import myDataSet
import os
from tensorboardX import SummaryWriter
import ssw

Transform = transforms.Compose([
    transforms.Resize([480, 480]),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

parser = argparse.ArgumentParser(description='wsddn Input: BatchSize, initial LR, EPOCH')
parser.add_argument('--test', '-t', action='store_true',
                    help='set test mode')
parser.add_argument('--model_path', type=str, default='./model_para',
                    help='directory for saving model parameters')
parser.add_argument('--BATCH_SIZE', type=int, default=1,
                    help='batch size')
parser.add_argument('--LR', type=float, default=0.00001,
                    help='learning rate')
parser.add_argument('--EPOCH', type=int, default=40,
                    help='number of epochs')
parser.add_argument('--GPU', type=int, default=0,
                    help='GPU id')
args = parser.parse_args()
model_path = args.model_path
BATCH_SIZE = args.BATCH_SIZE
LR = args.LR
EPOCH = args.EPOCH
print('model_path:', model_path)
print('batch_size:', BATCH_SIZE)
print('initial LR:', LR)
print('epoch:', EPOCH)

torch.cuda.set_device(args.GPU)
net_wsddn = WSDDN('VGG11')
if os.path.exists(os.path.join(model_path, 'wsddn.pkl')):
    net_wsddn.load_state_dict(torch.load(os.path.join(model_path, 'wsddn.pkl')))
else:
    # initialize from an ImageNet-pretrained vgg11_bn checkpoint, copying only
    # the weights whose names also exist in this model
    pretrained_dict = torch.load('vgg11_bn-6002323d.pth.1')
    modified_dict = net_wsddn.state_dict()
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in modified_dict}
    modified_dict.update(pretrained_dict)
    net_wsddn.load_state_dict(modified_dict)
net_wsddn.cuda()

criterion = nn.BCELoss()  # size_average=True is the (deprecated) default, i.e. reduction='mean'
optimizer1 = optim.SGD(net_wsddn.parameters(), lr=LR, momentum=0.9)
optimizer2 = optim.SGD(net_wsddn.parameters(), lr=0.1 * LR, momentum=0.9)
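# Added note: the two optimizers above implement a manual LR drop -- epochs 0-9
# run at LR, epoch 10 onward at 0.1 * LR. A single optimizer stepped once per
# epoch with torch.optim.lr_scheduler.MultiStepLR(optimizer1, milestones=[10],
# gamma=0.1) would be the idiomatic equivalent.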
writer = SummaryWriter('WSDDN')
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
trainData = myDataSet('JPEGImages/', 0, Transform)
testData = myDataSet('JPEGImages/', 1, Transform)

trainLoader = torch.utils.data.DataLoader(dataset=trainData, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)
testLoader = torch.utils.data.DataLoader(dataset=testData, batch_size=BATCH_SIZE, shuffle=False)
if not args.test:
    net_wsddn.train()
    for epoch in range(EPOCH):
        #scheduler.step(epoch)
        running_loss = 0.0
        print(epoch)
        # kuang ("box") holds the proposal boxes for the batch
        for i, (images, kuang, labels) in enumerate(trainLoader):
            images = Variable(images).cuda()  # Variable is a no-op wrapper in PyTorch >= 0.4
            labels = Variable(labels).cuda()
            kuang = Variable(kuang).cuda()
            if epoch < 10:
                optimizer1.zero_grad()
            else:
                optimizer2.zero_grad()
            # forward + backward + optimize
            outputs_1, output_2, output_3 = net_wsddn(images, kuang)
            outputs_1 = torch.sigmoid(outputs_1)
            loss = criterion(outputs_1, labels)
            loss.backward()
            if epoch < 10:
                optimizer1.step()
            else:
                optimizer2.step()

            running_loss += loss.item()

            if i % 500 == 499:
                print('[%d , %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
                running_loss = 0.0
                writer.add_scalar('Train/loss', loss.item(), epoch)
        torch.save(net_wsddn.state_dict(), os.path.join(model_path, 'wsddn.pkl'))
    print('Finished Training')
    writer.close()
    torch.save(net_wsddn.state_dict(), os.path.join(model_path, 'wsddn.pkl'))
else:
    ## UNFINISHED
    net_wsddn.eval()
    result_name = 'box_result.txt'
    f = open(result_name, 'w')
    for i, (images, kuang, labels) in enumerate(testLoader):
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        kuang = Variable(kuang).cuda()
        outputs_1, output_2, output_3 = net_wsddn(images, kuang)
        # write one line per surviving (image, class, region) triple: image
        # index, class index, region score, and the box scaled back up
        for j in range(outputs_1.size(1)):
            if outputs_1[0, j] > 0.05:
                for k in range(output_2.size(1)):  # iterate over regions (size(0) is the batch dim)
                    if output_2[0, k, j] > 0.1:
                        new_line = [i, j, float('%.3f' % output_3[0, k, j].item()), 8 * kuang[0, k, 0].item(),
                                    8 * kuang[0, k, 1].item(), 8 * kuang[0, k, 2].item(), 8 * kuang[0, k, 3].item()]
                        for line_mem in new_line:
                            f.write(str(line_mem) + ' ')
                        f.write('\r\n')
        if (i % 500) == 0:
            print(i)
    f.close()
    # replace each numeric image index in box_result.txt with the image name
    # from annotations.txt, producing for_map.txt for mAP evaluation
    data1 = open('box_result.txt', 'r')
    data2 = open('bonus_ground_truth.txt', 'r')
    f = open('for_map.txt', 'w')
    for line in data1:
        c = 0
        line = line.rstrip()
        words = line.split()
        data3 = open('annotations.txt', 'r')
        for line1 in data3:
            if c == int(words[0]):
                line1 = line1.rstrip()
                words1 = line1.split()
                new_line = [words1[0], words[1], words[2], words[3], words[4], words[5], words[6]]
                for line_mem in new_line:
                    f.write(str(line_mem) + ' ')
                f.write('\r\n')
                data3.close()
                break
            c += 1
    data1.close()
    data2.close()
    f.close()
    '''
    for images, labels in testLoader:
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        outputs_1, output_2 = net_wsddn(images, kuang)
        predicted = outputs_1.data >= 0.5
        vec_1 += (predicted.float() == labels).cpu().float().sum(0)  # correct_num
        vec_2 += labels.cpu().sum(0)  # appear_num
        # equal to predicted = outputs.data >= 0
        total += labels.size(0) * labels.size(1)
        correct += (predicted.float() == labels).sum()
    print('Classification Accuracy of the model on the test images(mAcc): %.4f %%' % (100 * float(correct) / float(total)))
    print('Localization Accuracy of the model on the test images(mAP): %.4f %%' % (100 * (vec_1 * vec_2).sum()))
    '''
--------------------------------------------------------------------------------
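Usage sketch (added for orientation; assumes PASCAL VOC images in ./JPEGImages,
an annotations.txt index, proposal files ssw.txt / ssw_test.txt, and a vgg11_bn
checkpoint next to the scripts, as the code above expects):

    python data_process.py                     # cache selective-search proposals for training images to ssw.txt
    python train.py --LR 0.00001 --EPOCH 40    # train; saves model_para/wsddn.pkl
    python train.py -t                         # test mode; writes box_result.txt and for_map.txt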