├── README.md
├── dataloader.py
├── deploy_scripts
│   ├── config.json
│   └── customize_service.py
├── efficientnet_pytorch
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   └── utils.cpython-36.pyc
│   ├── model.py
│   └── utils.py
├── get_img_url.py
├── inference.py
├── loss.py
├── meters.py
├── model.py
├── preprocess_data.py
├── ramdom_erase.py
├── samplers.py
├── senet.py
├── train.py
└── tt.py
/README.md: --------------------------------------------------------------------------------
1 | # garbage_classification
2 | Huawei Cloud garbage classification source code, PyTorch implementation
3 | 
4 | ResNet50 plus some training tricks:
5 | 1. random erasing and cutout for data augmentation
6 | 2. bnneck before the fc layer
7 | 3. multi-feature fusion
8 | 4. label smoothing, triplet loss, center loss, etc.
9 | 
10 | EfficientNet is also supported.
11 | 
-------------------------------------------------------------------------------- /dataloader.py: --------------------------------------------------------------------------------
1 | from PIL import Image
2 | from torch.utils.data import Dataset
3 | 
4 | class MyDataset(Dataset):
5 |     def __init__(self, txt_path = '../../data/garbage_classify/img_list.txt', transform = None):
6 |         fd = open(txt_path, 'r')
7 |         imgs = []
8 | 
9 |         for line in fd:
10 |             line = line.rstrip()
11 |             words = line.split(' ')
12 |             imgs.append((words[0], int(words[1])))
13 | 
14 |         self.imgs = imgs
15 |         self.transforms = transform
16 | 
17 |     def __getitem__(self, item):
18 |         fn, label = self.imgs[item]
19 | 
20 |         image = Image.open(fn)
21 | 
22 |         if self.transforms is not None:
23 |             image = self.transforms(image)
24 | 
25 |         return image, label
26 | 
27 |     def __len__(self):
28 |         return len(self.imgs)
29 | 
30 | def process_dir(txt_path = '../../data/garbage_classify/img_list.txt'):
31 |     fd = open(txt_path, 'r')
32 |     imgs = []
33 | 
34 |     for line in fd:
35 |         line = line.rstrip()
36 |         words = line.split(' ')
37 |         imgs.append((words[0], int(words[1])))
38 |     return imgs
39 | 
-------------------------------------------------------------------------------- /deploy_scripts/config.json: --------------------------------------------------------------------------------
1 | {
2 |     "model_algorithm": "image_classification",
3 |     "model_type": "PyTorch",
4 |     "runtime": "python3.6",
5 |     "metrics": {
6 |         "f1": 0,
7 |         "accuracy": 0.6253,
8 |         "precision": 0,
9 |         "recall": 0
10 |     },
11 |     "apis": [
12 |         {
13 |             "protocol": "http",
14 |             "url": "/",
15 |             "method": "post",
16 |             "request": {
17 |                 "Content-type": "multipart/form-data",
18 |                 "data": {
19 |                     "type": "object",
20 |                     "properties": {
21 |                         "input_img": {"type": "file"}
22 |                     },
23 |                     "required": ["input_img"]
24 |                 }
25 |             },
26 |             "response": {
27 |                 "Content-type": "multipart/form-data",
28 |                 "data": {
29 |                     "type": "object",
30 |                     "properties": {
31 |                         "result": {"type": "string"}
32 |                     },
33 |                     "required": ["result"]
34 |                 }
35 |             }
36 |         }
37 |     ],
38 |     "dependencies": [
39 |         {
40 |             "installer": "pip",
41 |             "packages": [
42 |                 {
43 |                     "package_name": "Pillow",
44 |                     "package_version": "5.0.0",
45 |                     "restraint": "ATLEAST"
46 |                 }
47 |             ]
48 |         }
49 |     ]
50 | }
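The schema above declares a multipart/form-data POST with one required `input_img` file field. A minimal client sketch against that contract (the endpoint URL is a placeholder for the address ModelArts assigns after deployment, and `test.jpg` is a hypothetical local image):

```python
import requests

SERVICE_URL = 'https://<your-modelarts-endpoint>/'  # placeholder, filled in after deployment

with open('test.jpg', 'rb') as f:
    # 'input_img' matches the required file field declared in the request schema above
    resp = requests.post(SERVICE_URL, files={'input_img': f})
print(resp.text)  # expected to carry a 'result' field, e.g. a label such as "可回收物/易拉罐"
```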
-------------------------------------------------------------------------------- /deploy_scripts/customize_service.py: --------------------------------------------------------------------------------
1 | import ast
2 | import numpy as np
3 | from PIL import Image
4 | import torch
5 | import torch.nn as nn
6 | import math
7 | import torchvision.transforms as T
8 | from efficientnet_pytorch import EfficientNet, efficientnet
9 | from model_service.pytorch_model_service import PTServingBaseService
10 | 
11 | def weights_init_kaiming(m):
12 |     classname = m.__class__.__name__
13 |     if classname.find('Linear') != -1:
14 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
15 |         nn.init.constant_(m.bias, 0.0)
16 |     elif classname.find('Conv') != -1:
17 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
18 |         if m.bias is not None:
19 |             nn.init.constant_(m.bias, 0.0)
20 |     elif classname.find('BatchNorm') != -1:
21 |         if m.affine:
22 |             nn.init.normal_(m.weight, 1.0, 0.02)
23 |             nn.init.constant_(m.bias, 0.0)
24 | 
25 | 
26 | def weights_init_classifier(m):
27 |     classname = m.__class__.__name__
28 |     if classname.find('Linear') != -1:
29 |         nn.init.normal_(m.weight, std=0.001)
30 |         if m.bias is not None:  # truth-testing a multi-element tensor raises a RuntimeError
31 |             nn.init.constant_(m.bias, 0.0)
32 | 
33 | def conv3x3(in_planes, out_planes, stride=1):
34 |     """3x3 convolution with padding"""
35 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
36 |                      padding=1, bias=False)
37 | 
38 | 
39 | class BasicBlock(nn.Module):
40 |     expansion = 1
41 | 
42 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
43 |         super(BasicBlock, self).__init__()
44 |         self.conv1 = conv3x3(inplanes, planes, stride)
45 |         self.bn1 = nn.BatchNorm2d(planes)
46 |         self.relu = nn.ReLU(inplace=True)
47 |         self.conv2 = conv3x3(planes, planes)
48 |         self.bn2 = nn.BatchNorm2d(planes)
49 |         self.downsample = downsample
50 |         self.stride = stride
51 | 
52 |     def forward(self, x):
53 |         residual = x
54 | 
55 |         out = self.conv1(x)
56 |         out = self.bn1(out)
57 |         out = self.relu(out)
58 | 
59 |         out = self.conv2(out)
60 |         out = self.bn2(out)
61 | 
62 |         if self.downsample is not None:
63 |             residual = self.downsample(x)
64 | 
65 |         out += residual
66 |         out = self.relu(out)
67 | 
68 |         return out
69 | 
70 | 
71 | class Bottleneck(nn.Module):
72 |     expansion = 4
73 | 
74 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
75 |         super(Bottleneck, self).__init__()
76 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
77 |         self.bn1 = nn.BatchNorm2d(planes)
78 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
79 |                                padding=1, bias=False)
80 |         self.bn2 = nn.BatchNorm2d(planes)
81 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
82 |         self.bn3 = nn.BatchNorm2d(planes * 4)
83 |         self.relu = nn.ReLU(inplace=True)
84 |         self.downsample = downsample
85 |         self.stride = stride
86 | 
87 |     def forward(self, x):
88 |         residual = x
89 | 
90 |         out = self.conv1(x)
91 |         out = self.bn1(out)
92 |         out = self.relu(out)
93 | 
94 |         out = self.conv2(out)
95 |         out = self.bn2(out)
96 |         out = self.relu(out)
97 | 
98 |         out = self.conv3(out)
99 |         out = self.bn3(out)
100 | 
101 |         if self.downsample is not None:
102 |             residual = self.downsample(x)
103 | 
104 |         out += residual
105 |         out = self.relu(out)
106 | 
107 |         return out
108 | 
109 | 
110 | class ResNet(nn.Module):
111 | 
112 |     def __init__(self, block, layers, num_classes=1000):
113 |         self.inplanes = 64
114 |         super(ResNet, self).__init__()
115 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
116 |                                bias=False)
117 |         self.bn1 = nn.BatchNorm2d(64)
118 |         self.relu = nn.ReLU(inplace=True)
119 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
120 |         self.layer1 = self._make_layer(block, 64, layers[0])
121 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
122 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
123 |         self.layer4 = self._make_layer(block, 512, layers[3],
stride=2) 124 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 125 | self.fc = nn.Linear(512 * block.expansion, num_classes) 126 | 127 | for m in self.modules(): 128 | if isinstance(m, nn.Conv2d): 129 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 130 | m.weight.data.normal_(0, math.sqrt(2. / n)) 131 | elif isinstance(m, nn.BatchNorm2d): 132 | m.weight.data.fill_(1) 133 | m.bias.data.zero_() 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1): 136 | downsample = None 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | nn.Conv2d(self.inplanes, planes * block.expansion, 140 | kernel_size=1, stride=stride, bias=False), 141 | nn.BatchNorm2d(planes * block.expansion), 142 | ) 143 | 144 | layers = [] 145 | layers.append(block(self.inplanes, planes, stride, downsample)) 146 | self.inplanes = planes * block.expansion 147 | for i in range(1, blocks): 148 | layers.append(block(self.inplanes, planes)) 149 | 150 | return nn.Sequential(*layers) 151 | 152 | def forward(self, x): 153 | x = self.conv1(x) 154 | x = self.bn1(x) 155 | x = self.relu(x) 156 | x = self.maxpool(x) 157 | 158 | x = self.layer1(x) 159 | x = self.layer2(x) 160 | x = self.layer3(x) 161 | x = self.layer4(x) 162 | 163 | # x = self.avgpool(x) 164 | # x = x.view(x.size(0), -1) 165 | # x = self.fc(x) 166 | 167 | return x 168 | 169 | class Baseline(nn.Module): 170 | in_planes = 2048 171 | 172 | def __init__(self, num_classes = 40, model_path = 'C:/Users/maliho/.torch/models/resnet50-19c8e357.pth', neck = 'bnneck', neck_feat = 'after', pretrain_choice = 'imagenet'): 173 | super(Baseline, self).__init__() 174 | 175 | self.base = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes) 176 | # if pretrain_choice == 'imagenet': 177 | # self.base.load_param(model_path) 178 | # print('Loading pretrained ImageNet model......') 179 | 180 | self.gap = nn.AdaptiveAvgPool2d(1) 181 | # self.gap = nn.AdaptiveMaxPool2d(1) 182 | self.num_classes = num_classes 183 | self.neck = neck 184 | self.neck_feat = neck_feat 185 | 186 | if self.neck == 'no': 187 | self.classifier = nn.Linear(self.in_planes, self.num_classes) 188 | # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) # new add by luo 189 | # self.classifier.apply(weights_init_classifier) # new add by luo 190 | elif self.neck == 'bnneck': 191 | self.bottleneck = nn.BatchNorm1d(self.in_planes) 192 | self.bottleneck.bias.requires_grad_(False) # no shift 193 | self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) 194 | 195 | self.bottleneck.apply(weights_init_kaiming) 196 | self.classifier.apply(weights_init_classifier) 197 | 198 | def forward(self, x): 199 | x = self.base(x) 200 | global_feat = self.gap(x) # (b, 2048, 1, 1) 201 | global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) 202 | 203 | if self.neck == 'no': 204 | feat = global_feat 205 | elif self.neck == 'bnneck': 206 | feat = self.bottleneck(global_feat) # normalize for angular softmax 207 | 208 | # if self.training: 209 | cls_score = self.classifier(feat) 210 | return cls_score 211 | # return [global_feat], [cls_score] # global feature for triplet loss 212 | # else: 213 | # if self.neck_feat == 'after': 214 | # # print("Test with feature after BN") 215 | # return feat 216 | # else: 217 | # # print("Test with feature before BN") 218 | # return global_feat 219 | 220 | # def get_optim_policy(self): 221 | # return self.parameters() 222 | 223 | class efficient_baseline(nn.Module): 224 | 
    in_planes = 1792
225 |     def __init__(self, num_classes = 40, neck = 'bnneck', neck_feat = 'after',
226 |                  model_path = '/home/zhoumi/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth'):
227 |         super(efficient_baseline, self).__init__()
228 | 
229 |         # efficientnet-b4 coefficients: width 1.4, depth 1.8, resolution 380, dropout 0.4
230 |         blocks_args, global_params = efficientnet(width_coefficient=1.4, depth_coefficient=1.8,
231 |                                                   dropout_rate=0.4, image_size=380)
232 | 
233 |         self.base = EfficientNet(blocks_args=blocks_args, global_params=global_params)
234 |         self.base.load_param(model_path)
235 |         print('Loading pretrained ImageNet model......')
236 |         # self.gap = nn.AdaptiveAvgPool2d(1)
237 |         # self.gap = nn.AdaptiveMaxPool2d(1)
238 |         self.num_classes = num_classes
239 |         self.neck = neck
240 |         self.neck_feat = neck_feat
241 | 
242 |         if self.neck == 'no':
243 |             self.classifier = nn.Linear(self.in_planes, self.num_classes)
244 |             # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)  # new add by luo
245 |             # self.classifier.apply(weights_init_classifier)  # new add by luo
246 |         elif self.neck == 'bnneck':
247 |             self.bottleneck = nn.BatchNorm1d(self.in_planes)
248 |             self.bottleneck.bias.requires_grad_(False)  # no shift
249 |             self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)
250 | 
251 |             self.bottleneck.apply(weights_init_kaiming)
252 |             self.classifier.apply(weights_init_classifier)
253 | 
254 |     def forward(self, x):
255 |         global_feat = self.base(x)
256 | 
257 |         # global_feat = self.gap(x)  # (b, 2048, 1, 1)
258 |         # global_feat = global_feat.view(global_feat.shape[0], -1)  # flatten to (bs, 2048)
259 | 
260 |         if self.neck == 'no':
261 |             feat = global_feat
262 |         elif self.neck == 'bnneck':
263 |             feat = self.bottleneck(global_feat)  # normalize for angular softmax
264 | 
265 |         # if self.training:
266 |         cls_score = self.classifier(feat)
267 |         return cls_score, global_feat
268 | 
269 | class garbage_classify_service(PTServingBaseService):
270 |     def __init__(self, model_name, model_path):
271 |         # these three parameters need not be modified
272 |         self.model_name = model_name
273 |         self.model_path = model_path
274 |         self.input_key_1 = 'input_img'
275 |         self.output_key_1 = 'output_score'
276 | 
277 |         self.input_size = 224  # the input image size of the model
278 |         # model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=40)
279 |         # model = Baseline(num_classes=40)
280 |         # model.load_state_dict(torch.load(self.model_path, map_location=lambda storage, loc: storage))
281 |         model = torch.load(self.model_path, map_location=lambda storage, loc: storage)
282 |         self.model = model.eval()
283 |         self.transform = T.Compose([T.Resize((224, 224)),
284 |                                     T.ToTensor(),
285 |                                     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
286 | 
287 |         self.label_id_name_dict = \
288 |             {
289 |                 "0": "其他垃圾/一次性快餐盒",
290 |                 "1": "其他垃圾/污损塑料",
291 |                 "2": "其他垃圾/烟蒂",
292 |                 "3": "其他垃圾/牙签",
293 |                 "4": "其他垃圾/破碎花盆及碟碗",
294 |                 "5": "其他垃圾/竹筷",
295 |                 "6": "厨余垃圾/剩饭剩菜",
296 |                 "7": "厨余垃圾/大骨头",
297 |                 "8": "厨余垃圾/水果果皮",
298 |                 "9": "厨余垃圾/水果果肉",
299 |                 "10": "厨余垃圾/茶叶渣",
300 |                 "11": "厨余垃圾/菜叶菜根",
301 |                 "12": "厨余垃圾/蛋壳",
302 |                 "13": "厨余垃圾/鱼骨",
303 |                 "14": "可回收物/充电宝",
304 |                 "15": "可回收物/包",
305 |                 "16": "可回收物/化妆品瓶",
306 |                 "17": "可回收物/塑料玩具",
307 |                 "18": "可回收物/塑料碗盆",
308 |                 "19": "可回收物/塑料衣架",
309 |                 "20": "可回收物/快递纸袋",
310 |                 "21": "可回收物/插头电线",
311 |                 "22": "可回收物/旧衣服",
312 |                 "23": "可回收物/易拉罐",
313 |                 "24": "可回收物/枕头",
314 |                 "25": "可回收物/毛绒玩具",
315 |                 "26": "可回收物/洗发水瓶",
316 |                 "27": "可回收物/玻璃杯",
317 |                 "28": "可回收物/皮鞋",
318 |                 "29": "可回收物/砧板",
319 |                 "30": "可回收物/纸板箱",
320 |                 "31": "可回收物/调料瓶",
321 |                 "32": "可回收物/酒瓶",
322 |                 "33": "可回收物/金属食品罐",
323 |                 "34": "可回收物/锅",
324 |                 "35": "可回收物/食用油桶",
325 |                 "36": "可回收物/饮料瓶",
326 |                 "37": "有害垃圾/干电池",
327 |                 "38": "有害垃圾/软膏",
328 |                 "39": "有害垃圾/过期药物"
329 |             }
330 | 
331 |     def _preprocess(self, data):
332 |         preprocessed_data = {}
333 |         for k, v in data.items():
334 |             for file_name, file_content in v.items():
335 |                 img = Image.open(file_content)
336 |                 img = self.transform(img)
337 |                 preprocessed_data[k] = img
338 |         return preprocessed_data
339 | 
340 |     def _inference(self, data):
341 |         """
342 |         model inference function
343 |         Here is an inference example for ResNet; if you use another model, modify this function accordingly
344 |         """
345 |         img = data[self.input_key_1]
346 |         img = img[np.newaxis, :, :, :]  # the input tensor shape for ResNet is [?, 3, 224, 224] (NCHW)
347 |         pred_score = self.model(img)[0]
348 |         if pred_score is not None:
349 |             pred_label = torch.argmax(pred_score, dim=1).item()
350 |             result = {'result': self.label_id_name_dict[str(pred_label)]}
351 |         else:
352 |             result = {'result': 'predict score is None'}
353 |         return result
354 | 
355 |     def _postprocess(self, data):
356 |         return data
357 | 
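The preprocessing and decoding path above can also be exercised locally, outside ModelArts, without `PTServingBaseService`. A hypothetical smoke-test sketch, assuming a whole-model checkpoint saved via `torch.save` and a local test image (both paths are assumptions); it mirrors the transform and the `[0]` indexing used by `_inference` above:

```python
import torch
import torchvision.transforms as T
from PIL import Image

transform = T.Compose([T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

model = torch.load('model_best.pth', map_location='cpu').eval()  # hypothetical checkpoint path
img = transform(Image.open('test.jpg').convert('RGB')).unsqueeze(0)  # [1, 3, 224, 224]
with torch.no_grad():
    pred_score = model(img)[0]  # same indexing as _inference above
pred_label = torch.argmax(pred_score, dim=1).item()
print(str(pred_label))  # key into label_id_name_dict
```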
"32": "可回收物/酒瓶", 322 | "33": "可回收物/金属食品罐", 323 | "34": "可回收物/锅", 324 | "35": "可回收物/食用油桶", 325 | "36": "可回收物/饮料瓶", 326 | "37": "有害垃圾/干电池", 327 | "38": "有害垃圾/软膏", 328 | "39": "有害垃圾/过期药物" 329 | } 330 | 331 | def _preprocess(self, data): 332 | preprocessed_data = {} 333 | for k, v in data.items(): 334 | for file_name, file_content in v.items(): 335 | img = Image.open(file_content) 336 | img = self.transform(img) 337 | preprocessed_data[k] = img 338 | return preprocessed_data 339 | 340 | def _inference(self, data): 341 | """ 342 | model inference function 343 | Here are a inference example of resnet, if you use another model, please modify this function 344 | """ 345 | img = data[self.input_key_1] 346 | img = img[np.newaxis, :, :, :] # the input tensor shape of resnet is [?, 224, 224, 3] 347 | pred_score = self.model(img)[0] 348 | if pred_score is not None: 349 | pred_label = torch.argmax(pred_score, dim=1).item() 350 | result = {'result': self.label_id_name_dict[str(pred_label)]} 351 | else: 352 | result = {'result': 'predict score is None'} 353 | return result 354 | 355 | def _postprocess(self, data): 356 | return data 357 | -------------------------------------------------------------------------------- /efficientnet_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.4.0" 2 | from .model import EfficientNet 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .utils import ( 6 | relu_fn, 7 | round_filters, 8 | round_repeats, 9 | drop_connect, 10 | get_same_padding_conv2d, 11 | get_model_params, 12 | efficientnet_params, 13 | load_pretrained_weights, 14 | ) 15 | 16 | class MBConvBlock(nn.Module): 17 | """ 18 | Mobile Inverted Residual Bottleneck Block 19 | 20 | Args: 21 | block_args (namedtuple): BlockArgs, see above 22 | global_params (namedtuple): GlobalParam, see above 23 | 24 | Attributes: 25 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
26 | """ 27 | 28 | def __init__(self, block_args, global_params): 29 | super().__init__() 30 | self._block_args = block_args 31 | self._bn_mom = 1 - global_params.batch_norm_momentum 32 | self._bn_eps = global_params.batch_norm_epsilon 33 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 34 | self.id_skip = block_args.id_skip # skip connection and drop connect 35 | 36 | # Get static or dynamic convolution depending on image size 37 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 38 | 39 | # Expansion phase 40 | inp = self._block_args.input_filters # number of input channels 41 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 42 | if self._block_args.expand_ratio != 1: 43 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 44 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 45 | 46 | # Depthwise convolution phase 47 | k = self._block_args.kernel_size 48 | s = self._block_args.stride 49 | self._depthwise_conv = Conv2d( 50 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 51 | kernel_size=k, stride=s, bias=False) 52 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 53 | 54 | # Squeeze and Excitation layer, if desired 55 | if self.has_se: 56 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 57 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 58 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 59 | 60 | # Output phase 61 | final_oup = self._block_args.output_filters 62 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 63 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 64 | 65 | def forward(self, inputs, drop_connect_rate=None): 66 | """ 67 | :param inputs: input tensor 68 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 69 | :return: output of block 70 | """ 71 | 72 | # Expansion and Depthwise Convolution 73 | x = inputs 74 | if self._block_args.expand_ratio != 1: 75 | x = relu_fn(self._bn0(self._expand_conv(inputs))) 76 | x = relu_fn(self._bn1(self._depthwise_conv(x))) 77 | 78 | # Squeeze and Excitation 79 | if self.has_se: 80 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 81 | x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed))) 82 | x = torch.sigmoid(x_squeezed) * x 83 | 84 | x = self._bn2(self._project_conv(x)) 85 | 86 | # Skip connection and drop connect 87 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 88 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 89 | if drop_connect_rate: 90 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 91 | x = x + inputs # skip connection 92 | return x 93 | 94 | 95 | class EfficientNet(nn.Module): 96 | """ 97 | An EfficientNet model. 
Most easily loaded with the .from_name or .from_pretrained methods
98 | 
99 |     Args:
100 |         blocks_args (list): A list of BlockArgs to construct blocks
101 |         global_params (namedtuple): A set of GlobalParams shared between blocks
102 | 
103 |     Example:
104 |         model = EfficientNet.from_pretrained('efficientnet-b0')
105 | 
106 |     """
107 | 
108 |     def __init__(self, blocks_args=None, global_params=None):
109 |         super().__init__()
110 |         assert isinstance(blocks_args, list), 'blocks_args should be a list'
111 |         assert len(blocks_args) > 0, 'blocks_args must be non-empty'
112 |         self._global_params = global_params
113 |         self._blocks_args = blocks_args
114 | 
115 |         # Get static or dynamic convolution depending on image size
116 |         Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)
117 | 
118 |         # Batch norm parameters
119 |         bn_mom = 1 - self._global_params.batch_norm_momentum
120 |         bn_eps = self._global_params.batch_norm_epsilon
121 | 
122 |         # Stem
123 |         in_channels = 3  # rgb
124 |         out_channels = round_filters(32, self._global_params)  # number of output channels
125 |         self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
126 |         self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
127 | 
128 |         # Build blocks
129 |         self._blocks = nn.ModuleList([])
130 |         for block_args in self._blocks_args:
131 | 
132 |             # Update block input and output filters based on depth multiplier.
133 |             block_args = block_args._replace(
134 |                 input_filters=round_filters(block_args.input_filters, self._global_params),
135 |                 output_filters=round_filters(block_args.output_filters, self._global_params),
136 |                 num_repeat=round_repeats(block_args.num_repeat, self._global_params)
137 |             )
138 | 
139 |             # The first block needs to take care of stride and filter size increase.
140 |             self._blocks.append(MBConvBlock(block_args, self._global_params))
141 |             if block_args.num_repeat > 1:
142 |                 block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
143 |             for _ in range(block_args.num_repeat - 1):
144 |                 self._blocks.append(MBConvBlock(block_args, self._global_params))
145 | 
146 |         # Head
147 |         in_channels = block_args.output_filters  # output of final block
148 |         out_channels = round_filters(1280, self._global_params)
149 |         self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
150 |         self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
151 | 
152 |         # Final linear layer
153 |         self._dropout = self._global_params.dropout_rate
154 |         self._fc = nn.Linear(out_channels, self._global_params.num_classes)
155 | 
156 |     def extract_features(self, inputs):
157 |         """ Returns output of the final convolution layer """
158 | 
159 |         # Stem
160 |         x = relu_fn(self._bn0(self._conv_stem(inputs)))
161 | 
162 |         # Blocks
163 |         for idx, block in enumerate(self._blocks):
164 |             drop_connect_rate = self._global_params.drop_connect_rate
165 |             if drop_connect_rate:
166 |                 drop_connect_rate *= float(idx) / len(self._blocks)
167 |             x = block(x, drop_connect_rate=drop_connect_rate)
168 | 
169 |         # Head
170 |         x = relu_fn(self._bn1(self._conv_head(x)))
171 | 
172 |         return x
173 | 
174 |     def forward(self, inputs):
175 |         """ Calls extract_features to extract features, applies pooling and dropout, and returns the pooled feature vector (the final linear layer is commented out). """
176 | 
177 |         # Convolution layers
178 |         x = self.extract_features(inputs)
179 | 
180 |         # Pooling and final linear layer
181 |         x = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1)
182 |         if self._dropout:
183 |             x = F.dropout(x, p=self._dropout, training=self.training)
184 |         # x = self._fc(x)
185 |         return x
186 | 
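    # --- Editor's usage sketch (hypothetical, not part of the original file) ---
    # With the b4 coefficients that efficient_baseline passes in elsewhere in this
    # repo (width 1.4, depth 1.8, image size 380), round_filters(1280) = 1792, so
    # forward() returns a [batch, 1792] pooled feature tensor because _fc above is
    # commented out:
    #
    #   from efficientnet_pytorch import EfficientNet, efficientnet
    #   blocks_args, global_params = efficientnet(width_coefficient=1.4,
    #                                             depth_coefficient=1.8,
    #                                             dropout_rate=0.4, image_size=380)
    #   model = EfficientNet(blocks_args, global_params).eval()
    #   feat = model(torch.randn(1, 3, 380, 380))  # torch.Size([1, 1792])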
""" 176 | 177 | # Convolution layers 178 | x = self.extract_features(inputs) 179 | 180 | # Pooling and final linear layer 181 | x = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1) 182 | if self._dropout: 183 | x = F.dropout(x, p=self._dropout, training=self.training) 184 | # x = self._fc(x) 185 | return x 186 | 187 | def load_param(self, model_path): 188 | param_dict = torch.load(model_path) 189 | for i in param_dict: 190 | if '_fc' in i: 191 | continue 192 | self.state_dict()[i].copy_(param_dict[i]) 193 | 194 | @classmethod 195 | def from_name(cls, model_name, override_params=None): 196 | cls._check_model_name_is_valid(model_name) 197 | blocks_args, global_params = get_model_params(model_name, override_params) 198 | return cls(blocks_args, global_params) 199 | 200 | @classmethod 201 | def from_pretrained(cls, model_name, num_classes=1000): 202 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 203 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 204 | return model 205 | 206 | @classmethod 207 | def get_image_size(cls, model_name): 208 | cls._check_model_name_is_valid(model_name) 209 | _, _, res, _ = efficientnet_params(model_name) 210 | return res 211 | 212 | @classmethod 213 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 214 | """ Validates model name. None that pretrained weights are only available for 215 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 216 | num_models = 4 if also_need_pretrained_weights else 8 217 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)] 218 | if model_name not in valid_models: 219 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 220 | -------------------------------------------------------------------------------- /efficientnet_pytorch/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | 16 | ######################################################################## 17 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 18 | ######################################################################## 19 | 20 | 21 | # Parameters for the entire model (stem, all blocks, and head) 22 | GlobalParams = collections.namedtuple('GlobalParams', [ 23 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 24 | 'num_classes', 'width_coefficient', 'depth_coefficient', 25 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 26 | 27 | 28 | # Parameters for an individual model block 29 | BlockArgs = collections.namedtuple('BlockArgs', [ 30 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 31 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 32 | 33 | 34 | # Change namedtuple defaults 35 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 36 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 37 | 38 | 39 | def relu_fn(x): 40 | """ Swish activation function """ 41 | return x * torch.sigmoid(x) 42 | 43 | 44 | def round_filters(filters, global_params): 45 | """ Calculate and round number of filters based on depth multiplier. """ 46 | multiplier = global_params.width_coefficient 47 | if not multiplier: 48 | return filters 49 | divisor = global_params.depth_divisor 50 | min_depth = global_params.min_depth 51 | filters *= multiplier 52 | min_depth = min_depth or divisor 53 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 54 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 55 | new_filters += divisor 56 | return int(new_filters) 57 | 58 | 59 | def round_repeats(repeats, global_params): 60 | """ Round number of filters based on depth multiplier. """ 61 | multiplier = global_params.depth_coefficient 62 | if not multiplier: 63 | return repeats 64 | return int(math.ceil(multiplier * repeats)) 65 | 66 | 67 | def drop_connect(inputs, p, training): 68 | """ Drop connect. """ 69 | if not training: return inputs 70 | batch_size = inputs.shape[0] 71 | keep_prob = 1 - p 72 | random_tensor = keep_prob 73 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 74 | binary_tensor = torch.floor(random_tensor) 75 | output = inputs / keep_prob * binary_tensor 76 | return output 77 | 78 | 79 | def get_same_padding_conv2d(image_size=None): 80 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 81 | Static padding is necessary for ONNX exporting of models. 
""" 82 | if image_size is None: 83 | return Conv2dDynamicSamePadding 84 | else: 85 | return partial(Conv2dStaticSamePadding, image_size=image_size) 86 | 87 | class Conv2dDynamicSamePadding(nn.Conv2d): 88 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 89 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 90 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 91 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]]*2 92 | 93 | def forward(self, x): 94 | ih, iw = x.size()[-2:] 95 | kh, kw = self.weight.size()[-2:] 96 | sh, sw = self.stride 97 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 98 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 99 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 100 | if pad_h > 0 or pad_w > 0: 101 | x = F.pad(x, [pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2]) 102 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 103 | 104 | 105 | class Conv2dStaticSamePadding(nn.Conv2d): 106 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 107 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 108 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 109 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 110 | 111 | # Calculate padding based on image size and save it 112 | assert image_size is not None 113 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 114 | kh, kw = self.weight.size()[-2:] 115 | sh, sw = self.stride 116 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 117 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 118 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 119 | if pad_h > 0 or pad_w > 0: 120 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 121 | else: 122 | self.static_padding = Identity() 123 | 124 | def forward(self, x): 125 | x = self.static_padding(x) 126 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 127 | return x 128 | 129 | 130 | class Identity(nn.Module): 131 | def __init__(self,): 132 | super(Identity, self).__init__() 133 | 134 | def forward(self, input): 135 | return input 136 | 137 | 138 | ######################################################################## 139 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 140 | ######################################################################## 141 | 142 | 143 | def efficientnet_params(model_name): 144 | """ Map EfficientNet model name to parameter coefficients. 
""" 145 | params_dict = { 146 | # Coefficients: width,depth,res,dropout 147 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 148 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 149 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 150 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 151 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 152 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 153 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 154 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 155 | } 156 | return params_dict[model_name] 157 | 158 | 159 | class BlockDecoder(object): 160 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 161 | 162 | @staticmethod 163 | def _decode_block_string(block_string): 164 | """ Gets a block through a string notation of arguments. """ 165 | assert isinstance(block_string, str) 166 | 167 | ops = block_string.split('_') 168 | options = {} 169 | for op in ops: 170 | splits = re.split(r'(\d.*)', op) 171 | if len(splits) >= 2: 172 | key, value = splits[:2] 173 | options[key] = value 174 | 175 | # Check stride 176 | assert (('s' in options and len(options['s']) == 1) or 177 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 178 | 179 | return BlockArgs( 180 | kernel_size=int(options['k']), 181 | num_repeat=int(options['r']), 182 | input_filters=int(options['i']), 183 | output_filters=int(options['o']), 184 | expand_ratio=int(options['e']), 185 | id_skip=('noskip' not in block_string), 186 | se_ratio=float(options['se']) if 'se' in options else None, 187 | stride=[int(options['s'][0])]) 188 | 189 | @staticmethod 190 | def _encode_block_string(block): 191 | """Encodes a block to a string.""" 192 | args = [ 193 | 'r%d' % block.num_repeat, 194 | 'k%d' % block.kernel_size, 195 | 's%d%d' % (block.strides[0], block.strides[1]), 196 | 'e%s' % block.expand_ratio, 197 | 'i%d' % block.input_filters, 198 | 'o%d' % block.output_filters 199 | ] 200 | if 0 < block.se_ratio <= 1: 201 | args.append('se%s' % block.se_ratio) 202 | if block.id_skip is False: 203 | args.append('noskip') 204 | return '_'.join(args) 205 | 206 | @staticmethod 207 | def decode(string_list): 208 | """ 209 | Decodes a list of string notations to specify blocks inside the network. 210 | 211 | :param string_list: a list of strings, each string is a notation of block 212 | :return: a list of BlockArgs namedtuples of block args 213 | """ 214 | assert isinstance(string_list, list) 215 | blocks_args = [] 216 | for block_string in string_list: 217 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 218 | return blocks_args 219 | 220 | @staticmethod 221 | def encode(blocks_args): 222 | """ 223 | Encodes a list of BlockArgs to a list of strings. 224 | 225 | :param blocks_args: a list of BlockArgs namedtuples of block args 226 | :return: a list of strings, each string is a notation of block 227 | """ 228 | block_strings = [] 229 | for block in blocks_args: 230 | block_strings.append(BlockDecoder._encode_block_string(block)) 231 | return block_strings 232 | 233 | 234 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 235 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 236 | """ Creates a efficientnet model. 
""" 237 | 238 | blocks_args = [ 239 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 240 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 241 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 242 | 'r1_k3_s11_e6_i192_o320_se0.25', 243 | ] 244 | blocks_args = BlockDecoder.decode(blocks_args) 245 | 246 | global_params = GlobalParams( 247 | batch_norm_momentum=0.99, 248 | batch_norm_epsilon=1e-3, 249 | dropout_rate=dropout_rate, 250 | drop_connect_rate=drop_connect_rate, 251 | # data_format='channels_last', # removed, this is always true in PyTorch 252 | num_classes=num_classes, 253 | width_coefficient=width_coefficient, 254 | depth_coefficient=depth_coefficient, 255 | depth_divisor=8, 256 | min_depth=None, 257 | image_size=image_size, 258 | ) 259 | 260 | return blocks_args, global_params 261 | 262 | 263 | def get_model_params(model_name, override_params): 264 | """ Get the block args and global params for a given model """ 265 | if model_name.startswith('efficientnet'): 266 | w, d, s, p = efficientnet_params(model_name) 267 | # note: all models have drop connect rate = 0.2 268 | blocks_args, global_params = efficientnet( 269 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 270 | else: 271 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 272 | if override_params: 273 | # ValueError will be raised here if override_params has fields not included in global_params. 274 | global_params = global_params._replace(**override_params) 275 | return blocks_args, global_params 276 | 277 | 278 | url_map = { 279 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 280 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 281 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 282 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 283 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 284 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 285 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 286 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 287 | } 288 | 289 | def load_pretrained_weights(model, model_name, load_fc=True): 290 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 291 | state_dict = model_zoo.load_url(url_map[model_name]) 292 | if load_fc: 293 | model.load_state_dict(state_dict) 294 | else: 295 | state_dict.pop('_fc.weight') 296 | state_dict.pop('_fc.bias') 297 | res = model.load_state_dict(state_dict, strict=False) 298 | assert str(res.missing_keys) == str(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 299 | print('Loaded pretrained weights for {}'.format(model_name)) 300 | -------------------------------------------------------------------------------- /get_img_url.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from urllib import error 4 | from bs4 import BeautifulSoup 5 | import os 6 | 7 | num = 0 8 | numPicture = 0 9 | file = '' 10 | List = [] 11 | 12 | 13 | def Find(url): 14 | global List 15 | print('正在检测图片总数,请稍等.....') 16 | t = 0 17 | i = 1 18 | s = 0 19 | while t < 1000: 20 | Url = url + str(t) 21 | try: 22 | Result = requests.get(Url, timeout=7) 23 | except BaseException: 24 | t = t + 60 25 | continue 26 | else: 27 | result = Result.text 28 | pic_url = re.findall('"objURL":"(.*?)",', result, re.S) # 先利用正则表达式找到图片url 29 | s += len(pic_url) 30 | if len(pic_url) == 0: 31 | break 32 | else: 33 | List.append(pic_url) 34 | t = t + 60 35 | return s 36 | 37 | 38 | def recommend(url): 39 | Re = [] 40 | try: 41 | html = requests.get(url) 42 | except error.HTTPError as e: 43 | return 44 | else: 45 | html.encoding = 'utf-8' 46 | bsObj = BeautifulSoup(html.text, 'html.parser') 47 | div = bsObj.find('div', id='topRS') 48 | if div is not None: 49 | listA = div.findAll('a') 50 | for i in listA: 51 | if i is not None: 52 | Re.append(i.get_text()) 53 | return Re 54 | 55 | 56 | def dowmloadPicture(html, keyword): 57 | global num 58 | # t =0 59 | pic_url = re.findall('"objURL":"(.*?)",', html, re.S) # 先利用正则表达式找到图片url 60 | print('找到关键词:' + keyword + '的图片,即将开始下载图片...') 61 | for each in pic_url: 62 | print('正在下载第' + str(num + 1) + '张图片,图片地址:' + str(each)) 63 | try: 64 | if each is not None: 65 | pic = requests.get(each, timeout=7) 66 | else: 67 | continue 68 | except BaseException: 69 | print('错误,当前图片无法下载') 70 | continue 71 | else: 72 | string = file + r'/' + keyword + '_' + str(num) + '.jpg' 73 | print(string) 74 | fp = open(string, 'wb') 75 | fp.write(pic.content) 76 | fp.close() 77 | num += 1 78 | if num >= numPicture: 79 | return 80 | 81 | 82 | if __name__ == '__main__': # 主函数入口 83 | word = input("请输入搜索关键词(可以是人名,地名等): ") 84 | # add = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120' 85 | url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=' + word + '&pn=' 86 | tot = Find(url) 87 | Recommend = recommend(url) # 记录相关推荐 88 | print('经过检测%s类图片共有%d张' % (word, tot)) 89 | numPicture = int(input('请输入想要下载的图片数量 ')) 90 | file = input('请建立一个存储图片的文件夹,输入文件夹名称即可') 91 | y = os.path.exists(file) 92 | if y == 1: 93 | print('该文件已存在,请重新输入') 94 | file = input('请建立一个存储图片的文件夹,)输入文件夹名称即可') 95 | os.mkdir(file) 96 | else: 97 | os.mkdir(file) 98 | t = 0 99 | tmp = url 100 | while t < numPicture: 101 | try: 102 | url = tmp + str(t) 103 | result = requests.get(url, timeout=10) 104 | print(url) 105 | except error.HTTPError as e: 106 | print('网络错误,请调整网络后重试') 107 | t = t + 60 108 | else: 109 | dowmloadPicture(result.text, word) 110 | t = t + 60 111 | 112 | print('当前搜索结束,感谢使用') 113 | print('猜你喜欢') 114 | for re in Recommend: 115 | print(re, end=' ') 116 | 117 | 
-------------------------------------------------------------------------------- /inference.py: --------------------------------------------------------------------------------
1 | from model import Baseline, ft_net, efficient_baseline
2 | from PIL import Image
3 | import glob
4 | import torch
5 | import torchvision.transforms as T
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | from efficientnet_pytorch import EfficientNet
9 | 
10 | use_ff = False
11 | use_efficientnet = False
12 | transform = T.Compose([T.Resize((224, 224)),
13 |                        T.ToTensor(),
14 |                        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
15 | 
16 | img_paths = glob.glob('/Users/zhoumi/Downloads/garbage_classify/val_data/*jpg')
17 | # if use_ff == False:
18 | #     if use_efficientnet == True:
19 | #         model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=40)
20 | #     else:
21 | #         model = Baseline(num_classes=40)
22 | # else:
23 | #     model = ft_net(num_classes=40)
24 | # model.load_state_dict(torch.load('./models/ff_best_model.pth', map_location=lambda storage, loc: storage))
25 | model = torch.load('./models/best_model_v2_tri_old1.pth', map_location=lambda storage, loc: storage)
26 | model = model.eval().cpu()
27 | # print(model)
28 | 
29 | wrong = 0
30 | for img_path in img_paths:
31 |     label = int(img_path.split('/')[-1].split('_')[0])
32 |     img = transform(Image.open(img_path))
33 |     input = img[np.newaxis, :, :, :]
34 |     # print(input.size())
35 | 
36 |     if use_ff == False:
37 |         if use_efficientnet == True:
38 |             pred_score = model(input)
39 |         else:
40 |             pred_score, _ = model(input)
41 |             # print(pred_score)
42 |         pred_label = torch.argmax(pred_score, dim=1).item()
43 |     else:
44 |         o1, o2, o3 = model(input)
45 |         pred_label = torch.argmax((o1 + o2 + o3) / 3, dim=1).item()
46 | 
47 |     print(img_path.split('/')[-1], label, pred_label)
48 |     if label != pred_label:
49 |         # plt.imshow(Image.open(img_path))
50 |         wrong += 1
51 |         # plt.show()
52 | 
53 | print('acc:{}'.format(1 - wrong / len(img_paths)))
54 | 
55 | # best_model1.pth acc:0.9082819986310746 resnet50
56 | # tri_best_model.pth acc:0.9103353867214237 resnet50
57 | # ff_best_model.pth acc:0.8809034907597536 feature fusion
58 | # effic_best_model acc:0.9226557152635182 effic4
59 | # effic4_best_model.pth acc:0.9301848049281314 effic4
60 | 
61 | # new datasets
62 | # best_model_v2 acc:0.9340878828229028
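loss.py below applies the smoothing rule y = (1 - epsilon) * y + epsilon / K inside `CrossEntropyLabelSmooth`. A tiny standalone check of that arithmetic for K = 40 classes:

```python
import torch

num_classes, epsilon = 40, 0.1
# one-hot targets for two samples with true classes 3 and 7
targets = torch.zeros(2, num_classes).scatter_(1, torch.tensor([[3], [7]]), 1)
smoothed = (1 - epsilon) * targets + epsilon / num_classes
print(smoothed[0, 3].item())  # 0.9025 = 0.9 + 0.1/40 (true class)
print(smoothed[0, 0].item())  # 0.0025 = 0.1/40 (every other class)
```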
12 | """ 13 | 14 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True): 15 | super(CrossEntropyLabelSmooth, self).__init__() 16 | self.num_classes = num_classes 17 | self.epsilon = epsilon 18 | self.use_gpu = use_gpu 19 | self.logsoftmax = nn.LogSoftmax(dim=1) 20 | 21 | def forward(self, inputs, targets): 22 | """ 23 | Args: 24 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) 25 | targets: ground truth labels with shape (num_classes) 26 | """ 27 | log_probs = self.logsoftmax(inputs) 28 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).cpu(), 1) 29 | if self.use_gpu: targets = targets.cuda() 30 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 31 | loss = (- targets * log_probs).mean(0).sum() 32 | return loss 33 | 34 | class CenterLoss(nn.Module): 35 | """Center loss. 36 | Reference: 37 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 38 | Args: 39 | num_classes (int): number of classes. 40 | feat_dim (int): feature dimension. 41 | """ 42 | 43 | def __init__(self, num_classes=751, feat_dim=2048, use_gpu=True): 44 | super(CenterLoss, self).__init__() 45 | self.num_classes = num_classes 46 | self.feat_dim = feat_dim 47 | self.use_gpu = use_gpu 48 | 49 | if self.use_gpu: 50 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) 51 | else: 52 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) 53 | 54 | def forward(self, x, labels): 55 | """ 56 | Args: 57 | x: feature matrix with shape (batch_size, feat_dim). 58 | labels: ground truth labels with shape (num_classes). 59 | """ 60 | assert x.size(0) == labels.size(0), "features.size(0) is not equal to labels.size(0)" 61 | 62 | batch_size = x.size(0) 63 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 64 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() 65 | distmat.addmm_(1, -2, x, self.centers.t()) 66 | 67 | classes = torch.arange(self.num_classes).long() 68 | if self.use_gpu: classes = classes.cuda() 69 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 70 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 71 | 72 | dist = distmat * mask.float() 73 | loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size 74 | #dist = [] 75 | #for i in range(batch_size): 76 | # value = distmat[i][mask[i]] 77 | # value = value.clamp(min=1e-12, max=1e+12) # for numerical stability 78 | # dist.append(value) 79 | #dist = torch.cat(dist) 80 | #loss = dist.mean() 81 | return loss 82 | 83 | def normalize(x, axis=-1): 84 | """Normalizing to unit length along the specified dimension. 85 | Args: 86 | x: pytorch Variable 87 | Returns: 88 | x: pytorch Variable, same shape as input 89 | """ 90 | x = 1. 
* x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) 91 | return x 92 | 93 | 94 | def euclidean_dist(x, y): 95 | """ 96 | Args: 97 | x: pytorch Variable, with shape [m, d] 98 | y: pytorch Variable, with shape [n, d] 99 | Returns: 100 | dist: pytorch Variable, with shape [m, n] 101 | """ 102 | m, n = x.size(0), y.size(0) 103 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 104 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 105 | dist = xx + yy 106 | dist.addmm_(1, -2, x, y.t()) 107 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 108 | return dist 109 | 110 | 111 | def hard_example_mining(dist_mat, labels, return_inds=False): 112 | """For each anchor, find the hardest positive and negative sample. 113 | Args: 114 | dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N] 115 | labels: pytorch LongTensor, with shape [N] 116 | return_inds: whether to return the indices. Save time if `False`(?) 117 | Returns: 118 | dist_ap: pytorch Variable, distance(anchor, positive); shape [N] 119 | dist_an: pytorch Variable, distance(anchor, negative); shape [N] 120 | p_inds: pytorch LongTensor, with shape [N]; 121 | indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 122 | n_inds: pytorch LongTensor, with shape [N]; 123 | indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 124 | NOTE: Only consider the case in which all labels have same num of samples, 125 | thus we can cope with all anchors in parallel. 126 | """ 127 | 128 | assert len(dist_mat.size()) == 2 129 | assert dist_mat.size(0) == dist_mat.size(1) 130 | N = dist_mat.size(0) 131 | 132 | # shape [N, N] 133 | is_pos = labels.expand(N, N).eq(labels.expand(N, N).t()) 134 | is_neg = labels.expand(N, N).ne(labels.expand(N, N).t()) 135 | 136 | # `dist_ap` means distance(anchor, positive) 137 | # both `dist_ap` and `relative_p_inds` with shape [N, 1] 138 | dist_ap, relative_p_inds = torch.max( 139 | dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True) 140 | # `dist_an` means distance(anchor, negative) 141 | # both `dist_an` and `relative_n_inds` with shape [N, 1] 142 | dist_an, relative_n_inds = torch.min( 143 | dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True) 144 | # shape [N] 145 | dist_ap = dist_ap.squeeze(1) 146 | dist_an = dist_an.squeeze(1) 147 | 148 | if return_inds: 149 | # shape [N, N] 150 | ind = (labels.new().resize_as_(labels) 151 | .copy_(torch.arange(0, N).long()) 152 | .unsqueeze(0).expand(N, N)) 153 | # shape [N, 1] 154 | p_inds = torch.gather( 155 | ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data) 156 | n_inds = torch.gather( 157 | ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data) 158 | # shape [N] 159 | p_inds = p_inds.squeeze(1) 160 | n_inds = n_inds.squeeze(1) 161 | return dist_ap, dist_an, p_inds, n_inds 162 | 163 | return dist_ap, dist_an 164 | 165 | 166 | class TripletLoss(object): 167 | """Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid). 
168 | Related Triplet Loss theory can be found in paper 'In Defense of the Triplet 169 | Loss for Person Re-Identification'.""" 170 | 171 | def __init__(self, margin=None): 172 | self.margin = margin 173 | if margin is not None: 174 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 175 | else: 176 | self.ranking_loss = nn.SoftMarginLoss() 177 | 178 | def __call__(self, global_feat, labels, normalize_feature=False): 179 | if normalize_feature: 180 | global_feat = normalize(global_feat, axis=-1) 181 | dist_mat = euclidean_dist(global_feat, global_feat) 182 | dist_ap, dist_an = hard_example_mining( 183 | dist_mat, labels) 184 | y = dist_an.new().resize_as_(dist_an).fill_(1) 185 | if self.margin is not None: 186 | loss = self.ranking_loss(dist_an, dist_ap, y) 187 | else: 188 | loss = self.ranking_loss(dist_an - dist_ap, y) 189 | return loss, dist_ap, dist_an -------------------------------------------------------------------------------- /meters.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import math 3 | 4 | import numpy as np 5 | 6 | 7 | class AverageMeter(object): 8 | def __init__(self): 9 | self.n = 0 10 | self.sum = 0.0 11 | self.var = 0.0 12 | self.val = 0.0 13 | self.mean = np.nan 14 | self.std = np.nan 15 | 16 | def update(self, value, n=1): 17 | self.val = value 18 | self.sum += value 19 | self.var += value * value 20 | self.n += n 21 | 22 | if self.n == 0: 23 | self.mean, self.std = np.nan, np.nan 24 | elif self.n == 1: 25 | self.mean, self.std = self.sum, np.inf 26 | else: 27 | self.mean = self.sum / self.n 28 | self.std = math.sqrt( 29 | (self.var - self.n * self.mean * self.mean) / (self.n - 1.0)) 30 | 31 | def value(self): 32 | return self.mean, self.std 33 | 34 | def reset(self): 35 | self.n = 0 36 | self.sum = 0.0 37 | self.var = 0.0 38 | self.val = 0.0 39 | self.mean = np.nan 40 | self.std = np.nan -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | import torch 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = 
self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, num_classes=1000): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | for m in self.modules(): 114 | if isinstance(m, nn.Conv2d): 115 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 116 | m.weight.data.normal_(0, math.sqrt(2. / n)) 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | 121 | def _make_layer(self, block, planes, blocks, stride=1): 122 | downsample = None 123 | if stride != 1 or self.inplanes != planes * block.expansion: 124 | downsample = nn.Sequential( 125 | nn.Conv2d(self.inplanes, planes * block.expansion, 126 | kernel_size=1, stride=stride, bias=False), 127 | nn.BatchNorm2d(planes * block.expansion), 128 | ) 129 | 130 | layers = [] 131 | layers.append(block(self.inplanes, planes, stride, downsample)) 132 | self.inplanes = planes * block.expansion 133 | for i in range(1, blocks): 134 | layers.append(block(self.inplanes, planes)) 135 | 136 | return nn.Sequential(*layers) 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.bn1(x) 141 | x = self.relu(x) 142 | x = self.maxpool(x) 143 | 144 | x = self.layer1(x) 145 | x = self.layer2(x) 146 | x = self.layer3(x) 147 | x = self.layer4(x) 148 | 149 | # x = self.avgpool(x) 150 | # x = x.view(x.size(0), -1) 151 | # x = self.fc(x) 152 | 153 | return x 154 | 155 | def load_param(self, model_path): 156 | param_dict = torch.load(model_path) 157 | for i in param_dict: 158 | if 'fc' in i: 159 | continue 160 | self.state_dict()[i].copy_(param_dict[i]) 161 | 162 | def resnet18(pretrained=False, **kwargs): 163 | """Constructs a ResNet-18 model. 
164 | 
165 |     Args:
166 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
167 |     """
168 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
169 |     if pretrained:
170 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))  # model_zoo / model_urls are assumed to come from this file's imports (not shown in this excerpt)
171 |     return model
172 | 
173 | def resnet50(pretrained=False, **kwargs):
174 |     """Constructs a ResNet-50 model.
175 | 
176 |     Args:
177 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
178 |     """
179 |     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=40)  # num_classes is fixed at 40 here; **kwargs is accepted but unused
180 |     if pretrained:
181 |         state_dict = model_zoo.load_url(model_urls['resnet50'])
182 |         # print(state_dict)
183 |         for i in state_dict:
184 |             if 'fc' in i:
185 |                 continue
186 |             model.state_dict()[i].copy_(state_dict[i])
187 |     return model
188 | 
189 | def weights_init_kaiming(m):
190 |     classname = m.__class__.__name__
191 |     if classname.find('Linear') != -1:
192 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
193 |         nn.init.constant_(m.bias, 0.0)
194 |     elif classname.find('Conv') != -1:
195 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
196 |         if m.bias is not None:
197 |             nn.init.constant_(m.bias, 0.0)
198 |     elif classname.find('BatchNorm') != -1:
199 |         if m.affine:
200 |             nn.init.normal_(m.weight, 1.0, 0.02)
201 |             nn.init.constant_(m.bias, 0.0)
202 | 
203 | def weights_init_kaiming1(m):  # older duplicate of the initializer above, updated to the non-deprecated nn.init *_ variants
204 |     classname = m.__class__.__name__
205 |     # print(classname)
206 |     if classname.find('Conv2d') != -1:
207 |         nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
208 |         if m.bias is not None: nn.init.constant_(m.bias.data, 0.0)
209 |     elif classname.find('Linear') != -1:
210 |         nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
211 |         #init.constant(m.bias.data, 0.0)
212 |     elif classname.find('BatchNorm1d') != -1:
213 |         nn.init.normal_(m.weight.data, 1.0, 0.02)
214 |         nn.init.constant_(m.bias.data, 0.0)
215 | 
216 | def weights_init_classifier(m):
217 |     classname = m.__class__.__name__
218 |     if classname.find('Linear') != -1:
219 |         nn.init.normal_(m.weight, std=0.001)
220 |         if m.bias is not None:  # `if m.bias:` would raise on a multi-element bias tensor
221 |             nn.init.constant_(m.bias, 0.0)
222 | 
223 | class Baseline(nn.Module):
224 |     in_planes = 2048
225 | 
226 |     def __init__(self, num_classes = 40, model_path = '/home/zhoumi/.torch/models/resnet101-5d3b4d8f.pth', neck = 'bnneck', neck_feat = 'after', pretrain_choice = 'imagenet'):
227 |         super(Baseline, self).__init__()
228 | 
229 |         self.base = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes)
230 |         # if pretrain_choice == 'imagenet':
231 |         #     self.base.load_param(model_path)
232 |         #     print('Loading pretrained ImageNet model......')
233 | 
234 |         self.gap = nn.AdaptiveAvgPool2d(1)
235 |         # self.gap = nn.AdaptiveMaxPool2d(1)
236 |         self.num_classes = num_classes
237 |         self.neck = neck
238 |         self.neck_feat = neck_feat
239 | 
240 |         if self.neck == 'no':
241 |             self.classifier = nn.Linear(self.in_planes, self.num_classes)
242 |             # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)     # new add by luo
243 |             # self.classifier.apply(weights_init_classifier)  # new add by luo
244 |         elif self.neck == 'bnneck':
245 |             self.bottleneck = nn.BatchNorm1d(self.in_planes)
246 |             self.bottleneck.bias.requires_grad_(False)  # no shift
247 |             self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)
248 | 
249 |             self.bottleneck.apply(weights_init_kaiming)
250 |             self.classifier.apply(weights_init_classifier)
251 | 
252 |     def forward(self, x):
253 |         x = self.base(x)
254 |         global_feat = self.gap(x)  # (b, 2048, 1, 1)
255 |         global_feat = global_feat.view(global_feat.shape[0], -1)  # flatten to 
(bs, 2048) 256 | 257 | if self.neck == 'no': 258 | feat = global_feat 259 | elif self.neck == 'bnneck': 260 | feat = self.bottleneck(global_feat) # normalize for angular softmax 261 | 262 | # if self.training: 263 | cls_score = self.classifier(feat) 264 | return cls_score, global_feat 265 | # return [global_feat], [cls_score] # global feature for triplet loss 266 | # else: 267 | # if self.neck_feat == 'after': 268 | # # print("Test with feature after BN") 269 | # return feat 270 | # else: 271 | # # print("Test with feature before BN") 272 | # return global_feat 273 | 274 | # def get_optim_policy(self): 275 | # return self.parameters() 276 | 277 | #feature fusion 278 | class ClassBlock(nn.Module): 279 | def __init__(self, input_dim, class_num, relu=True, num_bottleneck=512): 280 | super(ClassBlock, self).__init__() 281 | # add_block = [] 282 | add_block1 = [] 283 | add_block2 = [] 284 | add_block1 += [nn.BatchNorm1d(input_dim)] 285 | if relu: 286 | add_block1 += [nn.LeakyReLU(0.1)] 287 | add_block1 += [nn.Linear(input_dim, num_bottleneck, bias=False)] 288 | add_block2 += [nn.BatchNorm1d(num_bottleneck)] 289 | 290 | # add_block = nn.Sequential(*add_block) 291 | # add_block.apply(weights_init_kaiming) 292 | add_block1 = nn.Sequential(*add_block1) 293 | add_block1.apply(weights_init_kaiming1) 294 | add_block2 = nn.Sequential(*add_block2) 295 | add_block2.apply(weights_init_kaiming1) 296 | classifier = [] 297 | classifier += [nn.Linear(num_bottleneck, class_num, bias=False)] 298 | classifier = nn.Sequential(*classifier) 299 | classifier.apply(weights_init_classifier) 300 | 301 | self.add_block1 = add_block1 302 | self.add_block2 = add_block2 303 | self.classifier = classifier 304 | 305 | def forward(self, x): 306 | x = self.add_block1(x) 307 | x1 = self.add_block2(x) 308 | x2 = self.classifier(x1) 309 | return x2 310 | 311 | 312 | # ft_net_50_1 313 | class ft_net(nn.Module): 314 | 315 | def __init__(self, num_classes = 40, pretrain_choice = 'imagenet', 316 | model_path = '/home/zhoumi/.torch/models/resnet101-5d3b4d8f.pth'): 317 | super(ft_net, self).__init__() 318 | model_ft = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes) 319 | # if pretrain_choice == 'imagenet': 320 | # model_ft.load_param(model_path) 321 | # print('Loading pretrained ImageNet model......') 322 | # avg pooling to global pooling 323 | model_ft.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 324 | model_ft.fc = nn.Sequential() 325 | self.model = model_ft 326 | # remove the final downsample 327 | self.model.layer4[0].downsample[0].stride = (1, 1) 328 | self.model.layer4[0].conv2.stride = (1, 1) 329 | self.avgpool_1 = nn.AdaptiveAvgPool2d((1, 1)) 330 | # self.avgpool_2 = nn.AdaptiveAvgPool2d((2,2)) 331 | 332 | self.avgpool_2 = nn.AdaptiveAvgPool2d((2, 2)) 333 | self.avgpool_3 = nn.AdaptiveMaxPool2d((2, 2)) 334 | self.avgpool_4 = nn.AdaptiveMaxPool2d((1, 1)) 335 | self.avgpool_5 = nn.AdaptiveMaxPool2d((1, 1)) 336 | self.classifier_1 = ClassBlock(1024, num_classes, num_bottleneck=512) 337 | 338 | self.classifier_2 = ClassBlock(2048, num_classes, num_bottleneck=512) 339 | self.classifier_3 = ClassBlock(8192, num_classes, num_bottleneck=512) 340 | 341 | def forward(self, x): 342 | x = self.model.conv1(x) 343 | x = self.model.bn1(x) 344 | x = self.model.relu(x) 345 | x = self.model.maxpool(x) 346 | x = self.model.layer1(x) 347 | x = self.model.layer2(x) 348 | x0 = self.model.layer3(x) 349 | x = self.model.layer4(x0) 350 | x3 = self.model.avgpool(x) 351 | x_3 = self.avgpool_5(x) 352 | x_41 = self.avgpool_2(x) 353 | x_4 = 
self.avgpool_3(x) 354 | x_0 = self.avgpool_1(x0) 355 | x_1 = self.avgpool_4(x0) 356 | x0 = x_0 + x_1 357 | x_31 = x3 + x_3 358 | x4 = x_41 + x_4 359 | # 360 | x6 = torch.squeeze(x0, dim=2) 361 | x6 = torch.squeeze(x6, dim=2) 362 | 363 | # x_0 = torch.squeeze(x_0) 364 | # x_1 = torch.squeeze(x_1) 365 | # x3 = torch.squeeze(x3) 366 | # x_3 = torch.squeeze(x_3) 367 | # x7 = x1.view(x1.size(0),-1) 368 | 369 | # 370 | x9 = torch.squeeze(x_31, dim=2) 371 | x9 = torch.squeeze(x9, dim=2) 372 | 373 | #x_10 = x_4.view(x_4.size(0), -1) 374 | #x_11 = x_41.view(x_41.size(0), -1) 375 | x10 = x4.view(x4.size(0), -1) 376 | 377 | # 378 | x16 = self.classifier_1(x6) 379 | x18 = self.classifier_2(x9) 380 | x22 = self.classifier_3(x10) 381 | # 382 | return x16, x18, x22#, x_0, x_1, x3, x_3, x_10, x_11 383 | 384 | from efficientnet_pytorch import EfficientNet, efficientnet 385 | 386 | class efficient_baseline(nn.Module): 387 | in_planes = 1792 388 | def __init__(self, num_classes = 40, neck = 'bnneck', neck_feat = 'after', 389 | model_path = '/home/zhoumi/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth'): 390 | super(efficient_baseline, self).__init__() 391 | 392 | #1.4, 1.8, 380, 0.4 393 | blocks_args, global_params = efficientnet(width_coefficient=1.4, depth_coefficient=1.8, 394 | dropout_rate=0.4, image_size=380) 395 | 396 | self.base = EfficientNet(blocks_args=blocks_args, global_params=global_params) 397 | self.base.load_param(model_path) 398 | print('Loading pretrained ImageNet model......') 399 | # self.gap = nn.AdaptiveAvgPool2d(1) 400 | # self.gap = nn.AdaptiveMaxPool2d(1) 401 | self.num_classes = num_classes 402 | self.neck = neck 403 | self.neck_feat = neck_feat 404 | 405 | if self.neck == 'no': 406 | self.classifier = nn.Linear(self.in_planes, self.num_classes) 407 | # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) # new add by luo 408 | # self.classifier.apply(weights_init_classifier) # new add by luo 409 | elif self.neck == 'bnneck': 410 | self.bottleneck = nn.BatchNorm1d(self.in_planes) 411 | self.bottleneck.bias.requires_grad_(False) # no shift 412 | self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) 413 | 414 | self.bottleneck.apply(weights_init_kaiming) 415 | self.classifier.apply(weights_init_classifier) 416 | 417 | def forward(self, x): 418 | global_feat = self.base(x) 419 | 420 | # global_feat = self.gap(x) # (b, 2048, 1, 1) 421 | # global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) 422 | 423 | if self.neck == 'no': 424 | feat = global_feat 425 | elif self.neck == 'bnneck': 426 | feat = self.bottleneck(global_feat) # normalize for angular softmax 427 | 428 | # if self.training: 429 | cls_score = self.classifier(feat) 430 | return cls_score, global_feat 431 | 432 | -------------------------------------------------------------------------------- /preprocess_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import random 4 | 5 | fd_train = open('/Users/zhoumi/Downloads/garbage_classify/train.txt', 'w') 6 | fd_test = open('/Users/zhoumi/Downloads/garbage_classify/val.txt', 'w') 7 | img_files = glob.glob('/Users/zhoumi/Downloads/garbage_classify/train_data/*jpg') 8 | 9 | for img_file in img_files: 10 | class_file = img_file.replace('.jpg', '.txt') 11 | txt = open(class_file, 'r') 12 | label = txt.readlines()[0].split(' ')[-1] 13 | 14 | if random.uniform(0, 1) > 0.1: 15 | fd_train.write(img_file) 16 | fd_train.write(' ') 17 | 
fd_train.write(label.strip())  # strip a possible trailing newline read out of the label .txt file
18 |         fd_train.write('\n')
19 |     else:
20 |         fd_test.write(img_file)
21 |         fd_test.write(' ')
22 |         fd_test.write(label.strip())
23 |         fd_test.write('\n')
24 |     print(img_file)
25 | 
26 | fd_train.close()
27 | fd_test.close()
--------------------------------------------------------------------------------
/ramdom_erase.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from torchvision.transforms import *
4 | 
5 | from PIL import Image
6 | import random
7 | import math
8 | import numpy as np
9 | import torch
10 | 
11 | 
12 | class Cutout(object):
13 |     def __init__(self, probability=0.5, size=64, mean=[0.4914, 0.4822, 0.4465]):
14 |         self.probability = probability
15 |         self.mean = mean
16 |         self.size = size
17 | 
18 |     def __call__(self, img):
19 | 
20 |         if random.uniform(0, 1) > self.probability:
21 |             return img
22 | 
23 |         h = self.size
24 |         w = self.size
25 |         for attempt in range(100):  # the patch size is fixed, so this either succeeds on the first pass or not at all
26 |             area = img.size()[1] * img.size()[2]  # unused; left over from the RandomErasing logic below
27 |             if w < img.size()[2] and h < img.size()[1]:
28 |                 x1 = random.randint(0, img.size()[1] - h)
29 |                 y1 = random.randint(0, img.size()[2] - w)
30 |                 if img.size()[0] == 3:
31 |                     img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
32 |                     img[1, x1:x1 + h, y1:y1 + w] = self.mean[1]
33 |                     img[2, x1:x1 + h, y1:y1 + w] = self.mean[2]
34 |                 else:
35 |                     img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
36 |                 return img
37 |         return img
38 | 
39 | 
40 | class RandomErasing(object):
41 |     """ Randomly selects a rectangle region in an image and erases its pixels.
42 |         'Random Erasing Data Augmentation' by Zhong et al.
43 |         See https://arxiv.org/pdf/1708.04896.pdf
44 |     Args:
45 |         probability: The probability that the Random Erasing operation will be performed.
46 |         sl: Minimum proportion of erased area against input image.
47 |         sh: Maximum proportion of erased area against input image.
48 |         r1: Minimum aspect ratio of erased area.
49 |         mean: Erasing value.
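        Example: place it after ToTensor()/Normalize() inside a T.Compose pipeline, as train.py does, since __call__ indexes the input as a CHW tensor.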
50 | """ 51 | 52 | def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): 53 | self.probability = probability 54 | self.mean = mean 55 | self.sl = sl 56 | self.sh = sh 57 | self.r1 = r1 58 | 59 | def __call__(self, img): 60 | 61 | if random.uniform(0, 1) > self.probability: 62 | return img 63 | 64 | for attempt in range(100): 65 | area = img.size()[1] * img.size()[2] 66 | 67 | target_area = random.uniform(self.sl, self.sh) * area 68 | aspect_ratio = random.uniform(self.r1, 1 / self.r1) 69 | 70 | h = int(round(math.sqrt(target_area * aspect_ratio))) 71 | w = int(round(math.sqrt(target_area / aspect_ratio))) 72 | 73 | if w < img.size()[2] and h < img.size()[1]: 74 | x1 = random.randint(0, img.size()[1] - h) 75 | y1 = random.randint(0, img.size()[2] - w) 76 | if img.size()[0] == 3: 77 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 78 | img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] 79 | img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] 80 | else: 81 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 82 | return img 83 | 84 | return img 85 | -------------------------------------------------------------------------------- /samplers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | import torch 7 | import random 8 | from torch.utils.data.sampler import Sampler 9 | import copy 10 | 11 | 12 | class RandomIdentitySampler(Sampler): 13 | def __init__(self, data_source, num_instances=4): 14 | self.data_source = data_source 15 | self.num_instances = num_instances 16 | self.index_dic = defaultdict(list) 17 | for index, (_, pid) in enumerate(data_source): 18 | self.index_dic[pid].append(index) 19 | self.pids = list(self.index_dic.keys()) 20 | self.num_identities = len(self.pids) 21 | print(self.num_identities) 22 | 23 | def __iter__(self): 24 | indices = torch.randperm(self.num_identities) 25 | ret = [] 26 | for i in indices: 27 | pid = self.pids[i] 28 | t = self.index_dic[pid] 29 | replace = False if len(t) >= self.num_instances else True 30 | t = np.random.choice(t, size=self.num_instances, replace=replace) 31 | ret.extend(t) 32 | return iter(ret) 33 | 34 | def __len__(self): 35 | return self.num_identities * self.num_instances 36 | 37 | class RandomIdentitySampler_new(Sampler): 38 | """ 39 | Randomly sample N identities, then for each identity, 40 | randomly sample K instances, therefore batch size is N*K. 41 | Args: 42 | - data_source (list): list of (img_path, pid, camid). 43 | - num_instances (int): number of instances per identity in a batch. 44 | - batch_size (int): number of examples in a batch. 
45 | """ 46 | 47 | def __init__(self, data_source, batch_size, num_instances): 48 | self.data_source = data_source 49 | self.batch_size = batch_size 50 | self.num_instances = num_instances 51 | self.num_pids_per_batch = self.batch_size // self.num_instances 52 | self.index_dic = defaultdict(list) 53 | for index, (_, pid) in enumerate(self.data_source): 54 | self.index_dic[pid].append(index) 55 | self.pids = list(self.index_dic.keys()) 56 | 57 | # estimate number of examples in an epoch 58 | self.length = 0 59 | for pid in self.pids: 60 | idxs = self.index_dic[pid] 61 | num = len(idxs) 62 | if num < self.num_instances: 63 | num = self.num_instances 64 | self.length += num - num % self.num_instances 65 | 66 | def __iter__(self): 67 | batch_idxs_dict = defaultdict(list) 68 | 69 | for pid in self.pids: 70 | idxs = copy.deepcopy(self.index_dic[pid]) 71 | if len(idxs) < self.num_instances: 72 | idxs = np.random.choice(idxs, size=self.num_instances, replace=True) 73 | random.shuffle(idxs) 74 | batch_idxs = [] 75 | for idx in idxs: 76 | batch_idxs.append(idx) 77 | if len(batch_idxs) == self.num_instances: 78 | batch_idxs_dict[pid].append(batch_idxs) 79 | batch_idxs = [] 80 | 81 | avai_pids = copy.deepcopy(self.pids) 82 | final_idxs = [] 83 | 84 | while len(avai_pids) >= self.num_pids_per_batch: 85 | selected_pids = random.sample(avai_pids, self.num_pids_per_batch) 86 | for pid in selected_pids: 87 | batch_idxs = batch_idxs_dict[pid].pop(0) 88 | final_idxs.extend(batch_idxs) 89 | if len(batch_idxs_dict[pid]) == 0: 90 | avai_pids.remove(pid) 91 | 92 | self.length = len(final_idxs) 93 | return iter(final_idxs) 94 | 95 | def __len__(self): 96 | return self.length 97 | -------------------------------------------------------------------------------- /senet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ResNet code gently borrowed from 3 | https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 4 | """ 5 | from __future__ import print_function, division, absolute_import 6 | from collections import OrderedDict 7 | import math 8 | import torch 9 | import torch.nn as nn 10 | from torch.utils import model_zoo 11 | 12 | __all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 13 | 'se_resnext50_32x4d', 'se_resnext101_32x4d'] 14 | 15 | pretrained_settings = { 16 | 'senet154': { 17 | 'imagenet': { 18 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', 19 | 'input_space': 'RGB', 20 | 'input_size': [3, 224, 224], 21 | 'input_range': [0, 1], 22 | 'mean': [0.485, 0.456, 0.406], 23 | 'std': [0.229, 0.224, 0.225], 24 | 'num_classes': 1000 25 | } 26 | }, 27 | 'se_resnet50': { 28 | 'imagenet': { 29 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', 30 | 'input_space': 'RGB', 31 | 'input_size': [3, 224, 224], 32 | 'input_range': [0, 1], 33 | 'mean': [0.485, 0.456, 0.406], 34 | 'std': [0.229, 0.224, 0.225], 35 | 'num_classes': 1000 36 | } 37 | }, 38 | 'se_resnet101': { 39 | 'imagenet': { 40 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', 41 | 'input_space': 'RGB', 42 | 'input_size': [3, 224, 224], 43 | 'input_range': [0, 1], 44 | 'mean': [0.485, 0.456, 0.406], 45 | 'std': [0.229, 0.224, 0.225], 46 | 'num_classes': 1000 47 | } 48 | }, 49 | 'se_resnet152': { 50 | 'imagenet': { 51 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', 52 | 'input_space': 'RGB', 53 | 'input_size': [3, 224, 224], 54 | 'input_range': [0, 
1], 55 | 'mean': [0.485, 0.456, 0.406], 56 | 'std': [0.229, 0.224, 0.225], 57 | 'num_classes': 1000 58 | } 59 | }, 60 | 'se_resnext50_32x4d': { 61 | 'imagenet': { 62 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 63 | 'input_space': 'RGB', 64 | 'input_size': [3, 224, 224], 65 | 'input_range': [0, 1], 66 | 'mean': [0.485, 0.456, 0.406], 67 | 'std': [0.229, 0.224, 0.225], 68 | 'num_classes': 1000 69 | } 70 | }, 71 | 'se_resnext101_32x4d': { 72 | 'imagenet': { 73 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', 74 | 'input_space': 'RGB', 75 | 'input_size': [3, 224, 224], 76 | 'input_range': [0, 1], 77 | 'mean': [0.485, 0.456, 0.406], 78 | 'std': [0.229, 0.224, 0.225], 79 | 'num_classes': 1000 80 | } 81 | }, 82 | } 83 | 84 | 85 | class SEModule(nn.Module): 86 | 87 | def __init__(self, channels, reduction): 88 | super(SEModule, self).__init__() 89 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 90 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, 91 | padding=0) 92 | self.relu = nn.ReLU(inplace=True) 93 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, 94 | padding=0) 95 | self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | module_input = x 99 | x = self.avg_pool(x) 100 | x = self.fc1(x) 101 | x = self.relu(x) 102 | x = self.fc2(x) 103 | x = self.sigmoid(x) 104 | return module_input * x 105 | 106 | 107 | class Bottleneck(nn.Module): 108 | """ 109 | Base class for bottlenecks that implements `forward()` method. 110 | """ 111 | def forward(self, x): 112 | residual = x 113 | 114 | out = self.conv1(x) 115 | out = self.bn1(out) 116 | out = self.relu(out) 117 | 118 | out = self.conv2(out) 119 | out = self.bn2(out) 120 | out = self.relu(out) 121 | 122 | out = self.conv3(out) 123 | out = self.bn3(out) 124 | 125 | if self.downsample is not None: 126 | residual = self.downsample(x) 127 | 128 | out = self.se_module(out) + residual 129 | out = self.relu(out) 130 | 131 | return out 132 | 133 | 134 | class SEBottleneck(Bottleneck): 135 | """ 136 | Bottleneck for SENet154. 137 | """ 138 | expansion = 4 139 | 140 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 141 | downsample=None): 142 | super(SEBottleneck, self).__init__() 143 | self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) 144 | self.bn1 = nn.BatchNorm2d(planes * 2) 145 | self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, 146 | stride=stride, padding=1, groups=groups, 147 | bias=False) 148 | self.bn2 = nn.BatchNorm2d(planes * 4) 149 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 150 | bias=False) 151 | self.bn3 = nn.BatchNorm2d(planes * 4) 152 | self.relu = nn.ReLU(inplace=True) 153 | self.se_module = SEModule(planes * 4, reduction=reduction) 154 | self.downsample = downsample 155 | self.stride = stride 156 | 157 | 158 | class SEResNetBottleneck(Bottleneck): 159 | """ 160 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 161 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 162 | (the latter is used in the torchvision implementation of ResNet). 
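    Striding the 1x1 conv1 downsamples before the 3x3 convolution runs, which is slightly lossier than striding conv2, but it matches the layout that the Caffe-converted pretrained weights expect.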
163 | """ 164 | expansion = 4 165 | 166 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 167 | downsample=None): 168 | super(SEResNetBottleneck, self).__init__() 169 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 170 | stride=stride) 171 | self.bn1 = nn.BatchNorm2d(planes) 172 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, 173 | groups=groups, bias=False) 174 | self.bn2 = nn.BatchNorm2d(planes) 175 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 176 | self.bn3 = nn.BatchNorm2d(planes * 4) 177 | self.relu = nn.ReLU(inplace=True) 178 | self.se_module = SEModule(planes * 4, reduction=reduction) 179 | self.downsample = downsample 180 | self.stride = stride 181 | 182 | 183 | class SEResNeXtBottleneck(Bottleneck): 184 | """ 185 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 186 | """ 187 | expansion = 4 188 | 189 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 190 | downsample=None, base_width=4): 191 | super(SEResNeXtBottleneck, self).__init__() 192 | width = math.floor(planes * (base_width / 64)) * groups 193 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 194 | stride=1) 195 | self.bn1 = nn.BatchNorm2d(width) 196 | self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, 197 | padding=1, groups=groups, bias=False) 198 | self.bn2 = nn.BatchNorm2d(width) 199 | self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) 200 | self.bn3 = nn.BatchNorm2d(planes * 4) 201 | self.relu = nn.ReLU(inplace=True) 202 | self.se_module = SEModule(planes * 4, reduction=reduction) 203 | self.downsample = downsample 204 | self.stride = stride 205 | 206 | 207 | class SENet(nn.Module): 208 | 209 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 210 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 211 | downsample_padding=1, last_stride=2): 212 | """ 213 | Parameters 214 | ---------- 215 | block (nn.Module): Bottleneck class. 216 | - For SENet154: SEBottleneck 217 | - For SE-ResNet models: SEResNetBottleneck 218 | - For SE-ResNeXt models: SEResNeXtBottleneck 219 | layers (list of ints): Number of residual blocks for 4 layers of the 220 | network (layer1...layer4). 221 | groups (int): Number of groups for the 3x3 convolution in each 222 | bottleneck block. 223 | - For SENet154: 64 224 | - For SE-ResNet models: 1 225 | - For SE-ResNeXt models: 32 226 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules. 227 | - For all models: 16 228 | dropout_p (float or None): Drop probability for the Dropout layer. 229 | If `None` the Dropout layer is not used. 230 | - For SENet154: 0.2 231 | - For SE-ResNet models: None 232 | - For SE-ResNeXt models: None 233 | inplanes (int): Number of input channels for layer1. 234 | - For SENet154: 128 235 | - For SE-ResNet models: 64 236 | - For SE-ResNeXt models: 64 237 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of 238 | a single 7x7 convolution in layer0. 239 | - For SENet154: True 240 | - For SE-ResNet models: False 241 | - For SE-ResNeXt models: False 242 | downsample_kernel_size (int): Kernel size for downsampling convolutions 243 | in layer2, layer3 and layer4. 244 | - For SENet154: 3 245 | - For SE-ResNet models: 1 246 | - For SE-ResNeXt models: 1 247 | downsample_padding (int): Padding for downsampling convolutions in 248 | layer2, layer3 and layer4. 
249 | - For SENet154: 1 250 | - For SE-ResNet models: 0 251 | - For SE-ResNeXt models: 0 252 | num_classes (int): Number of outputs in `last_linear` layer. 253 | - For all models: 1000 254 | """ 255 | super(SENet, self).__init__() 256 | self.inplanes = inplanes 257 | if input_3x3: 258 | layer0_modules = [ 259 | ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, 260 | bias=False)), 261 | ('bn1', nn.BatchNorm2d(64)), 262 | ('relu1', nn.ReLU(inplace=True)), 263 | ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, 264 | bias=False)), 265 | ('bn2', nn.BatchNorm2d(64)), 266 | ('relu2', nn.ReLU(inplace=True)), 267 | ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, 268 | bias=False)), 269 | ('bn3', nn.BatchNorm2d(inplanes)), 270 | ('relu3', nn.ReLU(inplace=True)), 271 | ] 272 | else: 273 | layer0_modules = [ 274 | ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, 275 | padding=3, bias=False)), 276 | ('bn1', nn.BatchNorm2d(inplanes)), 277 | ('relu1', nn.ReLU(inplace=True)), 278 | ] 279 | # To preserve compatibility with Caffe weights `ceil_mode=True` 280 | # is used instead of `padding=1`. 281 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 282 | ceil_mode=True))) 283 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 284 | self.layer1 = self._make_layer( 285 | block, 286 | planes=64, 287 | blocks=layers[0], 288 | groups=groups, 289 | reduction=reduction, 290 | downsample_kernel_size=1, 291 | downsample_padding=0 292 | ) 293 | self.layer2 = self._make_layer( 294 | block, 295 | planes=128, 296 | blocks=layers[1], 297 | stride=2, 298 | groups=groups, 299 | reduction=reduction, 300 | downsample_kernel_size=downsample_kernel_size, 301 | downsample_padding=downsample_padding 302 | ) 303 | self.layer3 = self._make_layer( 304 | block, 305 | planes=256, 306 | blocks=layers[2], 307 | stride=2, 308 | groups=groups, 309 | reduction=reduction, 310 | downsample_kernel_size=downsample_kernel_size, 311 | downsample_padding=downsample_padding 312 | ) 313 | self.layer4 = self._make_layer( 314 | block, 315 | planes=512, 316 | blocks=layers[3], 317 | stride=last_stride, 318 | groups=groups, 319 | reduction=reduction, 320 | downsample_kernel_size=downsample_kernel_size, 321 | downsample_padding=downsample_padding 322 | ) 323 | self.avg_pool = nn.AvgPool2d(7, stride=1) 324 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 325 | 326 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 327 | downsample_kernel_size=1, downsample_padding=0): 328 | downsample = None 329 | if stride != 1 or self.inplanes != planes * block.expansion: 330 | downsample = nn.Sequential( 331 | nn.Conv2d(self.inplanes, planes * block.expansion, 332 | kernel_size=downsample_kernel_size, stride=stride, 333 | padding=downsample_padding, bias=False), 334 | nn.BatchNorm2d(planes * block.expansion), 335 | ) 336 | 337 | layers = [] 338 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 339 | downsample)) 340 | self.inplanes = planes * block.expansion 341 | for i in range(1, blocks): 342 | layers.append(block(self.inplanes, planes, groups, reduction)) 343 | 344 | return nn.Sequential(*layers) 345 | 346 | def load_param(self, model_path): 347 | param_dict = torch.load(model_path) 348 | for i in param_dict: 349 | if 'last_linear' in i: 350 | continue 351 | self.state_dict()[i].copy_(param_dict[i]) 352 | 353 | def forward(self, x): 354 | x = self.layer0(x) 355 | x = self.layer1(x) 356 | x = self.layer2(x) 357 | x = self.layer3(x) 358 | x = self.layer4(x) 
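        # NOTE: forward() intentionally stops at the layer4 feature map. avg_pool and
        # dropout are constructed in __init__ but never applied here, and the
        # num_classes / last_linear head described in the docstring is not defined in
        # this trimmed copy (load_param accordingly skips 'last_linear' keys).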
359 |         return x
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision.models
5 | from dataloader import *
6 | import torchvision.transforms as T
7 | from loss import CrossEntropyLabelSmooth, CenterLoss, TripletLoss
8 | from torch.utils.data import DataLoader
9 | from meters import AverageMeter
10 | from model import resnet50, Baseline, ft_net, efficient_baseline
11 | from torch.autograd import Variable
12 | import torch
13 | from bisect import bisect_right
14 | from ramdom_erase import Cutout, RandomErasing
15 | from samplers import RandomIdentitySampler, RandomIdentitySampler_new
16 | from efficientnet_pytorch import EfficientNet
17 | import copy  # used below to snapshot the best-performing model
18 | 
19 | NUM_CLASSES = 40
20 | MAX_EPOCH = 60
21 | BATCH_SIZE = 32
22 | TEST_BATCH_SIZE = 1
23 | use_triplet = True
24 | use_ff = False
25 | use_efficientnet = False
26 | 
27 | def adjust_lr(ep):
28 |     lr = 1e-4
29 |     if use_triplet == True:
30 |         warmup_factor = 1
31 |         if ep < 10:
32 |             alpha = ep / 10
33 |             warmup_factor = 0.01 * (1 - alpha) + alpha
34 | 
35 |         lr = lr * warmup_factor * 0.1 ** bisect_right([20, 40], ep)
36 |     else:
37 |         if ep < 4:
38 |             lr = 1e-4
39 |         elif ep < 7:
40 |             lr = 1e-5
41 |         else:
42 |             lr = 1e-6
43 | 
44 |     return lr
45 | 
46 | # model = resnet50(num_classes=NUM_CLASSES, pretrained=True)
47 | if use_ff == False:
48 |     if use_efficientnet == True:
49 |         model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=NUM_CLASSES)
50 |         # model = torch.nn.DataParallel(model)
51 |     else:
52 |         model = efficient_baseline(num_classes=NUM_CLASSES, neck='bnneck')
53 | else:
54 |     model = ft_net(num_classes=NUM_CLASSES)
55 | print(model)
56 | 
57 | train_transform = T.Compose([T.Resize((224, 224)),
58 |                              T.RandomHorizontalFlip(),
59 |                              # T.RandomVerticalFlip(),
60 |                              # T.ColorJitter(0.5, 0.5, 0.5, 0.5),
61 |                              T.Pad(10),
62 |                              T.RandomCrop((224, 224)),
63 |                              # T.RandomRotation(90),
64 |                              T.ToTensor(),
65 |                              T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
66 |                              Cutout(probability=0.5, size=64, mean=[0.0, 0.0, 0.0]),
67 |                              RandomErasing(probability=0.0, mean=[0.485, 0.456, 0.406])])
68 | 
69 | test_transform = T.Compose([T.Resize((224, 224)),
70 |                             T.ToTensor(),
71 |                             T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
72 | 
73 | train_datasets = MyDataset(txt_path='/data/zhoumi/datasets/train_data/train.txt', transform=train_transform)
74 | train = process_dir(txt_path='/data/zhoumi/datasets/train_data/train.txt')
75 | 
76 | if use_triplet == True:
77 |     train_data = DataLoader(train_datasets, sampler=RandomIdentitySampler_new(train, BATCH_SIZE, 4),  # the sampler's batch size should match the DataLoader's, giving 8 classes x 4 instances per batch
78 |                             batch_size=BATCH_SIZE, pin_memory=True, num_workers=8, drop_last=True)
79 | else:
80 |     train_data = DataLoader(train_datasets, batch_size=BATCH_SIZE, pin_memory=True, shuffle=True)
81 | 
82 | test_data = DataLoader(MyDataset(txt_path='/data/zhoumi/datasets/train_data/val.txt', transform=test_transform),
83 |                        batch_size=TEST_BATCH_SIZE, pin_memory=True)
84 | 
85 | optimizer = optim.Adam(params=model.parameters(), lr=1e-4, weight_decay=5e-4)
86 | # optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
87 | 
88 | # define the loss functions
89 | xent_criterion = CrossEntropyLabelSmooth(NUM_CLASSES)
90 | center_criterion = CenterLoss(NUM_CLASSES, feat_dim=1792)
91 | triplet_criterion = TripletLoss(margin=0.3)
92 | 
93 | best_model = model
94 | best_acc = 0
95 | print(len(test_data) * TEST_BATCH_SIZE, len(train_data))
96 | 
97 | model = model.cuda()
98 | 
99 | for epoch in range(MAX_EPOCH):
100 |     lr = adjust_lr(epoch)
101 |     for p in optimizer.param_groups:
102 |         p['lr'] = lr
103 | 
104 |     for i, inputs in enumerate(train_data):
105 |         model = model.train()
106 |         images, labels = Variable(inputs[0].cuda()), Variable(inputs[1].cuda())
107 |         if use_ff == False:
108 |             if use_efficientnet == True:
109 |                 output = model(images)
110 |             else:
111 |                 output, feat = model(images)
112 |         else:
113 |             output1, output2, output3 = model(images)
114 |         if use_triplet == True:
115 |             softmax_loss = xent_criterion(output, labels)
116 |             triplet_loss = triplet_criterion(feat, labels)[0]
117 |             losses = softmax_loss + triplet_loss + 0.0005 * center_criterion(feat, labels)
118 |         else:
119 |             if use_ff == False:
120 |                 losses = xent_criterion(output, labels)
121 |             else:
122 |                 losses = (xent_criterion(output1, labels) + xent_criterion(output2, labels) + xent_criterion(output3, labels))/3
123 |         optimizer.zero_grad()
124 |         losses.backward()
125 |         optimizer.step()
126 | 
127 |         if (i+1) % 100 == 0:
128 |             corrects = 0
129 |             model = model.eval()
130 |             for j, test in enumerate(test_data):
131 |                 t_images, t_labels = Variable(test[0].cuda()), Variable(test[1].cuda())
132 | 
133 |                 if use_ff == False:
134 |                     if use_efficientnet == True:
135 |                         pred = torch.argmax(model(t_images), 1)
136 |                     else:
137 |                         _, pred = torch.max(model(t_images)[0], 1)
138 |                 else:
139 |                     o1, o2, o3 = model(t_images)
140 |                     _, pred = torch.max((o1 + o2 + o3)/3, 1)
141 |                 # print(pred, t_labels.data)  # per-sample debug output; very noisy
142 | 
143 |                 corrects += torch.sum(pred == t_labels.data)
144 | 
145 |             acc = corrects.item() / len(test_data.dataset)  # divide by the true sample count (robust to a partial final batch)
146 |             if acc > best_acc:
147 |                 best_acc = acc
148 |                 best_model = copy.deepcopy(model)  # real snapshot; `best_model = model` would only alias the live network
149 | 
150 |             if use_triplet == True:
151 |                 print("epoch: {}, iter: {}, lr: {}, loss: {}, softmax_loss: {}, triplet_loss: {} acc: {}".format(epoch,
152 |                     i, optimizer.param_groups[0]['lr'], losses.item(), softmax_loss.item(), triplet_loss.item(), acc))
153 |             else:
154 |                 print("epoch: {}, iter: {}, lr: {}, loss: {}, acc: {}".format(epoch,
155 |                     i, optimizer.param_groups[0]['lr'], losses.item(), acc))
156 | 
157 | torch.save(best_model, './best_model_v2_tri_center_old.pth')  # persist the best validation snapshot rather than the final-epoch weights
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
--------------------------------------------------------------------------------
/tt.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import os
3 | import glob
4 | src_path = '/Users/zhoumi/Downloads/garbage_classify_v2/train_data_v2/'
5 | dst_path = '/Users/zhoumi/Downloads/garbage_classify_v2/val'
6 | with open('/Users/zhoumi/Downloads/garbage_classify_v2/train_data_v2/val.txt', 'r') as fd:
7 |     lines = fd.readlines()
8 |     for line in lines:
9 |         img_path = line.split(' ')[0].split('/')[-1]
10 |         label = str(line.split(' ')[1])
11 |         img_name = str(label).replace('\n', '') + '_' + img_path
12 |         shutil.copy(os.path.join(src_path, line.split(' ')[0].split('/')[-1]), os.path.join(dst_path, img_name))
13 | 
14 | # img_paths = glob.glob('/Users/zhoumi/Downloads/garbage_classify/train_data/*jpg')
15 | # dst_path = '/Users/zhoumi/Downloads/garbage_classify/new/'
16 | # for img_path in img_paths:
17 | #     txt_path = img_path.replace('.jpg', '.txt')
18 | #     with open(txt_path, 'r') as fd:
19 | #         line = fd.readlines()[0]
20 | #         label = line.split(' ')[-1]
21 | #         if not os.path.exists(os.path.join(dst_path, str(label))):
22 | #             os.mkdir(os.path.join(dst_path, str(label)))
23 | #
24 | #             shutil.copy(img_path, os.path.join(dst_path, str(label), line.split(' ')[0][:-1]))
25 | # fd.close()
26 | 
27 | from efficientnet_pytorch import EfficientNet  # leftover scratch import; unused while the line below stays commented out
28 | # model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=40)
29 | 
30 | 
31 | 
--------------------------------------------------------------------------------
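Usage sketch (not a file from the repo): a minimal, hedged example of evaluating a trained classifier with the pieces above. It assumes model.py's Baseline and dataloader.py's MyDataset as defined in this dump, a val.txt in the "path label" format written by preprocess_data.py (the 'val.txt' path here is a placeholder), and a CUDA device; the checkpoint name matches the one train.py saves.

import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader

from dataloader import MyDataset
from model import Baseline

# Same eval-time preprocessing that train.py uses.
test_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_loader = DataLoader(
    MyDataset(txt_path='val.txt', transform=test_transform),
    batch_size=32,
)

# Either build the architecture fresh ...
model = Baseline(num_classes=40, neck='bnneck').cuda().eval()
# ... or load the full model object saved by train.py:
# model = torch.load('./best_model_v2_tri_center_old.pth').cuda().eval()

correct, total = 0, 0
with torch.no_grad():
    for images, labels in val_loader:
        cls_score, _ = model(images.cuda())   # Baseline returns (logits, global feature)
        pred = cls_score.argmax(dim=1).cpu()
        correct += (pred == labels).sum().item()
        total += labels.size(0)
print('val accuracy:', correct / total)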