├── README.md
├── dataloader.py
├── deploy_scripts
│   ├── config.json
│   └── customize_service.py
├── efficientnet_pytorch
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   └── utils.cpython-36.pyc
│   ├── model.py
│   └── utils.py
├── get_img_url.py
├── inference.py
├── loss.py
├── meters.py
├── model.py
├── preprocess_data.py
├── ramdom_erase.py
├── samplers.py
├── senet.py
├── train.py
└── tt.py
/README.md: --------------------------------------------------------------------------------
1 | # garbage_classification
2 | Huawei Cloud garbage classification source code, PyTorch implementation
3 | 
4 | ResNet50 plus some training tricks:
5 | 1. random erasing and cutout for data augmentation
6 | 2. bnneck before the fc layer
7 | 3. multi-feature fusion
8 | 4. label smoothing, triplet loss, center loss, etc.
9 | 
10 | EfficientNet is also supported.
11 | 
-------------------------------------------------------------------------------- /dataloader.py: --------------------------------------------------------------------------------
1 | from PIL import Image
2 | from torch.utils.data import Dataset
3 | 
4 | class MyDataset(Dataset):
5 |     def __init__(self, txt_path = '../../data/garbage_classify/img_list.txt', transform = None):
6 |         fd = open(txt_path, 'r')
7 |         imgs = []
8 | 
9 |         for line in fd:
10 |             line = line.rstrip()
11 |             words = line.split(' ')
12 |             imgs.append((words[0], int(words[1])))
13 | 
14 |         self.imgs = imgs
15 |         self.transforms = transform
16 | 
17 |     def __getitem__(self, item):
18 |         fn, label = self.imgs[item]
19 | 
20 |         image = Image.open(fn)
21 | 
22 |         if self.transforms is not None:
23 |             image = self.transforms(image)
24 | 
25 |         return image, label
26 | 
27 |     def __len__(self):
28 |         return len(self.imgs)
29 | 
30 | def process_dir(txt_path = '../../data/garbage_classify/img_list.txt'):
31 |     fd = open(txt_path, 'r')
32 |     imgs = []
33 | 
34 |     for line in fd:
35 |         line = line.rstrip()
36 |         words = line.split(' ')
37 |         imgs.append((words[0], int(words[1])))
38 |     return imgs
39 | 
-------------------------------------------------------------------------------- /deploy_scripts/config.json: --------------------------------------------------------------------------------
1 | {
2 |     "model_algorithm": "image_classification",
3 |     "model_type": "PyTorch",
4 |     "runtime": "python3.6",
5 |     "metrics": {
6 |         "f1": 0,
7 |         "accuracy": 0.6253,
8 |         "precision": 0,
9 |         "recall": 0
10 |     },
11 |     "apis": [
12 |         {
13 |             "protocol": "http",
14 |             "url": "/",
15 |             "method": "post",
16 |             "request": {
17 |                 "Content-type": "multipart/form-data",
18 |                 "data": {
19 |                     "type": "object",
20 |                     "properties": {
21 |                         "input_img": {"type": "file"}
22 |                     },
23 |                     "required": ["input_img"]
24 |                 }
25 |             },
26 |             "response": {
27 |                 "Content-type": "multipart/form-data",
28 |                 "data": {
29 |                     "type": "object",
30 |                     "properties": {
31 |                         "result": {"type": "string"}
32 |                     },
33 |                     "required": ["result"]
34 |                 }
35 |             }
36 |         }
37 |     ],
38 |     "dependencies": [
39 |         {
40 |             "installer": "pip",
41 |             "packages": [
42 |                 {
43 |                     "package_name": "Pillow",
44 |                     "package_version": "5.0.0",
45 |                     "restraint": "ATLEAST"
46 |                 }
47 |             ]
48 |         }
49 |     ]
50 | }
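The schema above declares a multipart/form-data POST with one required `input_img` file field. A minimal client sketch against that contract (the endpoint URL is a placeholder for the address ModelArts assigns after deployment, and `test.jpg` is a hypothetical local image):

```python
import requests

SERVICE_URL = 'https://<your-modelarts-endpoint>/'  # placeholder, filled in after deployment

with open('test.jpg', 'rb') as f:
    # 'input_img' matches the required file field declared in the request schema above
    resp = requests.post(SERVICE_URL, files={'input_img': f})
print(resp.text)  # expected to carry a 'result' field, e.g. a label such as "可回收物/易拉罐"
```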
-------------------------------------------------------------------------------- /deploy_scripts/customize_service.py: --------------------------------------------------------------------------------
1 | import ast
2 | import numpy as np
3 | from PIL import Image
4 | import torch
5 | import torch.nn as nn
6 | import math
7 | import torchvision.transforms as T
8 | from efficientnet_pytorch import EfficientNet, efficientnet
9 | from model_service.pytorch_model_service import PTServingBaseService
10 | 
11 | def weights_init_kaiming(m):
12 |     classname = m.__class__.__name__
13 |     if classname.find('Linear') != -1:
14 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
15 |         nn.init.constant_(m.bias, 0.0)
16 |     elif classname.find('Conv') != -1:
17 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
18 |         if m.bias is not None:
19 |             nn.init.constant_(m.bias, 0.0)
20 |     elif classname.find('BatchNorm') != -1:
21 |         if m.affine:
22 |             nn.init.normal_(m.weight, 1.0, 0.02)
23 |             nn.init.constant_(m.bias, 0.0)
24 | 
25 | 
26 | def weights_init_classifier(m):
27 |     classname = m.__class__.__name__
28 |     if classname.find('Linear') != -1:
29 |         nn.init.normal_(m.weight, std=0.001)
30 |         if m.bias is not None:  # truth-testing a multi-element tensor raises a RuntimeError
31 |             nn.init.constant_(m.bias, 0.0)
32 | 
33 | def conv3x3(in_planes, out_planes, stride=1):
34 |     """3x3 convolution with padding"""
35 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
36 |                      padding=1, bias=False)
37 | 
38 | 
39 | class BasicBlock(nn.Module):
40 |     expansion = 1
41 | 
42 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
43 |         super(BasicBlock, self).__init__()
44 |         self.conv1 = conv3x3(inplanes, planes, stride)
45 |         self.bn1 = nn.BatchNorm2d(planes)
46 |         self.relu = nn.ReLU(inplace=True)
47 |         self.conv2 = conv3x3(planes, planes)
48 |         self.bn2 = nn.BatchNorm2d(planes)
49 |         self.downsample = downsample
50 |         self.stride = stride
51 | 
52 |     def forward(self, x):
53 |         residual = x
54 | 
55 |         out = self.conv1(x)
56 |         out = self.bn1(out)
57 |         out = self.relu(out)
58 | 
59 |         out = self.conv2(out)
60 |         out = self.bn2(out)
61 | 
62 |         if self.downsample is not None:
63 |             residual = self.downsample(x)
64 | 
65 |         out += residual
66 |         out = self.relu(out)
67 | 
68 |         return out
69 | 
70 | 
71 | class Bottleneck(nn.Module):
72 |     expansion = 4
73 | 
74 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
75 |         super(Bottleneck, self).__init__()
76 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
77 |         self.bn1 = nn.BatchNorm2d(planes)
78 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
79 |                                padding=1, bias=False)
80 |         self.bn2 = nn.BatchNorm2d(planes)
81 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
82 |         self.bn3 = nn.BatchNorm2d(planes * 4)
83 |         self.relu = nn.ReLU(inplace=True)
84 |         self.downsample = downsample
85 |         self.stride = stride
86 | 
87 |     def forward(self, x):
88 |         residual = x
89 | 
90 |         out = self.conv1(x)
91 |         out = self.bn1(out)
92 |         out = self.relu(out)
93 | 
94 |         out = self.conv2(out)
95 |         out = self.bn2(out)
96 |         out = self.relu(out)
97 | 
98 |         out = self.conv3(out)
99 |         out = self.bn3(out)
100 | 
101 |         if self.downsample is not None:
102 |             residual = self.downsample(x)
103 | 
104 |         out += residual
105 |         out = self.relu(out)
106 | 
107 |         return out
108 | 
109 | 
110 | class ResNet(nn.Module):
111 | 
112 |     def __init__(self, block, layers, num_classes=1000):
113 |         self.inplanes = 64
114 |         super(ResNet, self).__init__()
115 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
116 |                                bias=False)
117 |         self.bn1 = nn.BatchNorm2d(64)
118 |         self.relu = nn.ReLU(inplace=True)
119 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
120 |         self.layer1 = self._make_layer(block, 64, layers[0])
121 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
122 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
123 |         self.layer4 = self._make_layer(block, 512, layers[3],
stride=2) 124 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 125 | self.fc = nn.Linear(512 * block.expansion, num_classes) 126 | 127 | for m in self.modules(): 128 | if isinstance(m, nn.Conv2d): 129 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 130 | m.weight.data.normal_(0, math.sqrt(2. / n)) 131 | elif isinstance(m, nn.BatchNorm2d): 132 | m.weight.data.fill_(1) 133 | m.bias.data.zero_() 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1): 136 | downsample = None 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | nn.Conv2d(self.inplanes, planes * block.expansion, 140 | kernel_size=1, stride=stride, bias=False), 141 | nn.BatchNorm2d(planes * block.expansion), 142 | ) 143 | 144 | layers = [] 145 | layers.append(block(self.inplanes, planes, stride, downsample)) 146 | self.inplanes = planes * block.expansion 147 | for i in range(1, blocks): 148 | layers.append(block(self.inplanes, planes)) 149 | 150 | return nn.Sequential(*layers) 151 | 152 | def forward(self, x): 153 | x = self.conv1(x) 154 | x = self.bn1(x) 155 | x = self.relu(x) 156 | x = self.maxpool(x) 157 | 158 | x = self.layer1(x) 159 | x = self.layer2(x) 160 | x = self.layer3(x) 161 | x = self.layer4(x) 162 | 163 | # x = self.avgpool(x) 164 | # x = x.view(x.size(0), -1) 165 | # x = self.fc(x) 166 | 167 | return x 168 | 169 | class Baseline(nn.Module): 170 | in_planes = 2048 171 | 172 | def __init__(self, num_classes = 40, model_path = 'C:/Users/maliho/.torch/models/resnet50-19c8e357.pth', neck = 'bnneck', neck_feat = 'after', pretrain_choice = 'imagenet'): 173 | super(Baseline, self).__init__() 174 | 175 | self.base = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes) 176 | # if pretrain_choice == 'imagenet': 177 | # self.base.load_param(model_path) 178 | # print('Loading pretrained ImageNet model......') 179 | 180 | self.gap = nn.AdaptiveAvgPool2d(1) 181 | # self.gap = nn.AdaptiveMaxPool2d(1) 182 | self.num_classes = num_classes 183 | self.neck = neck 184 | self.neck_feat = neck_feat 185 | 186 | if self.neck == 'no': 187 | self.classifier = nn.Linear(self.in_planes, self.num_classes) 188 | # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) # new add by luo 189 | # self.classifier.apply(weights_init_classifier) # new add by luo 190 | elif self.neck == 'bnneck': 191 | self.bottleneck = nn.BatchNorm1d(self.in_planes) 192 | self.bottleneck.bias.requires_grad_(False) # no shift 193 | self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) 194 | 195 | self.bottleneck.apply(weights_init_kaiming) 196 | self.classifier.apply(weights_init_classifier) 197 | 198 | def forward(self, x): 199 | x = self.base(x) 200 | global_feat = self.gap(x) # (b, 2048, 1, 1) 201 | global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) 202 | 203 | if self.neck == 'no': 204 | feat = global_feat 205 | elif self.neck == 'bnneck': 206 | feat = self.bottleneck(global_feat) # normalize for angular softmax 207 | 208 | # if self.training: 209 | cls_score = self.classifier(feat) 210 | return cls_score 211 | # return [global_feat], [cls_score] # global feature for triplet loss 212 | # else: 213 | # if self.neck_feat == 'after': 214 | # # print("Test with feature after BN") 215 | # return feat 216 | # else: 217 | # # print("Test with feature before BN") 218 | # return global_feat 219 | 220 | # def get_optim_policy(self): 221 | # return self.parameters() 222 | 223 | class efficient_baseline(nn.Module): 224 | 
    in_planes = 1792
225 |     def __init__(self, num_classes = 40, neck = 'bnneck', neck_feat = 'after',
226 |                  model_path = '/home/zhoumi/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth'):
227 |         super(efficient_baseline, self).__init__()
228 | 
229 |         # efficientnet-b4 coefficients: width 1.4, depth 1.8, resolution 380, dropout 0.4
230 |         blocks_args, global_params = efficientnet(width_coefficient=1.4, depth_coefficient=1.8,
231 |                                                   dropout_rate=0.4, image_size=380)
232 | 
233 |         self.base = EfficientNet(blocks_args=blocks_args, global_params=global_params)
234 |         self.base.load_param(model_path)
235 |         print('Loading pretrained ImageNet model......')
236 |         # self.gap = nn.AdaptiveAvgPool2d(1)
237 |         # self.gap = nn.AdaptiveMaxPool2d(1)
238 |         self.num_classes = num_classes
239 |         self.neck = neck
240 |         self.neck_feat = neck_feat
241 | 
242 |         if self.neck == 'no':
243 |             self.classifier = nn.Linear(self.in_planes, self.num_classes)
244 |             # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)  # new add by luo
245 |             # self.classifier.apply(weights_init_classifier)  # new add by luo
246 |         elif self.neck == 'bnneck':
247 |             self.bottleneck = nn.BatchNorm1d(self.in_planes)
248 |             self.bottleneck.bias.requires_grad_(False)  # no shift
249 |             self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)
250 | 
251 |             self.bottleneck.apply(weights_init_kaiming)
252 |             self.classifier.apply(weights_init_classifier)
253 | 
254 |     def forward(self, x):
255 |         global_feat = self.base(x)
256 | 
257 |         # global_feat = self.gap(x)  # (b, 2048, 1, 1)
258 |         # global_feat = global_feat.view(global_feat.shape[0], -1)  # flatten to (bs, 2048)
259 | 
260 |         if self.neck == 'no':
261 |             feat = global_feat
262 |         elif self.neck == 'bnneck':
263 |             feat = self.bottleneck(global_feat)  # normalize for angular softmax
264 | 
265 |         # if self.training:
266 |         cls_score = self.classifier(feat)
267 |         return cls_score, global_feat
268 | 
269 | class garbage_classify_service(PTServingBaseService):
270 |     def __init__(self, model_name, model_path):
271 |         # these three parameters need not be modified
272 |         self.model_name = model_name
273 |         self.model_path = model_path
274 |         self.input_key_1 = 'input_img'
275 |         self.output_key_1 = 'output_score'
276 | 
277 |         self.input_size = 224  # the input image size of the model
278 |         # model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=40)
279 |         # model = Baseline(num_classes=40)
280 |         # model.load_state_dict(torch.load(self.model_path, map_location=lambda storage, loc: storage))
281 |         model = torch.load(self.model_path, map_location=lambda storage, loc: storage)
282 |         self.model = model.eval()
283 |         self.transform = T.Compose([T.Resize((224, 224)),
284 |                                     T.ToTensor(),
285 |                                     T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
286 | 
287 |         self.label_id_name_dict = \
288 |             {
289 |                 "0": "其他垃圾/一次性快餐盒",
290 |                 "1": "其他垃圾/污损塑料",
291 |                 "2": "其他垃圾/烟蒂",
292 |                 "3": "其他垃圾/牙签",
293 |                 "4": "其他垃圾/破碎花盆及碟碗",
294 |                 "5": "其他垃圾/竹筷",
295 |                 "6": "厨余垃圾/剩饭剩菜",
296 |                 "7": "厨余垃圾/大骨头",
297 |                 "8": "厨余垃圾/水果果皮",
298 |                 "9": "厨余垃圾/水果果肉",
299 |                 "10": "厨余垃圾/茶叶渣",
300 |                 "11": "厨余垃圾/菜叶菜根",
301 |                 "12": "厨余垃圾/蛋壳",
302 |                 "13": "厨余垃圾/鱼骨",
303 |                 "14": "可回收物/充电宝",
304 |                 "15": "可回收物/包",
305 |                 "16": "可回收物/化妆品瓶",
306 |                 "17": "可回收物/塑料玩具",
307 |                 "18": "可回收物/塑料碗盆",
308 |                 "19": "可回收物/塑料衣架",
309 |                 "20": "可回收物/快递纸袋",
310 |                 "21": "可回收物/插头电线",
311 |                 "22": "可回收物/旧衣服",
312 |                 "23": "可回收物/易拉罐",
313 |                 "24": "可回收物/枕头",
314 |                 "25": "可回收物/毛绒玩具",
315 |                 "26": "可回收物/洗发水瓶",
316 |                 "27": "可回收物/玻璃杯",
317 |                 "28": "可回收物/皮鞋",
318 |                 "29": "可回收物/砧板",
319 |                 "30": "可回收物/纸板箱",
320 |                 "31": "可回收物/调料瓶",
321 |                 "32": "可回收物/酒瓶",
322 |                 "33": "可回收物/金属食品罐",
323 |                 "34": "可回收物/锅",
324 |                 "35": "可回收物/食用油桶",
325 |                 "36": "可回收物/饮料瓶",
326 |                 "37": "有害垃圾/干电池",
327 |                 "38": "有害垃圾/软膏",
328 |                 "39": "有害垃圾/过期药物"
329 |             }
330 | 
331 |     def _preprocess(self, data):
332 |         preprocessed_data = {}
333 |         for k, v in data.items():
334 |             for file_name, file_content in v.items():
335 |                 img = Image.open(file_content)
336 |                 img = self.transform(img)
337 |                 preprocessed_data[k] = img
338 |         return preprocessed_data
339 | 
340 |     def _inference(self, data):
341 |         """
342 |         model inference function
343 |         Here is an inference example for ResNet; if you use another model, modify this function accordingly
344 |         """
345 |         img = data[self.input_key_1]
346 |         img = img[np.newaxis, :, :, :]  # the input tensor shape for ResNet is [?, 3, 224, 224] (NCHW)
347 |         pred_score = self.model(img)[0]
348 |         if pred_score is not None:
349 |             pred_label = torch.argmax(pred_score, dim=1).item()
350 |             result = {'result': self.label_id_name_dict[str(pred_label)]}
351 |         else:
352 |             result = {'result': 'predict score is None'}
353 |         return result
354 | 
355 |     def _postprocess(self, data):
356 |         return data
357 | 
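The preprocessing and decoding path above can also be exercised locally, outside ModelArts, without `PTServingBaseService`. A hypothetical smoke-test sketch, assuming a whole-model checkpoint saved via `torch.save` and a local test image (both paths are assumptions); it mirrors the transform and the `[0]` indexing used by `_inference` above:

```python
import torch
import torchvision.transforms as T
from PIL import Image

transform = T.Compose([T.Resize((224, 224)),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

model = torch.load('model_best.pth', map_location='cpu').eval()  # hypothetical checkpoint path
img = transform(Image.open('test.jpg').convert('RGB')).unsqueeze(0)  # [1, 3, 224, 224]
with torch.no_grad():
    pred_score = model(img)[0]  # same indexing as _inference above
pred_label = torch.argmax(pred_score, dim=1).item()
print(str(pred_label))  # key into label_id_name_dict
```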
"32": "可回收物/酒瓶", 322 | "33": "可回收物/金属食品罐", 323 | "34": "可回收物/锅", 324 | "35": "可回收物/食用油桶", 325 | "36": "可回收物/饮料瓶", 326 | "37": "有害垃圾/干电池", 327 | "38": "有害垃圾/软膏", 328 | "39": "有害垃圾/过期药物" 329 | } 330 | 331 | def _preprocess(self, data): 332 | preprocessed_data = {} 333 | for k, v in data.items(): 334 | for file_name, file_content in v.items(): 335 | img = Image.open(file_content) 336 | img = self.transform(img) 337 | preprocessed_data[k] = img 338 | return preprocessed_data 339 | 340 | def _inference(self, data): 341 | """ 342 | model inference function 343 | Here are a inference example of resnet, if you use another model, please modify this function 344 | """ 345 | img = data[self.input_key_1] 346 | img = img[np.newaxis, :, :, :] # the input tensor shape of resnet is [?, 224, 224, 3] 347 | pred_score = self.model(img)[0] 348 | if pred_score is not None: 349 | pred_label = torch.argmax(pred_score, dim=1).item() 350 | result = {'result': self.label_id_name_dict[str(pred_label)]} 351 | else: 352 | result = {'result': 'predict score is None'} 353 | return result 354 | 355 | def _postprocess(self, data): 356 | return data 357 | -------------------------------------------------------------------------------- /efficientnet_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.4.0" 2 | from .model import EfficientNet 3 | from .utils import ( 4 | GlobalParams, 5 | BlockArgs, 6 | BlockDecoder, 7 | efficientnet, 8 | get_model_params, 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maliho0803/garbage_classification/6fff88b248208e4a3184370b19aa5f3a25c10083/efficientnet_pytorch/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /efficientnet_pytorch/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from .utils import ( 6 | relu_fn, 7 | round_filters, 8 | round_repeats, 9 | drop_connect, 10 | get_same_padding_conv2d, 11 | get_model_params, 12 | efficientnet_params, 13 | load_pretrained_weights, 14 | ) 15 | 16 | class MBConvBlock(nn.Module): 17 | """ 18 | Mobile Inverted Residual Bottleneck Block 19 | 20 | Args: 21 | block_args (namedtuple): BlockArgs, see above 22 | global_params (namedtuple): GlobalParam, see above 23 | 24 | Attributes: 25 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
26 | """ 27 | 28 | def __init__(self, block_args, global_params): 29 | super().__init__() 30 | self._block_args = block_args 31 | self._bn_mom = 1 - global_params.batch_norm_momentum 32 | self._bn_eps = global_params.batch_norm_epsilon 33 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 34 | self.id_skip = block_args.id_skip # skip connection and drop connect 35 | 36 | # Get static or dynamic convolution depending on image size 37 | Conv2d = get_same_padding_conv2d(image_size=global_params.image_size) 38 | 39 | # Expansion phase 40 | inp = self._block_args.input_filters # number of input channels 41 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 42 | if self._block_args.expand_ratio != 1: 43 | self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 44 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 45 | 46 | # Depthwise convolution phase 47 | k = self._block_args.kernel_size 48 | s = self._block_args.stride 49 | self._depthwise_conv = Conv2d( 50 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 51 | kernel_size=k, stride=s, bias=False) 52 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 53 | 54 | # Squeeze and Excitation layer, if desired 55 | if self.has_se: 56 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 57 | self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 58 | self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 59 | 60 | # Output phase 61 | final_oup = self._block_args.output_filters 62 | self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 63 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 64 | 65 | def forward(self, inputs, drop_connect_rate=None): 66 | """ 67 | :param inputs: input tensor 68 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 69 | :return: output of block 70 | """ 71 | 72 | # Expansion and Depthwise Convolution 73 | x = inputs 74 | if self._block_args.expand_ratio != 1: 75 | x = relu_fn(self._bn0(self._expand_conv(inputs))) 76 | x = relu_fn(self._bn1(self._depthwise_conv(x))) 77 | 78 | # Squeeze and Excitation 79 | if self.has_se: 80 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 81 | x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed))) 82 | x = torch.sigmoid(x_squeezed) * x 83 | 84 | x = self._bn2(self._project_conv(x)) 85 | 86 | # Skip connection and drop connect 87 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters 88 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 89 | if drop_connect_rate: 90 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 91 | x = x + inputs # skip connection 92 | return x 93 | 94 | 95 | class EfficientNet(nn.Module): 96 | """ 97 | An EfficientNet model. 
Most easily loaded with the .from_name or .from_pretrained methods
98 | 
99 |     Args:
100 |         blocks_args (list): A list of BlockArgs to construct blocks
101 |         global_params (namedtuple): A set of GlobalParams shared between blocks
102 | 
103 |     Example:
104 |         model = EfficientNet.from_pretrained('efficientnet-b0')
105 | 
106 |     """
107 | 
108 |     def __init__(self, blocks_args=None, global_params=None):
109 |         super().__init__()
110 |         assert isinstance(blocks_args, list), 'blocks_args should be a list'
111 |         assert len(blocks_args) > 0, 'blocks_args must be non-empty'
112 |         self._global_params = global_params
113 |         self._blocks_args = blocks_args
114 | 
115 |         # Get static or dynamic convolution depending on image size
116 |         Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)
117 | 
118 |         # Batch norm parameters
119 |         bn_mom = 1 - self._global_params.batch_norm_momentum
120 |         bn_eps = self._global_params.batch_norm_epsilon
121 | 
122 |         # Stem
123 |         in_channels = 3  # rgb
124 |         out_channels = round_filters(32, self._global_params)  # number of output channels
125 |         self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
126 |         self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
127 | 
128 |         # Build blocks
129 |         self._blocks = nn.ModuleList([])
130 |         for block_args in self._blocks_args:
131 | 
132 |             # Update block input and output filters based on depth multiplier.
133 |             block_args = block_args._replace(
134 |                 input_filters=round_filters(block_args.input_filters, self._global_params),
135 |                 output_filters=round_filters(block_args.output_filters, self._global_params),
136 |                 num_repeat=round_repeats(block_args.num_repeat, self._global_params)
137 |             )
138 | 
139 |             # The first block needs to take care of stride and filter size increase.
140 |             self._blocks.append(MBConvBlock(block_args, self._global_params))
141 |             if block_args.num_repeat > 1:
142 |                 block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
143 |             for _ in range(block_args.num_repeat - 1):
144 |                 self._blocks.append(MBConvBlock(block_args, self._global_params))
145 | 
146 |         # Head
147 |         in_channels = block_args.output_filters  # output of final block
148 |         out_channels = round_filters(1280, self._global_params)
149 |         self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
150 |         self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
151 | 
152 |         # Final linear layer
153 |         self._dropout = self._global_params.dropout_rate
154 |         self._fc = nn.Linear(out_channels, self._global_params.num_classes)
155 | 
156 |     def extract_features(self, inputs):
157 |         """ Returns output of the final convolution layer """
158 | 
159 |         # Stem
160 |         x = relu_fn(self._bn0(self._conv_stem(inputs)))
161 | 
162 |         # Blocks
163 |         for idx, block in enumerate(self._blocks):
164 |             drop_connect_rate = self._global_params.drop_connect_rate
165 |             if drop_connect_rate:
166 |                 drop_connect_rate *= float(idx) / len(self._blocks)
167 |             x = block(x, drop_connect_rate=drop_connect_rate)
168 | 
169 |         # Head
170 |         x = relu_fn(self._bn1(self._conv_head(x)))
171 | 
172 |         return x
173 | 
174 |     def forward(self, inputs):
175 |         """ Calls extract_features to extract features, applies pooling and dropout, and returns the pooled feature vector (the final linear layer is commented out). """
176 | 
177 |         # Convolution layers
178 |         x = self.extract_features(inputs)
179 | 
180 |         # Pooling and final linear layer
181 |         x = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1)
182 |         if self._dropout:
183 |             x = F.dropout(x, p=self._dropout, training=self.training)
184 |         # x = self._fc(x)
185 |         return x
186 | 
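    # --- Editor's usage sketch (hypothetical, not part of the original file) ---
    # With the b4 coefficients that efficient_baseline passes in elsewhere in this
    # repo (width 1.4, depth 1.8, image size 380), round_filters(1280) = 1792, so
    # forward() returns a [batch, 1792] pooled feature tensor because _fc above is
    # commented out:
    #
    #   from efficientnet_pytorch import EfficientNet, efficientnet
    #   blocks_args, global_params = efficientnet(width_coefficient=1.4,
    #                                             depth_coefficient=1.8,
    #                                             dropout_rate=0.4, image_size=380)
    #   model = EfficientNet(blocks_args, global_params).eval()
    #   feat = model(torch.randn(1, 3, 380, 380))  # torch.Size([1, 1792])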
""" 176 | 177 | # Convolution layers 178 | x = self.extract_features(inputs) 179 | 180 | # Pooling and final linear layer 181 | x = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1) 182 | if self._dropout: 183 | x = F.dropout(x, p=self._dropout, training=self.training) 184 | # x = self._fc(x) 185 | return x 186 | 187 | def load_param(self, model_path): 188 | param_dict = torch.load(model_path) 189 | for i in param_dict: 190 | if '_fc' in i: 191 | continue 192 | self.state_dict()[i].copy_(param_dict[i]) 193 | 194 | @classmethod 195 | def from_name(cls, model_name, override_params=None): 196 | cls._check_model_name_is_valid(model_name) 197 | blocks_args, global_params = get_model_params(model_name, override_params) 198 | return cls(blocks_args, global_params) 199 | 200 | @classmethod 201 | def from_pretrained(cls, model_name, num_classes=1000): 202 | model = cls.from_name(model_name, override_params={'num_classes': num_classes}) 203 | load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000)) 204 | return model 205 | 206 | @classmethod 207 | def get_image_size(cls, model_name): 208 | cls._check_model_name_is_valid(model_name) 209 | _, _, res, _ = efficientnet_params(model_name) 210 | return res 211 | 212 | @classmethod 213 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 214 | """ Validates model name. None that pretrained weights are only available for 215 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 216 | num_models = 4 if also_need_pretrained_weights else 8 217 | valid_models = ['efficientnet-b'+str(i) for i in range(num_models)] 218 | if model_name not in valid_models: 219 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 220 | -------------------------------------------------------------------------------- /efficientnet_pytorch/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 
4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | from functools import partial 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from torch.utils import model_zoo 14 | 15 | 16 | ######################################################################## 17 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 18 | ######################################################################## 19 | 20 | 21 | # Parameters for the entire model (stem, all blocks, and head) 22 | GlobalParams = collections.namedtuple('GlobalParams', [ 23 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 24 | 'num_classes', 'width_coefficient', 'depth_coefficient', 25 | 'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size']) 26 | 27 | 28 | # Parameters for an individual model block 29 | BlockArgs = collections.namedtuple('BlockArgs', [ 30 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 31 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 32 | 33 | 34 | # Change namedtuple defaults 35 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 36 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 37 | 38 | 39 | def relu_fn(x): 40 | """ Swish activation function """ 41 | return x * torch.sigmoid(x) 42 | 43 | 44 | def round_filters(filters, global_params): 45 | """ Calculate and round number of filters based on depth multiplier. """ 46 | multiplier = global_params.width_coefficient 47 | if not multiplier: 48 | return filters 49 | divisor = global_params.depth_divisor 50 | min_depth = global_params.min_depth 51 | filters *= multiplier 52 | min_depth = min_depth or divisor 53 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 54 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 55 | new_filters += divisor 56 | return int(new_filters) 57 | 58 | 59 | def round_repeats(repeats, global_params): 60 | """ Round number of filters based on depth multiplier. """ 61 | multiplier = global_params.depth_coefficient 62 | if not multiplier: 63 | return repeats 64 | return int(math.ceil(multiplier * repeats)) 65 | 66 | 67 | def drop_connect(inputs, p, training): 68 | """ Drop connect. """ 69 | if not training: return inputs 70 | batch_size = inputs.shape[0] 71 | keep_prob = 1 - p 72 | random_tensor = keep_prob 73 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 74 | binary_tensor = torch.floor(random_tensor) 75 | output = inputs / keep_prob * binary_tensor 76 | return output 77 | 78 | 79 | def get_same_padding_conv2d(image_size=None): 80 | """ Chooses static padding if you have specified an image size, and dynamic padding otherwise. 81 | Static padding is necessary for ONNX exporting of models. 
""" 82 | if image_size is None: 83 | return Conv2dDynamicSamePadding 84 | else: 85 | return partial(Conv2dStaticSamePadding, image_size=image_size) 86 | 87 | class Conv2dDynamicSamePadding(nn.Conv2d): 88 | """ 2D Convolutions like TensorFlow, for a dynamic image size """ 89 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 90 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 91 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]]*2 92 | 93 | def forward(self, x): 94 | ih, iw = x.size()[-2:] 95 | kh, kw = self.weight.size()[-2:] 96 | sh, sw = self.stride 97 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 98 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 99 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 100 | if pad_h > 0 or pad_w > 0: 101 | x = F.pad(x, [pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2]) 102 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 103 | 104 | 105 | class Conv2dStaticSamePadding(nn.Conv2d): 106 | """ 2D Convolutions like TensorFlow, for a fixed image size""" 107 | def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs): 108 | super().__init__(in_channels, out_channels, kernel_size, **kwargs) 109 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 110 | 111 | # Calculate padding based on image size and save it 112 | assert image_size is not None 113 | ih, iw = image_size if type(image_size) == list else [image_size, image_size] 114 | kh, kw = self.weight.size()[-2:] 115 | sh, sw = self.stride 116 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 117 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 118 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 119 | if pad_h > 0 or pad_w > 0: 120 | self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)) 121 | else: 122 | self.static_padding = Identity() 123 | 124 | def forward(self, x): 125 | x = self.static_padding(x) 126 | x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 127 | return x 128 | 129 | 130 | class Identity(nn.Module): 131 | def __init__(self,): 132 | super(Identity, self).__init__() 133 | 134 | def forward(self, input): 135 | return input 136 | 137 | 138 | ######################################################################## 139 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 140 | ######################################################################## 141 | 142 | 143 | def efficientnet_params(model_name): 144 | """ Map EfficientNet model name to parameter coefficients. 
""" 145 | params_dict = { 146 | # Coefficients: width,depth,res,dropout 147 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 148 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 149 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 150 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 151 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 152 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 153 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 154 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 155 | } 156 | return params_dict[model_name] 157 | 158 | 159 | class BlockDecoder(object): 160 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 161 | 162 | @staticmethod 163 | def _decode_block_string(block_string): 164 | """ Gets a block through a string notation of arguments. """ 165 | assert isinstance(block_string, str) 166 | 167 | ops = block_string.split('_') 168 | options = {} 169 | for op in ops: 170 | splits = re.split(r'(\d.*)', op) 171 | if len(splits) >= 2: 172 | key, value = splits[:2] 173 | options[key] = value 174 | 175 | # Check stride 176 | assert (('s' in options and len(options['s']) == 1) or 177 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 178 | 179 | return BlockArgs( 180 | kernel_size=int(options['k']), 181 | num_repeat=int(options['r']), 182 | input_filters=int(options['i']), 183 | output_filters=int(options['o']), 184 | expand_ratio=int(options['e']), 185 | id_skip=('noskip' not in block_string), 186 | se_ratio=float(options['se']) if 'se' in options else None, 187 | stride=[int(options['s'][0])]) 188 | 189 | @staticmethod 190 | def _encode_block_string(block): 191 | """Encodes a block to a string.""" 192 | args = [ 193 | 'r%d' % block.num_repeat, 194 | 'k%d' % block.kernel_size, 195 | 's%d%d' % (block.strides[0], block.strides[1]), 196 | 'e%s' % block.expand_ratio, 197 | 'i%d' % block.input_filters, 198 | 'o%d' % block.output_filters 199 | ] 200 | if 0 < block.se_ratio <= 1: 201 | args.append('se%s' % block.se_ratio) 202 | if block.id_skip is False: 203 | args.append('noskip') 204 | return '_'.join(args) 205 | 206 | @staticmethod 207 | def decode(string_list): 208 | """ 209 | Decodes a list of string notations to specify blocks inside the network. 210 | 211 | :param string_list: a list of strings, each string is a notation of block 212 | :return: a list of BlockArgs namedtuples of block args 213 | """ 214 | assert isinstance(string_list, list) 215 | blocks_args = [] 216 | for block_string in string_list: 217 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 218 | return blocks_args 219 | 220 | @staticmethod 221 | def encode(blocks_args): 222 | """ 223 | Encodes a list of BlockArgs to a list of strings. 224 | 225 | :param blocks_args: a list of BlockArgs namedtuples of block args 226 | :return: a list of strings, each string is a notation of block 227 | """ 228 | block_strings = [] 229 | for block in blocks_args: 230 | block_strings.append(BlockDecoder._encode_block_string(block)) 231 | return block_strings 232 | 233 | 234 | def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, 235 | drop_connect_rate=0.2, image_size=None, num_classes=1000): 236 | """ Creates a efficientnet model. 
""" 237 | 238 | blocks_args = [ 239 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 240 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 241 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 242 | 'r1_k3_s11_e6_i192_o320_se0.25', 243 | ] 244 | blocks_args = BlockDecoder.decode(blocks_args) 245 | 246 | global_params = GlobalParams( 247 | batch_norm_momentum=0.99, 248 | batch_norm_epsilon=1e-3, 249 | dropout_rate=dropout_rate, 250 | drop_connect_rate=drop_connect_rate, 251 | # data_format='channels_last', # removed, this is always true in PyTorch 252 | num_classes=num_classes, 253 | width_coefficient=width_coefficient, 254 | depth_coefficient=depth_coefficient, 255 | depth_divisor=8, 256 | min_depth=None, 257 | image_size=image_size, 258 | ) 259 | 260 | return blocks_args, global_params 261 | 262 | 263 | def get_model_params(model_name, override_params): 264 | """ Get the block args and global params for a given model """ 265 | if model_name.startswith('efficientnet'): 266 | w, d, s, p = efficientnet_params(model_name) 267 | # note: all models have drop connect rate = 0.2 268 | blocks_args, global_params = efficientnet( 269 | width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) 270 | else: 271 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 272 | if override_params: 273 | # ValueError will be raised here if override_params has fields not included in global_params. 274 | global_params = global_params._replace(**override_params) 275 | return blocks_args, global_params 276 | 277 | 278 | url_map = { 279 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', 280 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', 281 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', 282 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', 283 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', 284 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', 285 | 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', 286 | 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', 287 | } 288 | 289 | def load_pretrained_weights(model, model_name, load_fc=True): 290 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 291 | state_dict = model_zoo.load_url(url_map[model_name]) 292 | if load_fc: 293 | model.load_state_dict(state_dict) 294 | else: 295 | state_dict.pop('_fc.weight') 296 | state_dict.pop('_fc.bias') 297 | res = model.load_state_dict(state_dict, strict=False) 298 | assert str(res.missing_keys) == str(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights' 299 | print('Loaded pretrained weights for {}'.format(model_name)) 300 | -------------------------------------------------------------------------------- /get_img_url.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | from urllib import error 4 | from bs4 import BeautifulSoup 5 | import os 6 | 7 | num = 0 8 | numPicture = 0 9 | file = '' 10 | List = [] 11 | 12 | 13 | def Find(url): 14 | global List 15 | print('正在检测图片总数,请稍等.....') 16 | t = 0 17 | i = 1 18 | s = 0 19 | while t < 1000: 20 | Url = url + str(t) 21 | try: 22 | Result = requests.get(Url, timeout=7) 23 | except BaseException: 24 | t = t + 60 25 | continue 26 | else: 27 | result = Result.text 28 | pic_url = re.findall('"objURL":"(.*?)",', result, re.S) # 先利用正则表达式找到图片url 29 | s += len(pic_url) 30 | if len(pic_url) == 0: 31 | break 32 | else: 33 | List.append(pic_url) 34 | t = t + 60 35 | return s 36 | 37 | 38 | def recommend(url): 39 | Re = [] 40 | try: 41 | html = requests.get(url) 42 | except error.HTTPError as e: 43 | return 44 | else: 45 | html.encoding = 'utf-8' 46 | bsObj = BeautifulSoup(html.text, 'html.parser') 47 | div = bsObj.find('div', id='topRS') 48 | if div is not None: 49 | listA = div.findAll('a') 50 | for i in listA: 51 | if i is not None: 52 | Re.append(i.get_text()) 53 | return Re 54 | 55 | 56 | def dowmloadPicture(html, keyword): 57 | global num 58 | # t =0 59 | pic_url = re.findall('"objURL":"(.*?)",', html, re.S) # 先利用正则表达式找到图片url 60 | print('找到关键词:' + keyword + '的图片,即将开始下载图片...') 61 | for each in pic_url: 62 | print('正在下载第' + str(num + 1) + '张图片,图片地址:' + str(each)) 63 | try: 64 | if each is not None: 65 | pic = requests.get(each, timeout=7) 66 | else: 67 | continue 68 | except BaseException: 69 | print('错误,当前图片无法下载') 70 | continue 71 | else: 72 | string = file + r'/' + keyword + '_' + str(num) + '.jpg' 73 | print(string) 74 | fp = open(string, 'wb') 75 | fp.write(pic.content) 76 | fp.close() 77 | num += 1 78 | if num >= numPicture: 79 | return 80 | 81 | 82 | if __name__ == '__main__': # 主函数入口 83 | word = input("请输入搜索关键词(可以是人名,地名等): ") 84 | # add = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120' 85 | url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=' + word + '&pn=' 86 | tot = Find(url) 87 | Recommend = recommend(url) # 记录相关推荐 88 | print('经过检测%s类图片共有%d张' % (word, tot)) 89 | numPicture = int(input('请输入想要下载的图片数量 ')) 90 | file = input('请建立一个存储图片的文件夹,输入文件夹名称即可') 91 | y = os.path.exists(file) 92 | if y == 1: 93 | print('该文件已存在,请重新输入') 94 | file = input('请建立一个存储图片的文件夹,)输入文件夹名称即可') 95 | os.mkdir(file) 96 | else: 97 | os.mkdir(file) 98 | t = 0 99 | tmp = url 100 | while t < numPicture: 101 | try: 102 | url = tmp + str(t) 103 | result = requests.get(url, timeout=10) 104 | print(url) 105 | except error.HTTPError as e: 106 | print('网络错误,请调整网络后重试') 107 | t = t + 60 108 | else: 109 | dowmloadPicture(result.text, word) 110 | t = t + 60 111 | 112 | print('当前搜索结束,感谢使用') 113 | print('猜你喜欢') 114 | for re in Recommend: 115 | print(re, end=' ') 116 | 117 | 
-------------------------------------------------------------------------------- /inference.py: --------------------------------------------------------------------------------
1 | from model import Baseline, ft_net, efficient_baseline
2 | from PIL import Image
3 | import glob
4 | import torch
5 | import torchvision.transforms as T
6 | import numpy as np
7 | import matplotlib.pyplot as plt
8 | from efficientnet_pytorch import EfficientNet
9 | 
10 | use_ff = False
11 | use_efficientnet = False
12 | transform = T.Compose([T.Resize((224, 224)),
13 |                        T.ToTensor(),
14 |                        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
15 | 
16 | img_paths = glob.glob('/Users/zhoumi/Downloads/garbage_classify/val_data/*jpg')
17 | # if use_ff == False:
18 | #     if use_efficientnet == True:
19 | #         model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=40)
20 | #     else:
21 | #         model = Baseline(num_classes=40)
22 | # else:
23 | #     model = ft_net(num_classes=40)
24 | # model.load_state_dict(torch.load('./models/ff_best_model.pth', map_location=lambda storage, loc: storage))
25 | model = torch.load('./models/best_model_v2_tri_old1.pth', map_location=lambda storage, loc: storage)
26 | model = model.eval().cpu()
27 | # print(model)
28 | 
29 | wrong = 0
30 | for img_path in img_paths:
31 |     label = int(img_path.split('/')[-1].split('_')[0])
32 |     img = transform(Image.open(img_path))
33 |     input = img[np.newaxis, :, :, :]
34 |     # print(input.size())
35 | 
36 |     if use_ff == False:
37 |         if use_efficientnet == True:
38 |             pred_score = model(input)
39 |         else:
40 |             pred_score, _ = model(input)
41 |             # print(pred_score)
42 |         pred_label = torch.argmax(pred_score, dim=1).item()
43 |     else:
44 |         o1, o2, o3 = model(input)
45 |         pred_label = torch.argmax((o1 + o2 + o3) / 3, dim=1).item()
46 | 
47 |     print(img_path.split('/')[-1], label, pred_label)
48 |     if label != pred_label:
49 |         # plt.imshow(Image.open(img_path))
50 |         wrong += 1
51 |         # plt.show()
52 | 
53 | print('acc:{}'.format(1 - wrong / len(img_paths)))
54 | 
55 | # best_model1.pth acc:0.9082819986310746 resnet50
56 | # tri_best_model.pth acc:0.9103353867214237 resnet50
57 | # ff_best_model.pth acc:0.8809034907597536 feature fusion
58 | # effic_best_model acc:0.9226557152635182 effic4
59 | # effic4_best_model.pth acc:0.9301848049281314 effic4
60 | 
61 | # new datasets
62 | # best_model_v2 acc:0.9340878828229028
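loss.py below applies the smoothing rule y = (1 - epsilon) * y + epsilon / K inside `CrossEntropyLabelSmooth`. A tiny standalone check of that arithmetic for K = 40 classes:

```python
import torch

num_classes, epsilon = 40, 0.1
# one-hot targets for two samples with true classes 3 and 7
targets = torch.zeros(2, num_classes).scatter_(1, torch.tensor([[3], [7]]), 1)
smoothed = (1 - epsilon) * targets + epsilon / num_classes
print(smoothed[0, 3].item())  # 0.9025 = 0.9 + 0.1/40 (true class)
print(smoothed[0, 0].item())  # 0.0025 = 0.1/40 (every other class)
```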
12 | """ 13 | 14 | def __init__(self, num_classes, epsilon=0.1, use_gpu=True): 15 | super(CrossEntropyLabelSmooth, self).__init__() 16 | self.num_classes = num_classes 17 | self.epsilon = epsilon 18 | self.use_gpu = use_gpu 19 | self.logsoftmax = nn.LogSoftmax(dim=1) 20 | 21 | def forward(self, inputs, targets): 22 | """ 23 | Args: 24 | inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) 25 | targets: ground truth labels with shape (num_classes) 26 | """ 27 | log_probs = self.logsoftmax(inputs) 28 | targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).cpu(), 1) 29 | if self.use_gpu: targets = targets.cuda() 30 | targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes 31 | loss = (- targets * log_probs).mean(0).sum() 32 | return loss 33 | 34 | class CenterLoss(nn.Module): 35 | """Center loss. 36 | Reference: 37 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 38 | Args: 39 | num_classes (int): number of classes. 40 | feat_dim (int): feature dimension. 41 | """ 42 | 43 | def __init__(self, num_classes=751, feat_dim=2048, use_gpu=True): 44 | super(CenterLoss, self).__init__() 45 | self.num_classes = num_classes 46 | self.feat_dim = feat_dim 47 | self.use_gpu = use_gpu 48 | 49 | if self.use_gpu: 50 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda()) 51 | else: 52 | self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim)) 53 | 54 | def forward(self, x, labels): 55 | """ 56 | Args: 57 | x: feature matrix with shape (batch_size, feat_dim). 58 | labels: ground truth labels with shape (num_classes). 59 | """ 60 | assert x.size(0) == labels.size(0), "features.size(0) is not equal to labels.size(0)" 61 | 62 | batch_size = x.size(0) 63 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 64 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t() 65 | distmat.addmm_(1, -2, x, self.centers.t()) 66 | 67 | classes = torch.arange(self.num_classes).long() 68 | if self.use_gpu: classes = classes.cuda() 69 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 70 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 71 | 72 | dist = distmat * mask.float() 73 | loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size 74 | #dist = [] 75 | #for i in range(batch_size): 76 | # value = distmat[i][mask[i]] 77 | # value = value.clamp(min=1e-12, max=1e+12) # for numerical stability 78 | # dist.append(value) 79 | #dist = torch.cat(dist) 80 | #loss = dist.mean() 81 | return loss 82 | 83 | def normalize(x, axis=-1): 84 | """Normalizing to unit length along the specified dimension. 85 | Args: 86 | x: pytorch Variable 87 | Returns: 88 | x: pytorch Variable, same shape as input 89 | """ 90 | x = 1. 
* x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) 91 | return x 92 | 93 | 94 | def euclidean_dist(x, y): 95 | """ 96 | Args: 97 | x: pytorch Variable, with shape [m, d] 98 | y: pytorch Variable, with shape [n, d] 99 | Returns: 100 | dist: pytorch Variable, with shape [m, n] 101 | """ 102 | m, n = x.size(0), y.size(0) 103 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 104 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 105 | dist = xx + yy 106 | dist.addmm_(1, -2, x, y.t()) 107 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 108 | return dist 109 | 110 | 111 | def hard_example_mining(dist_mat, labels, return_inds=False): 112 | """For each anchor, find the hardest positive and negative sample. 113 | Args: 114 | dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N] 115 | labels: pytorch LongTensor, with shape [N] 116 | return_inds: whether to return the indices. Save time if `False`(?) 117 | Returns: 118 | dist_ap: pytorch Variable, distance(anchor, positive); shape [N] 119 | dist_an: pytorch Variable, distance(anchor, negative); shape [N] 120 | p_inds: pytorch LongTensor, with shape [N]; 121 | indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 122 | n_inds: pytorch LongTensor, with shape [N]; 123 | indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 124 | NOTE: Only consider the case in which all labels have same num of samples, 125 | thus we can cope with all anchors in parallel. 126 | """ 127 | 128 | assert len(dist_mat.size()) == 2 129 | assert dist_mat.size(0) == dist_mat.size(1) 130 | N = dist_mat.size(0) 131 | 132 | # shape [N, N] 133 | is_pos = labels.expand(N, N).eq(labels.expand(N, N).t()) 134 | is_neg = labels.expand(N, N).ne(labels.expand(N, N).t()) 135 | 136 | # `dist_ap` means distance(anchor, positive) 137 | # both `dist_ap` and `relative_p_inds` with shape [N, 1] 138 | dist_ap, relative_p_inds = torch.max( 139 | dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True) 140 | # `dist_an` means distance(anchor, negative) 141 | # both `dist_an` and `relative_n_inds` with shape [N, 1] 142 | dist_an, relative_n_inds = torch.min( 143 | dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True) 144 | # shape [N] 145 | dist_ap = dist_ap.squeeze(1) 146 | dist_an = dist_an.squeeze(1) 147 | 148 | if return_inds: 149 | # shape [N, N] 150 | ind = (labels.new().resize_as_(labels) 151 | .copy_(torch.arange(0, N).long()) 152 | .unsqueeze(0).expand(N, N)) 153 | # shape [N, 1] 154 | p_inds = torch.gather( 155 | ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data) 156 | n_inds = torch.gather( 157 | ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data) 158 | # shape [N] 159 | p_inds = p_inds.squeeze(1) 160 | n_inds = n_inds.squeeze(1) 161 | return dist_ap, dist_an, p_inds, n_inds 162 | 163 | return dist_ap, dist_an 164 | 165 | 166 | class TripletLoss(object): 167 | """Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid). 
168 | Related Triplet Loss theory can be found in paper 'In Defense of the Triplet 169 | Loss for Person Re-Identification'.""" 170 | 171 | def __init__(self, margin=None): 172 | self.margin = margin 173 | if margin is not None: 174 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 175 | else: 176 | self.ranking_loss = nn.SoftMarginLoss() 177 | 178 | def __call__(self, global_feat, labels, normalize_feature=False): 179 | if normalize_feature: 180 | global_feat = normalize(global_feat, axis=-1) 181 | dist_mat = euclidean_dist(global_feat, global_feat) 182 | dist_ap, dist_an = hard_example_mining( 183 | dist_mat, labels) 184 | y = dist_an.new().resize_as_(dist_an).fill_(1) 185 | if self.margin is not None: 186 | loss = self.ranking_loss(dist_an, dist_ap, y) 187 | else: 188 | loss = self.ranking_loss(dist_an - dist_ap, y) 189 | return loss, dist_ap, dist_an -------------------------------------------------------------------------------- /meters.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import math 3 | 4 | import numpy as np 5 | 6 | 7 | class AverageMeter(object): 8 | def __init__(self): 9 | self.n = 0 10 | self.sum = 0.0 11 | self.var = 0.0 12 | self.val = 0.0 13 | self.mean = np.nan 14 | self.std = np.nan 15 | 16 | def update(self, value, n=1): 17 | self.val = value 18 | self.sum += value 19 | self.var += value * value 20 | self.n += n 21 | 22 | if self.n == 0: 23 | self.mean, self.std = np.nan, np.nan 24 | elif self.n == 1: 25 | self.mean, self.std = self.sum, np.inf 26 | else: 27 | self.mean = self.sum / self.n 28 | self.std = math.sqrt( 29 | (self.var - self.n * self.mean * self.mean) / (self.n - 1.0)) 30 | 31 | def value(self): 32 | return self.mean, self.std 33 | 34 | def reset(self): 35 | self.n = 0 36 | self.sum = 0.0 37 | self.var = 0.0 38 | self.val = 0.0 39 | self.mean = np.nan 40 | self.std = np.nan -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | import torch 5 | 6 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 7 | 'resnet152'] 8 | 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 14 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 15 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 16 | } 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=1, bias=False) 23 | 24 | 25 | class BasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__(self, inplanes, planes, stride=1, downsample=None): 29 | super(BasicBlock, self).__init__() 30 | self.conv1 = conv3x3(inplanes, planes, stride) 31 | self.bn1 = nn.BatchNorm2d(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = conv3x3(planes, planes) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | 38 | def forward(self, x): 39 | residual = x 40 | 41 | out = self.conv1(x) 42 | out = self.bn1(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv2(out) 46 | out = 
self.bn2(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(Bottleneck, self).__init__() 62 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 63 | self.bn1 = nn.BatchNorm2d(planes) 64 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=False) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 68 | self.bn3 = nn.BatchNorm2d(planes * 4) 69 | self.relu = nn.ReLU(inplace=True) 70 | self.downsample = downsample 71 | self.stride = stride 72 | 73 | def forward(self, x): 74 | residual = x 75 | 76 | out = self.conv1(x) 77 | out = self.bn1(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv2(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv3(out) 85 | out = self.bn3(out) 86 | 87 | if self.downsample is not None: 88 | residual = self.downsample(x) 89 | 90 | out += residual 91 | out = self.relu(out) 92 | 93 | return out 94 | 95 | 96 | class ResNet(nn.Module): 97 | 98 | def __init__(self, block, layers, num_classes=1000): 99 | self.inplanes = 64 100 | super(ResNet, self).__init__() 101 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(64) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | for m in self.modules(): 114 | if isinstance(m, nn.Conv2d): 115 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 116 | m.weight.data.normal_(0, math.sqrt(2. / n)) 117 | elif isinstance(m, nn.BatchNorm2d): 118 | m.weight.data.fill_(1) 119 | m.bias.data.zero_() 120 | 121 | def _make_layer(self, block, planes, blocks, stride=1): 122 | downsample = None 123 | if stride != 1 or self.inplanes != planes * block.expansion: 124 | downsample = nn.Sequential( 125 | nn.Conv2d(self.inplanes, planes * block.expansion, 126 | kernel_size=1, stride=stride, bias=False), 127 | nn.BatchNorm2d(planes * block.expansion), 128 | ) 129 | 130 | layers = [] 131 | layers.append(block(self.inplanes, planes, stride, downsample)) 132 | self.inplanes = planes * block.expansion 133 | for i in range(1, blocks): 134 | layers.append(block(self.inplanes, planes)) 135 | 136 | return nn.Sequential(*layers) 137 | 138 | def forward(self, x): 139 | x = self.conv1(x) 140 | x = self.bn1(x) 141 | x = self.relu(x) 142 | x = self.maxpool(x) 143 | 144 | x = self.layer1(x) 145 | x = self.layer2(x) 146 | x = self.layer3(x) 147 | x = self.layer4(x) 148 | 149 | # x = self.avgpool(x) 150 | # x = x.view(x.size(0), -1) 151 | # x = self.fc(x) 152 | 153 | return x 154 | 155 | def load_param(self, model_path): 156 | param_dict = torch.load(model_path) 157 | for i in param_dict: 158 | if 'fc' in i: 159 | continue 160 | self.state_dict()[i].copy_(param_dict[i]) 161 | 162 | def resnet18(pretrained=False, **kwargs): 163 | """Constructs a ResNet-18 model. 
164 | 
165 |     Args:
166 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
167 |     """
168 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
169 |     if pretrained:
170 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))  # model_zoo / model_urls are assumed to come from this file's imports (not shown in this excerpt)
171 |     return model
172 | 
173 | def resnet50(pretrained=False, **kwargs):
174 |     """Constructs a ResNet-50 model.
175 | 
176 |     Args:
177 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
178 |     """
179 |     model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=40)  # num_classes is fixed at 40 here; **kwargs is accepted but unused
180 |     if pretrained:
181 |         state_dict = model_zoo.load_url(model_urls['resnet50'])
182 |         # print(state_dict)
183 |         for i in state_dict:
184 |             if 'fc' in i:
185 |                 continue
186 |             model.state_dict()[i].copy_(state_dict[i])
187 |     return model
188 | 
189 | def weights_init_kaiming(m):
190 |     classname = m.__class__.__name__
191 |     if classname.find('Linear') != -1:
192 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
193 |         nn.init.constant_(m.bias, 0.0)
194 |     elif classname.find('Conv') != -1:
195 |         nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
196 |         if m.bias is not None:
197 |             nn.init.constant_(m.bias, 0.0)
198 |     elif classname.find('BatchNorm') != -1:
199 |         if m.affine:
200 |             nn.init.normal_(m.weight, 1.0, 0.02)
201 |             nn.init.constant_(m.bias, 0.0)
202 | 
203 | def weights_init_kaiming1(m):  # older duplicate of the initializer above, updated to the non-deprecated nn.init *_ variants
204 |     classname = m.__class__.__name__
205 |     # print(classname)
206 |     if classname.find('Conv2d') != -1:
207 |         nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
208 |         if m.bias is not None: nn.init.constant_(m.bias.data, 0.0)
209 |     elif classname.find('Linear') != -1:
210 |         nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
211 |         #init.constant(m.bias.data, 0.0)
212 |     elif classname.find('BatchNorm1d') != -1:
213 |         nn.init.normal_(m.weight.data, 1.0, 0.02)
214 |         nn.init.constant_(m.bias.data, 0.0)
215 | 
216 | def weights_init_classifier(m):
217 |     classname = m.__class__.__name__
218 |     if classname.find('Linear') != -1:
219 |         nn.init.normal_(m.weight, std=0.001)
220 |         if m.bias is not None:  # `if m.bias:` would raise on a multi-element bias tensor
221 |             nn.init.constant_(m.bias, 0.0)
222 | 
223 | class Baseline(nn.Module):
224 |     in_planes = 2048
225 | 
226 |     def __init__(self, num_classes = 40, model_path = '/home/zhoumi/.torch/models/resnet101-5d3b4d8f.pth', neck = 'bnneck', neck_feat = 'after', pretrain_choice = 'imagenet'):
227 |         super(Baseline, self).__init__()
228 | 
229 |         self.base = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes)
230 |         # if pretrain_choice == 'imagenet':
231 |         #     self.base.load_param(model_path)
232 |         #     print('Loading pretrained ImageNet model......')
233 | 
234 |         self.gap = nn.AdaptiveAvgPool2d(1)
235 |         # self.gap = nn.AdaptiveMaxPool2d(1)
236 |         self.num_classes = num_classes
237 |         self.neck = neck
238 |         self.neck_feat = neck_feat
239 | 
240 |         if self.neck == 'no':
241 |             self.classifier = nn.Linear(self.in_planes, self.num_classes)
242 |             # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)     # new add by luo
243 |             # self.classifier.apply(weights_init_classifier)  # new add by luo
244 |         elif self.neck == 'bnneck':
245 |             self.bottleneck = nn.BatchNorm1d(self.in_planes)
246 |             self.bottleneck.bias.requires_grad_(False)  # no shift
247 |             self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False)
248 | 
249 |             self.bottleneck.apply(weights_init_kaiming)
250 |             self.classifier.apply(weights_init_classifier)
251 | 
252 |     def forward(self, x):
253 |         x = self.base(x)
254 |         global_feat = self.gap(x)  # (b, 2048, 1, 1)
255 |         global_feat = global_feat.view(global_feat.shape[0], -1)  # flatten to 
(bs, 2048) 256 | 257 | if self.neck == 'no': 258 | feat = global_feat 259 | elif self.neck == 'bnneck': 260 | feat = self.bottleneck(global_feat) # normalize for angular softmax 261 | 262 | # if self.training: 263 | cls_score = self.classifier(feat) 264 | return cls_score, global_feat 265 | # return [global_feat], [cls_score] # global feature for triplet loss 266 | # else: 267 | # if self.neck_feat == 'after': 268 | # # print("Test with feature after BN") 269 | # return feat 270 | # else: 271 | # # print("Test with feature before BN") 272 | # return global_feat 273 | 274 | # def get_optim_policy(self): 275 | # return self.parameters() 276 | 277 | #feature fusion 278 | class ClassBlock(nn.Module): 279 | def __init__(self, input_dim, class_num, relu=True, num_bottleneck=512): 280 | super(ClassBlock, self).__init__() 281 | # add_block = [] 282 | add_block1 = [] 283 | add_block2 = [] 284 | add_block1 += [nn.BatchNorm1d(input_dim)] 285 | if relu: 286 | add_block1 += [nn.LeakyReLU(0.1)] 287 | add_block1 += [nn.Linear(input_dim, num_bottleneck, bias=False)] 288 | add_block2 += [nn.BatchNorm1d(num_bottleneck)] 289 | 290 | # add_block = nn.Sequential(*add_block) 291 | # add_block.apply(weights_init_kaiming) 292 | add_block1 = nn.Sequential(*add_block1) 293 | add_block1.apply(weights_init_kaiming1) 294 | add_block2 = nn.Sequential(*add_block2) 295 | add_block2.apply(weights_init_kaiming1) 296 | classifier = [] 297 | classifier += [nn.Linear(num_bottleneck, class_num, bias=False)] 298 | classifier = nn.Sequential(*classifier) 299 | classifier.apply(weights_init_classifier) 300 | 301 | self.add_block1 = add_block1 302 | self.add_block2 = add_block2 303 | self.classifier = classifier 304 | 305 | def forward(self, x): 306 | x = self.add_block1(x) 307 | x1 = self.add_block2(x) 308 | x2 = self.classifier(x1) 309 | return x2 310 | 311 | 312 | # ft_net_50_1 313 | class ft_net(nn.Module): 314 | 315 | def __init__(self, num_classes = 40, pretrain_choice = 'imagenet', 316 | model_path = '/home/zhoumi/.torch/models/resnet101-5d3b4d8f.pth'): 317 | super(ft_net, self).__init__() 318 | model_ft = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_classes=num_classes) 319 | # if pretrain_choice == 'imagenet': 320 | # model_ft.load_param(model_path) 321 | # print('Loading pretrained ImageNet model......') 322 | # avg pooling to global pooling 323 | model_ft.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 324 | model_ft.fc = nn.Sequential() 325 | self.model = model_ft 326 | # remove the final downsample 327 | self.model.layer4[0].downsample[0].stride = (1, 1) 328 | self.model.layer4[0].conv2.stride = (1, 1) 329 | self.avgpool_1 = nn.AdaptiveAvgPool2d((1, 1)) 330 | # self.avgpool_2 = nn.AdaptiveAvgPool2d((2,2)) 331 | 332 | self.avgpool_2 = nn.AdaptiveAvgPool2d((2, 2)) 333 | self.avgpool_3 = nn.AdaptiveMaxPool2d((2, 2)) 334 | self.avgpool_4 = nn.AdaptiveMaxPool2d((1, 1)) 335 | self.avgpool_5 = nn.AdaptiveMaxPool2d((1, 1)) 336 | self.classifier_1 = ClassBlock(1024, num_classes, num_bottleneck=512) 337 | 338 | self.classifier_2 = ClassBlock(2048, num_classes, num_bottleneck=512) 339 | self.classifier_3 = ClassBlock(8192, num_classes, num_bottleneck=512) 340 | 341 | def forward(self, x): 342 | x = self.model.conv1(x) 343 | x = self.model.bn1(x) 344 | x = self.model.relu(x) 345 | x = self.model.maxpool(x) 346 | x = self.model.layer1(x) 347 | x = self.model.layer2(x) 348 | x0 = self.model.layer3(x) 349 | x = self.model.layer4(x0) 350 | x3 = self.model.avgpool(x) 351 | x_3 = self.avgpool_5(x) 352 | x_41 = self.avgpool_2(x) 353 | x_4 = 
self.avgpool_3(x) 354 | x_0 = self.avgpool_1(x0) 355 | x_1 = self.avgpool_4(x0) 356 | x0 = x_0 + x_1 357 | x_31 = x3 + x_3 358 | x4 = x_41 + x_4 359 | # 360 | x6 = torch.squeeze(x0, dim=2) 361 | x6 = torch.squeeze(x6, dim=2) 362 | 363 | # x_0 = torch.squeeze(x_0) 364 | # x_1 = torch.squeeze(x_1) 365 | # x3 = torch.squeeze(x3) 366 | # x_3 = torch.squeeze(x_3) 367 | # x7 = x1.view(x1.size(0),-1) 368 | 369 | # 370 | x9 = torch.squeeze(x_31, dim=2) 371 | x9 = torch.squeeze(x9, dim=2) 372 | 373 | #x_10 = x_4.view(x_4.size(0), -1) 374 | #x_11 = x_41.view(x_41.size(0), -1) 375 | x10 = x4.view(x4.size(0), -1) 376 | 377 | # 378 | x16 = self.classifier_1(x6) 379 | x18 = self.classifier_2(x9) 380 | x22 = self.classifier_3(x10) 381 | # 382 | return x16, x18, x22#, x_0, x_1, x3, x_3, x_10, x_11 383 | 384 | from efficientnet_pytorch import EfficientNet, efficientnet 385 | 386 | class efficient_baseline(nn.Module): 387 | in_planes = 1792 388 | def __init__(self, num_classes = 40, neck = 'bnneck', neck_feat = 'after', 389 | model_path = '/home/zhoumi/.cache/torch/checkpoints/efficientnet-b4-6ed6700e.pth'): 390 | super(efficient_baseline, self).__init__() 391 | 392 | #1.4, 1.8, 380, 0.4 393 | blocks_args, global_params = efficientnet(width_coefficient=1.4, depth_coefficient=1.8, 394 | dropout_rate=0.4, image_size=380) 395 | 396 | self.base = EfficientNet(blocks_args=blocks_args, global_params=global_params) 397 | self.base.load_param(model_path) 398 | print('Loading pretrained ImageNet model......') 399 | # self.gap = nn.AdaptiveAvgPool2d(1) 400 | # self.gap = nn.AdaptiveMaxPool2d(1) 401 | self.num_classes = num_classes 402 | self.neck = neck 403 | self.neck_feat = neck_feat 404 | 405 | if self.neck == 'no': 406 | self.classifier = nn.Linear(self.in_planes, self.num_classes) 407 | # self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) # new add by luo 408 | # self.classifier.apply(weights_init_classifier) # new add by luo 409 | elif self.neck == 'bnneck': 410 | self.bottleneck = nn.BatchNorm1d(self.in_planes) 411 | self.bottleneck.bias.requires_grad_(False) # no shift 412 | self.classifier = nn.Linear(self.in_planes, self.num_classes, bias=False) 413 | 414 | self.bottleneck.apply(weights_init_kaiming) 415 | self.classifier.apply(weights_init_classifier) 416 | 417 | def forward(self, x): 418 | global_feat = self.base(x) 419 | 420 | # global_feat = self.gap(x) # (b, 2048, 1, 1) 421 | # global_feat = global_feat.view(global_feat.shape[0], -1) # flatten to (bs, 2048) 422 | 423 | if self.neck == 'no': 424 | feat = global_feat 425 | elif self.neck == 'bnneck': 426 | feat = self.bottleneck(global_feat) # normalize for angular softmax 427 | 428 | # if self.training: 429 | cls_score = self.classifier(feat) 430 | return cls_score, global_feat 431 | 432 | -------------------------------------------------------------------------------- /preprocess_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import random 4 | 5 | fd_train = open('/Users/zhoumi/Downloads/garbage_classify/train.txt', 'w') 6 | fd_test = open('/Users/zhoumi/Downloads/garbage_classify/val.txt', 'w') 7 | img_files = glob.glob('/Users/zhoumi/Downloads/garbage_classify/train_data/*jpg') 8 | 9 | for img_file in img_files: 10 | class_file = img_file.replace('.jpg', '.txt') 11 | txt = open(class_file, 'r') 12 | label = txt.readlines()[0].split(' ')[-1] 13 | 14 | if random.uniform(0, 1) > 0.1: 15 | fd_train.write(img_file) 16 | fd_train.write(' ') 17 | 
fd_train.write(label.strip())  # strip a possible trailing newline read out of the label .txt file
18 |         fd_train.write('\n')
19 |     else:
20 |         fd_test.write(img_file)
21 |         fd_test.write(' ')
22 |         fd_test.write(label.strip())
23 |         fd_test.write('\n')
24 |     print(img_file)
25 | 
26 | fd_train.close()
27 | fd_test.close()
--------------------------------------------------------------------------------
/ramdom_erase.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from torchvision.transforms import *
4 | 
5 | from PIL import Image
6 | import random
7 | import math
8 | import numpy as np
9 | import torch
10 | 
11 | 
12 | class Cutout(object):
13 |     def __init__(self, probability=0.5, size=64, mean=[0.4914, 0.4822, 0.4465]):
14 |         self.probability = probability
15 |         self.mean = mean
16 |         self.size = size
17 | 
18 |     def __call__(self, img):
19 | 
20 |         if random.uniform(0, 1) > self.probability:
21 |             return img
22 | 
23 |         h = self.size
24 |         w = self.size
25 |         for attempt in range(100):  # the patch size is fixed, so this either succeeds on the first pass or not at all
26 |             area = img.size()[1] * img.size()[2]  # unused; left over from the RandomErasing logic below
27 |             if w < img.size()[2] and h < img.size()[1]:
28 |                 x1 = random.randint(0, img.size()[1] - h)
29 |                 y1 = random.randint(0, img.size()[2] - w)
30 |                 if img.size()[0] == 3:
31 |                     img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
32 |                     img[1, x1:x1 + h, y1:y1 + w] = self.mean[1]
33 |                     img[2, x1:x1 + h, y1:y1 + w] = self.mean[2]
34 |                 else:
35 |                     img[0, x1:x1 + h, y1:y1 + w] = self.mean[0]
36 |                 return img
37 |         return img
38 | 
39 | 
40 | class RandomErasing(object):
41 |     """ Randomly selects a rectangle region in an image and erases its pixels.
42 |         'Random Erasing Data Augmentation' by Zhong et al.
43 |         See https://arxiv.org/pdf/1708.04896.pdf
44 |     Args:
45 |         probability: The probability that the Random Erasing operation will be performed.
46 |         sl: Minimum proportion of erased area against input image.
47 |         sh: Maximum proportion of erased area against input image.
48 |         r1: Minimum aspect ratio of erased area.
49 |         mean: Erasing value.
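        Example: place it after ToTensor()/Normalize() inside a T.Compose pipeline, as train.py does, since __call__ indexes the input as a CHW tensor.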
50 | """ 51 | 52 | def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): 53 | self.probability = probability 54 | self.mean = mean 55 | self.sl = sl 56 | self.sh = sh 57 | self.r1 = r1 58 | 59 | def __call__(self, img): 60 | 61 | if random.uniform(0, 1) > self.probability: 62 | return img 63 | 64 | for attempt in range(100): 65 | area = img.size()[1] * img.size()[2] 66 | 67 | target_area = random.uniform(self.sl, self.sh) * area 68 | aspect_ratio = random.uniform(self.r1, 1 / self.r1) 69 | 70 | h = int(round(math.sqrt(target_area * aspect_ratio))) 71 | w = int(round(math.sqrt(target_area / aspect_ratio))) 72 | 73 | if w < img.size()[2] and h < img.size()[1]: 74 | x1 = random.randint(0, img.size()[1] - h) 75 | y1 = random.randint(0, img.size()[2] - w) 76 | if img.size()[0] == 3: 77 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 78 | img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] 79 | img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] 80 | else: 81 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 82 | return img 83 | 84 | return img 85 | -------------------------------------------------------------------------------- /samplers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | import torch 7 | import random 8 | from torch.utils.data.sampler import Sampler 9 | import copy 10 | 11 | 12 | class RandomIdentitySampler(Sampler): 13 | def __init__(self, data_source, num_instances=4): 14 | self.data_source = data_source 15 | self.num_instances = num_instances 16 | self.index_dic = defaultdict(list) 17 | for index, (_, pid) in enumerate(data_source): 18 | self.index_dic[pid].append(index) 19 | self.pids = list(self.index_dic.keys()) 20 | self.num_identities = len(self.pids) 21 | print(self.num_identities) 22 | 23 | def __iter__(self): 24 | indices = torch.randperm(self.num_identities) 25 | ret = [] 26 | for i in indices: 27 | pid = self.pids[i] 28 | t = self.index_dic[pid] 29 | replace = False if len(t) >= self.num_instances else True 30 | t = np.random.choice(t, size=self.num_instances, replace=replace) 31 | ret.extend(t) 32 | return iter(ret) 33 | 34 | def __len__(self): 35 | return self.num_identities * self.num_instances 36 | 37 | class RandomIdentitySampler_new(Sampler): 38 | """ 39 | Randomly sample N identities, then for each identity, 40 | randomly sample K instances, therefore batch size is N*K. 41 | Args: 42 | - data_source (list): list of (img_path, pid, camid). 43 | - num_instances (int): number of instances per identity in a batch. 44 | - batch_size (int): number of examples in a batch. 
45 | """ 46 | 47 | def __init__(self, data_source, batch_size, num_instances): 48 | self.data_source = data_source 49 | self.batch_size = batch_size 50 | self.num_instances = num_instances 51 | self.num_pids_per_batch = self.batch_size // self.num_instances 52 | self.index_dic = defaultdict(list) 53 | for index, (_, pid) in enumerate(self.data_source): 54 | self.index_dic[pid].append(index) 55 | self.pids = list(self.index_dic.keys()) 56 | 57 | # estimate number of examples in an epoch 58 | self.length = 0 59 | for pid in self.pids: 60 | idxs = self.index_dic[pid] 61 | num = len(idxs) 62 | if num < self.num_instances: 63 | num = self.num_instances 64 | self.length += num - num % self.num_instances 65 | 66 | def __iter__(self): 67 | batch_idxs_dict = defaultdict(list) 68 | 69 | for pid in self.pids: 70 | idxs = copy.deepcopy(self.index_dic[pid]) 71 | if len(idxs) < self.num_instances: 72 | idxs = np.random.choice(idxs, size=self.num_instances, replace=True) 73 | random.shuffle(idxs) 74 | batch_idxs = [] 75 | for idx in idxs: 76 | batch_idxs.append(idx) 77 | if len(batch_idxs) == self.num_instances: 78 | batch_idxs_dict[pid].append(batch_idxs) 79 | batch_idxs = [] 80 | 81 | avai_pids = copy.deepcopy(self.pids) 82 | final_idxs = [] 83 | 84 | while len(avai_pids) >= self.num_pids_per_batch: 85 | selected_pids = random.sample(avai_pids, self.num_pids_per_batch) 86 | for pid in selected_pids: 87 | batch_idxs = batch_idxs_dict[pid].pop(0) 88 | final_idxs.extend(batch_idxs) 89 | if len(batch_idxs_dict[pid]) == 0: 90 | avai_pids.remove(pid) 91 | 92 | self.length = len(final_idxs) 93 | return iter(final_idxs) 94 | 95 | def __len__(self): 96 | return self.length 97 | -------------------------------------------------------------------------------- /senet.py: -------------------------------------------------------------------------------- 1 | """ 2 | ResNet code gently borrowed from 3 | https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 4 | """ 5 | from __future__ import print_function, division, absolute_import 6 | from collections import OrderedDict 7 | import math 8 | import torch 9 | import torch.nn as nn 10 | from torch.utils import model_zoo 11 | 12 | __all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 13 | 'se_resnext50_32x4d', 'se_resnext101_32x4d'] 14 | 15 | pretrained_settings = { 16 | 'senet154': { 17 | 'imagenet': { 18 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', 19 | 'input_space': 'RGB', 20 | 'input_size': [3, 224, 224], 21 | 'input_range': [0, 1], 22 | 'mean': [0.485, 0.456, 0.406], 23 | 'std': [0.229, 0.224, 0.225], 24 | 'num_classes': 1000 25 | } 26 | }, 27 | 'se_resnet50': { 28 | 'imagenet': { 29 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', 30 | 'input_space': 'RGB', 31 | 'input_size': [3, 224, 224], 32 | 'input_range': [0, 1], 33 | 'mean': [0.485, 0.456, 0.406], 34 | 'std': [0.229, 0.224, 0.225], 35 | 'num_classes': 1000 36 | } 37 | }, 38 | 'se_resnet101': { 39 | 'imagenet': { 40 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', 41 | 'input_space': 'RGB', 42 | 'input_size': [3, 224, 224], 43 | 'input_range': [0, 1], 44 | 'mean': [0.485, 0.456, 0.406], 45 | 'std': [0.229, 0.224, 0.225], 46 | 'num_classes': 1000 47 | } 48 | }, 49 | 'se_resnet152': { 50 | 'imagenet': { 51 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', 52 | 'input_space': 'RGB', 53 | 'input_size': [3, 224, 224], 54 | 'input_range': [0, 
1], 55 | 'mean': [0.485, 0.456, 0.406], 56 | 'std': [0.229, 0.224, 0.225], 57 | 'num_classes': 1000 58 | } 59 | }, 60 | 'se_resnext50_32x4d': { 61 | 'imagenet': { 62 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 63 | 'input_space': 'RGB', 64 | 'input_size': [3, 224, 224], 65 | 'input_range': [0, 1], 66 | 'mean': [0.485, 0.456, 0.406], 67 | 'std': [0.229, 0.224, 0.225], 68 | 'num_classes': 1000 69 | } 70 | }, 71 | 'se_resnext101_32x4d': { 72 | 'imagenet': { 73 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', 74 | 'input_space': 'RGB', 75 | 'input_size': [3, 224, 224], 76 | 'input_range': [0, 1], 77 | 'mean': [0.485, 0.456, 0.406], 78 | 'std': [0.229, 0.224, 0.225], 79 | 'num_classes': 1000 80 | } 81 | }, 82 | } 83 | 84 | 85 | class SEModule(nn.Module): 86 | 87 | def __init__(self, channels, reduction): 88 | super(SEModule, self).__init__() 89 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 90 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, 91 | padding=0) 92 | self.relu = nn.ReLU(inplace=True) 93 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, 94 | padding=0) 95 | self.sigmoid = nn.Sigmoid() 96 | 97 | def forward(self, x): 98 | module_input = x 99 | x = self.avg_pool(x) 100 | x = self.fc1(x) 101 | x = self.relu(x) 102 | x = self.fc2(x) 103 | x = self.sigmoid(x) 104 | return module_input * x 105 | 106 | 107 | class Bottleneck(nn.Module): 108 | """ 109 | Base class for bottlenecks that implements `forward()` method. 110 | """ 111 | def forward(self, x): 112 | residual = x 113 | 114 | out = self.conv1(x) 115 | out = self.bn1(out) 116 | out = self.relu(out) 117 | 118 | out = self.conv2(out) 119 | out = self.bn2(out) 120 | out = self.relu(out) 121 | 122 | out = self.conv3(out) 123 | out = self.bn3(out) 124 | 125 | if self.downsample is not None: 126 | residual = self.downsample(x) 127 | 128 | out = self.se_module(out) + residual 129 | out = self.relu(out) 130 | 131 | return out 132 | 133 | 134 | class SEBottleneck(Bottleneck): 135 | """ 136 | Bottleneck for SENet154. 137 | """ 138 | expansion = 4 139 | 140 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 141 | downsample=None): 142 | super(SEBottleneck, self).__init__() 143 | self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) 144 | self.bn1 = nn.BatchNorm2d(planes * 2) 145 | self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, 146 | stride=stride, padding=1, groups=groups, 147 | bias=False) 148 | self.bn2 = nn.BatchNorm2d(planes * 4) 149 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 150 | bias=False) 151 | self.bn3 = nn.BatchNorm2d(planes * 4) 152 | self.relu = nn.ReLU(inplace=True) 153 | self.se_module = SEModule(planes * 4, reduction=reduction) 154 | self.downsample = downsample 155 | self.stride = stride 156 | 157 | 158 | class SEResNetBottleneck(Bottleneck): 159 | """ 160 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 161 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 162 | (the latter is used in the torchvision implementation of ResNet). 
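    Striding the 1x1 conv1 downsamples before the 3x3 convolution runs, which is slightly lossier than striding conv2, but it matches the layout that the Caffe-converted pretrained weights expect.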
163 | """ 164 | expansion = 4 165 | 166 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 167 | downsample=None): 168 | super(SEResNetBottleneck, self).__init__() 169 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 170 | stride=stride) 171 | self.bn1 = nn.BatchNorm2d(planes) 172 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, 173 | groups=groups, bias=False) 174 | self.bn2 = nn.BatchNorm2d(planes) 175 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 176 | self.bn3 = nn.BatchNorm2d(planes * 4) 177 | self.relu = nn.ReLU(inplace=True) 178 | self.se_module = SEModule(planes * 4, reduction=reduction) 179 | self.downsample = downsample 180 | self.stride = stride 181 | 182 | 183 | class SEResNeXtBottleneck(Bottleneck): 184 | """ 185 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 186 | """ 187 | expansion = 4 188 | 189 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 190 | downsample=None, base_width=4): 191 | super(SEResNeXtBottleneck, self).__init__() 192 | width = math.floor(planes * (base_width / 64)) * groups 193 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 194 | stride=1) 195 | self.bn1 = nn.BatchNorm2d(width) 196 | self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, 197 | padding=1, groups=groups, bias=False) 198 | self.bn2 = nn.BatchNorm2d(width) 199 | self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) 200 | self.bn3 = nn.BatchNorm2d(planes * 4) 201 | self.relu = nn.ReLU(inplace=True) 202 | self.se_module = SEModule(planes * 4, reduction=reduction) 203 | self.downsample = downsample 204 | self.stride = stride 205 | 206 | 207 | class SENet(nn.Module): 208 | 209 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 210 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 211 | downsample_padding=1, last_stride=2): 212 | """ 213 | Parameters 214 | ---------- 215 | block (nn.Module): Bottleneck class. 216 | - For SENet154: SEBottleneck 217 | - For SE-ResNet models: SEResNetBottleneck 218 | - For SE-ResNeXt models: SEResNeXtBottleneck 219 | layers (list of ints): Number of residual blocks for 4 layers of the 220 | network (layer1...layer4). 221 | groups (int): Number of groups for the 3x3 convolution in each 222 | bottleneck block. 223 | - For SENet154: 64 224 | - For SE-ResNet models: 1 225 | - For SE-ResNeXt models: 32 226 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules. 227 | - For all models: 16 228 | dropout_p (float or None): Drop probability for the Dropout layer. 229 | If `None` the Dropout layer is not used. 230 | - For SENet154: 0.2 231 | - For SE-ResNet models: None 232 | - For SE-ResNeXt models: None 233 | inplanes (int): Number of input channels for layer1. 234 | - For SENet154: 128 235 | - For SE-ResNet models: 64 236 | - For SE-ResNeXt models: 64 237 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of 238 | a single 7x7 convolution in layer0. 239 | - For SENet154: True 240 | - For SE-ResNet models: False 241 | - For SE-ResNeXt models: False 242 | downsample_kernel_size (int): Kernel size for downsampling convolutions 243 | in layer2, layer3 and layer4. 244 | - For SENet154: 3 245 | - For SE-ResNet models: 1 246 | - For SE-ResNeXt models: 1 247 | downsample_padding (int): Padding for downsampling convolutions in 248 | layer2, layer3 and layer4. 
249 | - For SENet154: 1 250 | - For SE-ResNet models: 0 251 | - For SE-ResNeXt models: 0 252 | num_classes (int): Number of outputs in `last_linear` layer. 253 | - For all models: 1000 254 | """ 255 | super(SENet, self).__init__() 256 | self.inplanes = inplanes 257 | if input_3x3: 258 | layer0_modules = [ 259 | ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, 260 | bias=False)), 261 | ('bn1', nn.BatchNorm2d(64)), 262 | ('relu1', nn.ReLU(inplace=True)), 263 | ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, 264 | bias=False)), 265 | ('bn2', nn.BatchNorm2d(64)), 266 | ('relu2', nn.ReLU(inplace=True)), 267 | ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, 268 | bias=False)), 269 | ('bn3', nn.BatchNorm2d(inplanes)), 270 | ('relu3', nn.ReLU(inplace=True)), 271 | ] 272 | else: 273 | layer0_modules = [ 274 | ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, 275 | padding=3, bias=False)), 276 | ('bn1', nn.BatchNorm2d(inplanes)), 277 | ('relu1', nn.ReLU(inplace=True)), 278 | ] 279 | # To preserve compatibility with Caffe weights `ceil_mode=True` 280 | # is used instead of `padding=1`. 281 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 282 | ceil_mode=True))) 283 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 284 | self.layer1 = self._make_layer( 285 | block, 286 | planes=64, 287 | blocks=layers[0], 288 | groups=groups, 289 | reduction=reduction, 290 | downsample_kernel_size=1, 291 | downsample_padding=0 292 | ) 293 | self.layer2 = self._make_layer( 294 | block, 295 | planes=128, 296 | blocks=layers[1], 297 | stride=2, 298 | groups=groups, 299 | reduction=reduction, 300 | downsample_kernel_size=downsample_kernel_size, 301 | downsample_padding=downsample_padding 302 | ) 303 | self.layer3 = self._make_layer( 304 | block, 305 | planes=256, 306 | blocks=layers[2], 307 | stride=2, 308 | groups=groups, 309 | reduction=reduction, 310 | downsample_kernel_size=downsample_kernel_size, 311 | downsample_padding=downsample_padding 312 | ) 313 | self.layer4 = self._make_layer( 314 | block, 315 | planes=512, 316 | blocks=layers[3], 317 | stride=last_stride, 318 | groups=groups, 319 | reduction=reduction, 320 | downsample_kernel_size=downsample_kernel_size, 321 | downsample_padding=downsample_padding 322 | ) 323 | self.avg_pool = nn.AvgPool2d(7, stride=1) 324 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 325 | 326 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 327 | downsample_kernel_size=1, downsample_padding=0): 328 | downsample = None 329 | if stride != 1 or self.inplanes != planes * block.expansion: 330 | downsample = nn.Sequential( 331 | nn.Conv2d(self.inplanes, planes * block.expansion, 332 | kernel_size=downsample_kernel_size, stride=stride, 333 | padding=downsample_padding, bias=False), 334 | nn.BatchNorm2d(planes * block.expansion), 335 | ) 336 | 337 | layers = [] 338 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 339 | downsample)) 340 | self.inplanes = planes * block.expansion 341 | for i in range(1, blocks): 342 | layers.append(block(self.inplanes, planes, groups, reduction)) 343 | 344 | return nn.Sequential(*layers) 345 | 346 | def load_param(self, model_path): 347 | param_dict = torch.load(model_path) 348 | for i in param_dict: 349 | if 'last_linear' in i: 350 | continue 351 | self.state_dict()[i].copy_(param_dict[i]) 352 | 353 | def forward(self, x): 354 | x = self.layer0(x) 355 | x = self.layer1(x) 356 | x = self.layer2(x) 357 | x = self.layer3(x) 358 | x = self.layer4(x) 
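        # NOTE: forward() intentionally stops at the layer4 feature map. avg_pool and
        # dropout are constructed in __init__ but never applied here, and the
        # num_classes / last_linear head described in the docstring is not defined in
        # this trimmed copy (load_param accordingly skips 'last_linear' keys).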
359 |         return x
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision.models
5 | from dataloader import *
6 | import torchvision.transforms as T
7 | from loss import CrossEntropyLabelSmooth, CenterLoss, TripletLoss
8 | from torch.utils.data import DataLoader
9 | from meters import AverageMeter
10 | from model import resnet50, Baseline, ft_net, efficient_baseline
11 | from torch.autograd import Variable
12 | import torch
13 | from bisect import bisect_right
14 | from ramdom_erase import Cutout, RandomErasing
15 | from samplers import RandomIdentitySampler, RandomIdentitySampler_new
16 | from efficientnet_pytorch import EfficientNet
17 | import copy  # used below to snapshot the best-performing model
18 | 
19 | NUM_CLASSES = 40
20 | MAX_EPOCH = 60
21 | BATCH_SIZE = 32
22 | TEST_BATCH_SIZE = 1
23 | use_triplet = True
24 | use_ff = False
25 | use_efficientnet = False
26 | 
27 | def adjust_lr(ep):
28 |     lr = 1e-4
29 |     if use_triplet == True:
30 |         warmup_factor = 1
31 |         if ep < 10:
32 |             alpha = ep / 10
33 |             warmup_factor = 0.01 * (1 - alpha) + alpha
34 | 
35 |         lr = lr * warmup_factor * 0.1 ** bisect_right([20, 40], ep)
36 |     else:
37 |         if ep < 4:
38 |             lr = 1e-4
39 |         elif ep < 7:
40 |             lr = 1e-5
41 |         else:
42 |             lr = 1e-6
43 | 
44 |     return lr
45 | 
46 | # model = resnet50(num_classes=NUM_CLASSES, pretrained=True)
47 | if use_ff == False:
48 |     if use_efficientnet == True:
49 |         model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=NUM_CLASSES)
50 |         # model = torch.nn.DataParallel(model)
51 |     else:
52 |         model = efficient_baseline(num_classes=NUM_CLASSES, neck='bnneck')
53 | else:
54 |     model = ft_net(num_classes=NUM_CLASSES)
55 | print(model)
56 | 
57 | train_transform = T.Compose([T.Resize((224, 224)),
58 |                              T.RandomHorizontalFlip(),
59 |                              # T.RandomVerticalFlip(),
60 |                              # T.ColorJitter(0.5, 0.5, 0.5, 0.5),
61 |                              T.Pad(10),
62 |                              T.RandomCrop((224, 224)),
63 |                              # T.RandomRotation(90),
64 |                              T.ToTensor(),
65 |                              T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
66 |                              Cutout(probability=0.5, size=64, mean=[0.0, 0.0, 0.0]),
67 |                              RandomErasing(probability=0.0, mean=[0.485, 0.456, 0.406])])
68 | 
69 | test_transform = T.Compose([T.Resize((224, 224)),
70 |                             T.ToTensor(),
71 |                             T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
72 | 
73 | train_datasets = MyDataset(txt_path='/data/zhoumi/datasets/train_data/train.txt', transform=train_transform)
74 | train = process_dir(txt_path='/data/zhoumi/datasets/train_data/train.txt')
75 | 
76 | if use_triplet == True:
77 |     train_data = DataLoader(train_datasets, sampler=RandomIdentitySampler_new(train, BATCH_SIZE, 4),  # the sampler's batch size should match the DataLoader's, giving 8 classes x 4 instances per batch
78 |                             batch_size=BATCH_SIZE, pin_memory=True, num_workers=8, drop_last=True)
79 | else:
80 |     train_data = DataLoader(train_datasets, batch_size=BATCH_SIZE, pin_memory=True, shuffle=True)
81 | 
82 | test_data = DataLoader(MyDataset(txt_path='/data/zhoumi/datasets/train_data/val.txt', transform=test_transform),
83 |                        batch_size=TEST_BATCH_SIZE, pin_memory=True)
84 | 
85 | optimizer = optim.Adam(params=model.parameters(), lr=1e-4, weight_decay=5e-4)
86 | # optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
87 | 
88 | # define the loss functions
89 | xent_criterion = CrossEntropyLabelSmooth(NUM_CLASSES)
90 | center_criterion = CenterLoss(NUM_CLASSES, feat_dim=1792)
91 | triplet_criterion = TripletLoss(margin=0.3)
92 | 
93 | best_model = model
94 | best_acc = 0
95 | print(len(test_data) * TEST_BATCH_SIZE, len(train_data))
96 | 
97 | model = model.cuda()
98 | 
99 | for epoch in range(MAX_EPOCH):
100 |     lr = adjust_lr(epoch)
101 |     for p in optimizer.param_groups:
102 |         p['lr'] = lr
103 | 
104 |     for i, inputs in enumerate(train_data):
105 |         model = model.train()
106 |         images, labels = Variable(inputs[0].cuda()), Variable(inputs[1].cuda())
107 |         if use_ff == False:
108 |             if use_efficientnet == True:
109 |                 output = model(images)
110 |             else:
111 |                 output, feat = model(images)
112 |         else:
113 |             output1, output2, output3 = model(images)
114 |         if use_triplet == True:
115 |             softmax_loss = xent_criterion(output, labels)
116 |             triplet_loss = triplet_criterion(feat, labels)[0]
117 |             losses = softmax_loss + triplet_loss + 0.0005 * center_criterion(feat, labels)
118 |         else:
119 |             if use_ff == False:
120 |                 losses = xent_criterion(output, labels)
121 |             else:
122 |                 losses = (xent_criterion(output1, labels) + xent_criterion(output2, labels) + xent_criterion(output3, labels))/3
123 |         optimizer.zero_grad()
124 |         losses.backward()
125 |         optimizer.step()
126 | 
127 |         if (i+1) % 100 == 0:
128 |             corrects = 0
129 |             model = model.eval()
130 |             for j, test in enumerate(test_data):
131 |                 t_images, t_labels = Variable(test[0].cuda()), Variable(test[1].cuda())
132 | 
133 |                 if use_ff == False:
134 |                     if use_efficientnet == True:
135 |                         pred = torch.argmax(model(t_images), 1)
136 |                     else:
137 |                         _, pred = torch.max(model(t_images)[0], 1)
138 |                 else:
139 |                     o1, o2, o3 = model(t_images)
140 |                     _, pred = torch.max((o1 + o2 + o3)/3, 1)
141 |                 # print(pred, t_labels.data)  # per-sample debug output; very noisy
142 | 
143 |                 corrects += torch.sum(pred == t_labels.data)
144 | 
145 |             acc = corrects.item() / len(test_data.dataset)  # divide by the true sample count (robust to a partial final batch)
146 |             if acc > best_acc:
147 |                 best_acc = acc
148 |                 best_model = copy.deepcopy(model)  # real snapshot; `best_model = model` would only alias the live network
149 | 
150 |             if use_triplet == True:
151 |                 print("epoch: {}, iter: {}, lr: {}, loss: {}, softmax_loss: {}, triplet_loss: {} acc: {}".format(epoch,
152 |                     i, optimizer.param_groups[0]['lr'], losses.item(), softmax_loss.item(), triplet_loss.item(), acc))
153 |             else:
154 |                 print("epoch: {}, iter: {}, lr: {}, loss: {}, acc: {}".format(epoch,
155 |                     i, optimizer.param_groups[0]['lr'], losses.item(), acc))
156 | 
157 | torch.save(best_model, './best_model_v2_tri_center_old.pth')  # persist the best validation snapshot rather than the final-epoch weights
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
--------------------------------------------------------------------------------
/tt.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import os
3 | import glob
4 | src_path = '/Users/zhoumi/Downloads/garbage_classify_v2/train_data_v2/'
5 | dst_path = '/Users/zhoumi/Downloads/garbage_classify_v2/val'
6 | with open('/Users/zhoumi/Downloads/garbage_classify_v2/train_data_v2/val.txt', 'r') as fd:
7 |     lines = fd.readlines()
8 |     for line in lines:
9 |         img_path = line.split(' ')[0].split('/')[-1]
10 |         label = str(line.split(' ')[1])
11 |         img_name = str(label).replace('\n', '') + '_' + img_path
12 |         shutil.copy(os.path.join(src_path, line.split(' ')[0].split('/')[-1]), os.path.join(dst_path, img_name))
13 | 
14 | # img_paths = glob.glob('/Users/zhoumi/Downloads/garbage_classify/train_data/*jpg')
15 | # dst_path = '/Users/zhoumi/Downloads/garbage_classify/new/'
16 | # for img_path in img_paths:
17 | #     txt_path = img_path.replace('.jpg', '.txt')
18 | #     with open(txt_path, 'r') as fd:
19 | #         line = fd.readlines()[0]
20 | #         label = line.split(' ')[-1]
21 | #         if not os.path.exists(os.path.join(dst_path, str(label))):
22 | #             os.mkdir(os.path.join(dst_path, str(label)))
23 | #
24 | #             shutil.copy(img_path, os.path.join(dst_path, str(label), line.split(' ')[0][:-1]))
25 | # fd.close()
26 | 
27 | from efficientnet_pytorch import EfficientNet  # leftover scratch import; unused while the line below stays commented out
28 | # model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=40)
29 | 
30 | 
31 | 
--------------------------------------------------------------------------------
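Usage sketch (not a file from the repo): a minimal, hedged example of evaluating a trained classifier with the pieces above. It assumes model.py's Baseline and dataloader.py's MyDataset as defined in this dump, a val.txt in the "path label" format written by preprocess_data.py (the 'val.txt' path here is a placeholder), and a CUDA device; the checkpoint name matches the one train.py saves.

import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader

from dataloader import MyDataset
from model import Baseline

# Same eval-time preprocessing that train.py uses.
test_transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_loader = DataLoader(
    MyDataset(txt_path='val.txt', transform=test_transform),
    batch_size=32,
)

# Either build the architecture fresh ...
model = Baseline(num_classes=40, neck='bnneck').cuda().eval()
# ... or load the full model object saved by train.py:
# model = torch.load('./best_model_v2_tri_center_old.pth').cuda().eval()

correct, total = 0, 0
with torch.no_grad():
    for images, labels in val_loader:
        cls_score, _ = model(images.cuda())   # Baseline returns (logits, global feature)
        pred = cls_score.argmax(dim=1).cpu()
        correct += (pred == labels).sum().item()
        total += labels.size(0)
print('val accuracy:', correct / total)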