├── LICENSE ├── ShuffleNetV2.ExLarge ├── README.md ├── utils.py ├── network.py └── eval.py ├── ShuffleNetV2.Large ├── README.md ├── utils.py ├── network.py └── train.py ├── OneShot ├── README.md ├── utils.py ├── blocks.py ├── network.py └── train.py ├── ShuffleNetV2+ ├── README.md ├── utils.py ├── network.py ├── blocks.py └── train.py ├── ShuffleNetV2 ├── README.md ├── blocks.py ├── utils.py ├── network.py └── train.py ├── DetNAS ├── README.md ├── utils.py ├── network.py ├── blocks.py └── train.py ├── ShuffleNetV1 ├── blocks.py ├── utils.py ├── README.md ├── network.py └── train.py └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Megvii Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /ShuffleNetV2.ExLarge/README.md: -------------------------------------------------------------------------------- 1 | # ShuffleNetV2.ExLarge 2 | 3 | This repository contains the ShuffleNetV2.ExLarge implementation in PyTorch, which is an extra-large version of ShuffleNetV2. 4 | 5 | ## Requirements 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script: 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 8 | 9 | ## Usage 10 | We used an external training dataset to achieve this result, so you do not need to re-train it. 11 | 12 | This is the evaluation script: 13 | ```shell 14 | python eval.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 15 | ``` 16 | 17 | 18 | ## Trained Models 19 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 20 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 21 | 22 | 23 | ## Results 24 | 25 | | Model | FLOPs | #Params | Top-1 | Top-5 | 26 | | :--------------------- | :---: | :------: | :---: | :---: | 27 | | ShuffleNetV2.ExLarge | 46.2G | 254.7M | 15.52 | 2.9 | 28 | -------------------------------------------------------------------------------- /ShuffleNetV2.Large/README.md: -------------------------------------------------------------------------------- 1 | # ShuffleNetV2.Large 2 | 3 | This repository contains the ShuffleNetV2.Large implementation in PyTorch, which is a deeper version of ShuffleNetV2. 4 | 5 | ## Requirements 6 | Download the ImageNet dataset and move validation images to labeled subfolders. 
To do this, you can use the following script: 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 8 | 9 | ## Usage 10 | Train: 11 | ```shell 12 | python train.py --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 13 | ``` 14 | Eval: 15 | ```shell 16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 17 | ``` 18 | 19 | 20 | ## Trained Models 21 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 22 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 23 | 24 | 25 | ## Results 26 | 27 | | Model | FLOPs | #Params | Top-1 | Top-5 | 28 | | :--------------------- | :---: | :------: | :---: | :---: | 29 | | ShuffleNetV2.Large | 12.7G | 140.7M | **18.56** | 4.48 | 30 | | SEnet | 20.7G | - | 18.68 | 4.47 | 31 | -------------------------------------------------------------------------------- /OneShot/README.md: -------------------------------------------------------------------------------- 1 | # [One-Shot NAS](https://arxiv.org/abs/1904.00420) 2 | This repository contains single path one-shot NAS searched networks implementation by Pytorch. 3 | 4 | 5 | ## Requirements 6 | Download the ImageNet dataset and move validation images to labeled subfolders. 
To do this, you can use the following script: 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 8 | 9 | ## Usage 10 | Train: 11 | ```shell 12 | python train.py --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 13 | ``` 14 | Eval: 15 | ```shell 16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 17 | ``` 18 | 19 | 20 | 21 | ## Trained Models 22 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 23 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 24 | 25 | 26 | 27 | ## Results 28 | 29 | 30 | | Model | FLOPs | #Params | Top-1 | Top-5 | 31 | | :--------------------- | :---: | :------: | :---: | :---: | 32 | | OneShot | 328M | 3.4M | **25.1** | 8.0 | 33 | | NASNET-A| 564M | 5.3M | 26.0 | 8.4 | 34 | | PNASNET| 588M | 5.1M | 25.8 | 8.1 | 35 | | MnasNet| 317M | 4.2M | 26.0 | 8.2 | 36 | | DARTS| 574M| 4.7M | 26.7 | 8.7 | 37 | | FBNet-B| 295M| 4.5M | 25.9 | - | 38 | 39 | ## Citation 40 | If you use these models in your research, please cite: 41 | 42 | 43 | @article{guo2019single, 44 | title={Single path one-shot neural architecture search with uniform sampling}, 45 | author={Guo, Zichao and Zhang, Xiangyu and Mu, Haoyuan and Heng, Wen and Liu, Zechun and Wei, Yichen and Sun, Jian}, 46 | journal={arXiv preprint arXiv:1904.00420}, 47 | year={2019} 48 | } 49 | -------------------------------------------------------------------------------- /ShuffleNetV2+/README.md: -------------------------------------------------------------------------------- 1 | # ShuffleNetV2+ 2 | 3 | This repository contains the ShuffleNetV2+ implementation in PyTorch, a strengthened version of ShuffleNetV2 that adds Hard-Swish, Hard-Sigmoid and [SE](https://arxiv.org/abs/1709.01507) modules. 
4 | 5 | 6 | 7 | ## Requirements 8 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script: 9 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 10 | 11 | ## Usage 12 | Train: 13 | ```shell 14 | python train.py --model-size Large --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 15 | ``` 16 | Eval: 17 | ```shell 18 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size Large --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 19 | ``` 20 | 21 | 22 | ## Trained Models 23 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 24 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 25 | 26 | 27 | ## Results 28 | 29 | The following is a comparison with MobileNetV3 in [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244). 30 | 31 | | Model | FLOPs | #Params | Top-1 | Top-5 | 32 | |:------------------------|:---------:|:---------:|:---------:|:---------:| 33 | ShuffleNetV2+ Large | 360M | 6.7M | **22.9** | 6.7 | 34 | MobileNetV3 Large 224/1.25 | 356M | 7.5M | 23.4 | - | 35 | ShuffleNetV2+ Medium | 222M | 5.6M | **24.3** | 7.4 | 36 | MobileNetV3 Large 224/1.0 | 217M | 5.4M | 24.8 | - | 37 | ShuffleNetV2+ Small | 156M | 5.1M | **25.9** | 8.3 | 38 | MobileNetV3 Large 224/0.75 | 155M | 4.0M | 26.7 | - | 39 | 40 | -------------------------------------------------------------------------------- /ShuffleNetV2/README.md: -------------------------------------------------------------------------------- 1 | # [ShuffleNetV2](https://arxiv.org/pdf/1807.11164.pdf) 2 | This repository contains ShuffleNetV2 implementation by Pytorch. 3 | 4 | 5 | ## Requirements 6 | Download the ImageNet dataset and move validation images to labeled subfolders. 
To do this, you can use the following script: 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 8 | 9 | ## Usage 10 | Train: 11 | ```shell 12 | python train.py --model-size 1.5x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 13 | ``` 14 | Eval: 15 | ```shell 16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size 1.5x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 17 | ``` 18 | 19 | 20 | ## Trained Models 21 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 22 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 23 | 24 | 25 | ## Results 26 | 27 | | Model | FLOPs | #Params | Top-1 | Top-5 | 28 | | :--------------------- | :---: | :------: | :----------: | :------: | 29 | | ShuffleNetV2 2.0x | 591M | 7.4M | **25.0** | 7.6 | 30 | | MobileNetV2 (1.4) | 585M | 6.9M | 25.3 | - | 31 | | ShuffleNetV2 1.5x | 299M | 3.5M | **27.4** | 9.4 | 32 | | MobileNetV2 | 300M | 3.4M | 28.0 | - | 33 | | ShuffleNetV2 1.0x | 146M | 2.3M | 30.6 | 11.1 | 34 | | ShuffleNetV2 0.5x | 41M | 1.4M | 38.9 | 17.4 | 35 | 36 | 37 | 38 | 39 | ## Citation 40 | If you use these models in your research, please cite: 41 | 42 | 43 | @inproceedings{ma2018shufflenet, 44 | title={Shufflenet v2: Practical guidelines for efficient cnn architecture design}, 45 | author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian}, 46 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 47 | pages={116--131}, 48 | year={2018} 49 | } 50 | -------------------------------------------------------------------------------- /DetNAS/README.md: -------------------------------------------------------------------------------- 1 | # DetNAS 2 | 3 | This repository contains DetNAS backbone networks implementation by Pytorch. 4 | 5 | ## Requirements 6 | Download the ImageNet dataset and move validation images to labeled subfolders. 
To do this, you can use the following script: 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 8 | 9 | ## Usage 10 | Train: 11 | ```shell 12 | python train.py --model-size VOC_FPN_300M --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 13 | ``` 14 | Eval: 15 | ```shell 16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size VOC_FPN_300M --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 17 | ``` 18 | 19 | ## Trained Models 20 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 21 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 22 | 23 | 24 | 25 | ## Results 26 | 27 | | Model | FLOPs| #Params| Top-1 | Top-5 | mAP* | 28 | | :------------ | :---:| :-----:| :---: | :---: | :--------------: | 29 | |300M (VOC, RetinaNet) | 300M | 3.5M | 25.4 | 8.1 | 80.1 | 30 | |300M (VOC, FPN) | 300M | 3.7M | 25.9 | 8.3 | 81.5 | 31 | |300M (COCO, RetinaNet) | 300M | 3.7M | 26.0 | 8.4 | 33.3 | 32 | |300M (COCO, FPN) | 300M | 3.5M | 26.2 | 8.4 | 36.4 | 33 | |1.3G (COCO, FPN) | 1.3G | 10.4M | **22.8** | 6.5 | 40.0 | 34 | |3.8G (COCO, FPN) | 3.8G | 29.5M | **21.6** | 6.3 | **42.0** | 35 | |ResNet50 (COCO, FPN) | 3.8G | - | 23.9 | 7.1 | 37.3 | 36 | |ResNet101 (COCO, FPN) | 7.6G | - | 22.6 | 6.4 | 40.0 | 37 | 38 | * More about DetNAS in [Link](https://github.com/megvii-model/DetNAS). 
class ShuffleV2Block(nn.Module):
    """ShuffleNetV2 unit: a 1x1 -> depthwise -> 1x1 main branch concatenated
    with a shortcut branch along the channel dimension.

    With ``stride == 1`` the input is split in two by ``channel_shuffle``; one
    half is passed through unchanged and the other goes through
    ``branch_main``. With ``stride == 2`` the full input feeds both
    ``branch_main`` and the strided ``branch_proj``.

    Args:
        inp: input channels of ``branch_main`` (and of ``branch_proj`` when
            it exists).
        oup: channels after concatenation; ``branch_main`` emits ``oup - inp``.
        mid_channels: width of the main branch between its two 1x1 convs.
        ksize: depthwise kernel size; padding is ``ksize // 2``.
        stride: 1 or 2 (asserted).
    """

    def __init__(self, inp, oup, mid_channels, *, ksize, stride):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        padding = ksize // 2
        main_out = oup - inp

        self.mid_channels = mid_channels
        self.ksize = ksize
        self.pad = padding
        self.inp = inp

        self.branch_main = nn.Sequential(
            # pointwise expansion
            nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # depthwise (one group per channel)
            nn.Conv2d(mid_channels, mid_channels, ksize, stride, padding,
                      groups=mid_channels, bias=False),
            nn.BatchNorm2d(mid_channels),
            # pointwise projection
            nn.Conv2d(mid_channels, main_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(main_out),
            nn.ReLU(inplace=True),
        )

        # The shortcut only needs its own layers when it has to down-sample.
        self.branch_proj = nn.Sequential(
            # depthwise
            nn.Conv2d(inp, inp, ksize, stride, padding, groups=inp, bias=False),
            nn.BatchNorm2d(inp),
            # pointwise
            nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
            nn.BatchNorm2d(inp),
            nn.ReLU(inplace=True),
        ) if stride == 2 else None

    def forward(self, old_x):
        """Run both branches and concatenate their outputs on dim 1."""
        if self.stride == 1:
            shortcut, main_in = self.channel_shuffle(old_x)
        elif self.stride == 2:
            shortcut, main_in = self.branch_proj(old_x), old_x
        else:
            # No other stride passes the constructor's assert.
            return None
        return torch.cat((shortcut, self.branch_main(main_in)), 1)

    def channel_shuffle(self, x):
        """Split ``x`` into its even-indexed and odd-indexed channels.

        Returns:
            Two tensors of shape ``(batch, channels // 2, height, width)``.
        """
        n, c, h, w = x.shape
        assert (c % 4 == 0)
        pairs = x.reshape(n * c // 2, 2, h * w)
        halves = pairs.permute(1, 0, 2).reshape(2, -1, c // 2, h, w)
        return halves[0], halves[1]
class AvgrageMeter:
    """Keeps the latest value and the running weighted mean of a metric.

    Attributes are ``val`` (last value passed to ``update``), ``sum``
    (weighted total), ``cnt`` (total weight) and ``avg`` (``sum / cnt``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.cnt += n
        self.avg = self.sum / self.cnt
def get_lastest_model():
    """Find the checkpoint in ``./models`` with the highest iteration count.

    Only files whose names end in ``checkpoint-<iters>.pth.tar`` (the pattern
    ``save_checkpoint`` writes, optionally preceded by a tag) are considered;
    anything else in the directory is ignored. The directory is created if it
    does not exist yet.

    Returns:
        tuple: ``(path, iters)`` for the newest checkpoint, or ``(None, 0)``
        when the directory holds no checkpoint file.
    """
    model_dir = './models'
    # exist_ok avoids the exists()/mkdir() race when several processes start
    # at the same time.
    os.makedirs(model_dir, exist_ok=True)

    # Anchor on the "checkpoint-<digits>.pth.tar" suffix so digits inside a
    # tag (e.g. "v2-checkpoint-000100.pth.tar") are not read as the iteration.
    pattern = re.compile(r'checkpoint-(\d+)\.pth\.tar$')
    candidates = []
    for fname in os.listdir(model_dir):
        match = pattern.search(fname)
        if match is not None:
            candidates.append((int(match.group(1)), fname))
    if not candidates:
        return None, 0

    # Compare iterations numerically: a plain name sort would rank
    # "checkpoint-1000000" below "checkpoint-999999".
    iters, lastest_model = max(candidates)
    return './models/' + lastest_model, iters
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: 2-D score tensor; the top-k search runs along dim 1, one row
            per sample.
        target: 1-D tensor of ground-truth class indices, one per row of
            ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        list: one 0-dim float tensor per entry of ``topk``.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best scores per sample, transposed so that row i
    # holds every sample's i-th best guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` can carry the transposed, non-contiguous layout of `pred`;
        # view(-1) raises on such a slice when k > 1, while reshape(-1)
        # copies only if it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
class CrossEntropyLabelSmooth(nn.Module):
    """Cross-entropy against label-smoothed targets.

    Each one-hot target is blended with a uniform distribution:
    ``(1 - epsilon) * one_hot + epsilon / num_classes``.

    Args:
        num_classes: number of classes; divides ``epsilon`` in the uniform term.
        epsilon: smoothing weight.
    """

    def __init__(self, num_classes, epsilon):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        """Return the scalar loss: batch mean of the per-class terms, summed.

        ``inputs`` holds per-class scores along dim 1; ``targets`` holds one
        integer class index per sample.
        """
        log_probs = self.logsoftmax(inputs)
        # One-hot encode by writing 1 at each sample's target column.
        one_hot = torch.zeros_like(log_probs)
        one_hot.scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        per_class = (-smoothed * log_probs).mean(0)
        return per_class.sum()
def get_parameters(model):
    """Build two optimizer parameter groups, with and without weight decay.

    A parameter goes to the decayed group when its name contains ``'weight'``
    and it has more than one dimension. Every other parameter goes to the
    group whose ``weight_decay`` is fixed at 0.

    Returns:
        list: ``[{'params': decayed}, {'params': undecayed, 'weight_decay': 0.}]``
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        wants_decay = 'weight' in name and param.dim() > 1
        bucket = decayed if wants_decay else undecayed
        bucket.append(param)

    # Sanity check: every parameter landed in exactly one group.
    total = len(list(model.parameters()))
    assert total == len(decayed) + len(undecayed)

    return [
        dict(params=decayed),
        dict(params=undecayed, weight_decay=0.),
    ]
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: Per-sample class scores; the top-k search runs along
            dimension 1.
        target: Ground-truth class indices, one per sample.
        topk: Iterable of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per entry of ``topk``, each holding the
        percentage of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the transposed (non-contiguous) layout, so
        # `view(-1)` raises for k > 1 on current PyTorch; `reshape` copies
        # only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
def get_lastest_model():
    """Locate the most recent checkpoint under ``./models``.

    The directory is created when missing. The iteration number is taken
    from the ``checkpoint-<digits>`` part of a file name; for names without
    that part, the first run of digits is used instead. Files without any
    digits are ignored rather than crashing the lookup.

    Returns:
        ``(path, iters)`` for the checkpoint with the highest iteration
        number (ties broken by file name), or ``(None, 0)`` when no usable
        file exists.
    """
    os.makedirs('./models', exist_ok=True)

    found = []
    for fname in os.listdir('./models/'):
        # Prefer the explicit checkpoint counter so digits inside a tag
        # prefix (e.g. "v2-") are not mistaken for the iteration.
        match = re.search(r'checkpoint-(\d+)', fname) or re.search(r'(\d+)', fname)
        if match is None:
            continue
        found.append((int(match.group(1)), fname))

    if not found:
        return None, 0

    # Compare numerically: lexicographic order mis-ranks counters that
    # outgrow the 6-digit zero padding.
    iters, lastest_model = max(found)
    return './models/' + lastest_model, iters
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes are ``val`` (last value seen), ``sum`` (weighted sum),
    ``cnt`` (total weight) and ``avg`` (``sum / cnt``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        total, weight = self.sum, self.cnt
        self.avg = total / weight
def get_parameters(model):
    """Split a model's parameters into weight-decay and no-decay groups.

    A parameter is decayed only when its name contains ``'weight'`` and it
    has more than one dimension; everything else goes to the second group,
    which carries ``weight_decay=0.``.

    Returns:
        A two-element list of optimizer parameter-group dicts:
        ``[{'params': decayed}, {'params': rest, 'weight_decay': 0.}]``.
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        is_weight_matrix = 'weight' in name and len(param.size()) > 1
        bucket = decayed if is_weight_matrix else undecayed
        bucket.append(param)

    # Sanity check: every parameter landed in exactly one group.
    assert len(list(model.parameters())) == len(decayed) + len(undecayed)
    return [
        {'params': decayed},
        {'params': undecayed, 'weight_decay': 0.},
    ]
def save_checkpoint(state, iters, tag=''):
    """Serialize ``state`` to ``./models/<tag>checkpoint-<iters>.pth.tar``.

    Args:
        state: Object handed to ``torch.save``.
        iters: Iteration number; zero-padded to six digits in the file name.
        tag: Optional prefix placed before ``checkpoint-``.
    """
    # exist_ok avoids the exists()/makedirs() race when several processes
    # save at the same moment.
    os.makedirs("./models", exist_ok=True)
    filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
    torch.save(state, filename)
/ShuffleNetV1/README.md: -------------------------------------------------------------------------------- 1 | # [ShuffleNetV1](https://arxiv.org/pdf/1707.01083.pdf) 2 | 3 | This repository contains a PyTorch implementation of ShuffleNetV1. 4 | 5 | 6 | ## Requirements 7 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script: 8 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 9 | 10 | ## Usage 11 | Train: 12 | ```shell 13 | python train.py --model-size 2.0x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 14 | ``` 15 | Eval: 16 | ```shell 17 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size 2.0x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH 18 | ``` 19 | 20 | 21 | ## Trained Models 22 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 23 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extraction code: mc24) 24 | 25 | 26 | ## Results 27 | 28 | | Model | FLOPs | #Params | Top-1 | Top-5 | 29 | |:------------------------|:---------:|:---------:|:---------:|:---------:| 30 | ShuffleNetV1 2.0x (group=3)| 524M | 5.4M | **25.9** | 8.6 | 31 | ShuffleNetV1 2.0x (group=8)| 522M | 6.5M | 27.1 | 9.2 | 32 | 1.0 MobileNetV1-224 | 569M | 4.2M | 29.4 | - | 33 | ShuffleNetV1 1.5x (group=3)| 292M | 3.4M | **28.4** | 9.8 | 34 | ShuffleNetV1 1.5x (group=8)| 290M | 4.3M | 29.0 | 10.4 | 35 | 0.75 MobileNetV1-224 | 325M | 2.6M | 31.6 | - | 36 | ShuffleNetV1 1.0x (group=3)| 138M | 1.9M | 32.2 | 12.3 | 37 | ShuffleNetV1 1.0x (group=8)| 138M | 2.4M | **32.0** | 13.6 | 38 | 0.5 MobileNetV1-224 | 149M | 1.3M | 36.3 | - | 39 | ShuffleNetV1 0.5x (group=3)| 38M | 0.7M | 42.7 | 20.0 | 40 | ShuffleNetV1 0.5x (group=8)| 40M | 1.0M | **41.2** | 19.0 | 41 | 0.25 MobileNetV1-224 | 41M | 0.5M | 49.4 | - | 42 | 43 | 44 | 45 | ## Citation 46 | If you use these models in your research, please
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 classifier assembled from ``ShuffleV2Block`` units.

    Layout: stride-2 3x3 stem conv, stride-2 max-pool, three stages of
    4, 8 and 4 blocks (first block of each stage has stride 2), a 1x1
    conv, 7x7 average pooling, optional dropout ('2.0x' only) and a
    bias-free linear classifier.

    Args:
        input_size: Accepted but not read by this class.
        n_class: Number of output classes.
        model_size: One of '0.5x', '1.0x', '1.5x', '2.0x'.

    Raises:
        NotImplementedError: For any other ``model_size``.
    """

    def __init__(self, input_size=224, n_class=1000, model_size='1.5x'):
        super(ShuffleNetV2, self).__init__()
        print('model size is ', model_size)

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size

        # Per-size widths: [unused, stem, stage2, stage3, stage4, head].
        channel_plans = (
            ('0.5x', [-1, 24, 48, 96, 192, 1024]),
            ('1.0x', [-1, 24, 116, 232, 464, 1024]),
            ('1.5x', [-1, 24, 176, 352, 704, 1024]),
            ('2.0x', [-1, 24, 244, 488, 976, 2048]),
        )
        for size_name, plan in channel_plans:
            if model_size == size_name:
                self.stage_out_channels = plan
                break
        else:
            raise NotImplementedError

        # Stem.
        width = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages: the first block downsamples and takes the full input;
        # later blocks are built with half the channel count as input.
        blocks = []
        for stage_idx, repeats in enumerate(self.stage_repeats):
            stage_width = self.stage_out_channels[stage_idx + 2]
            for position in range(repeats):
                if position == 0:
                    block_in, stride = width, 2
                else:
                    block_in, stride = width // 2, 1
                blocks.append(ShuffleV2Block(block_in, stage_width,
                                             mid_channels=stage_width // 2, ksize=3, stride=stride))
                width = stage_width
        self.features = nn.Sequential(*blocks)

        # Head.
        head_width = self.stage_out_channels[-1]
        self.conv_last = nn.Sequential(
            nn.Conv2d(width, head_width, 1, 1, 0, bias=False),
            nn.BatchNorm2d(head_width),
            nn.ReLU(inplace=True)
        )
        self.globalpool = nn.AvgPool2d(7)
        if self.model_size == '2.0x':
            self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Sequential(nn.Linear(head_width, n_class, bias=False))
        self._initialize_weights()

    def forward(self, x):
        """Run the backbone and classifier; returns one row of scores per sample."""
        for stage in (self.first_conv, self.maxpool, self.features, self.conv_last, self.globalpool):
            x = stage(x)
        if self.model_size == '2.0x':
            x = self.dropout(x)
        flat = x.contiguous().view(-1, self.stage_out_channels[-1])
        return self.classifier(flat)

    def _initialize_weights(self):
        """Initialize conv, batch-norm and linear layers in place.

        Conv weights are drawn from N(0, std) with std 0.01 when the module
        name contains 'first', else ``1 / weight.shape[1]``. Batch-norm gets
        weight 1, bias 1e-4 and zeroed running mean. Linear weights use
        std 0.01. Existing biases of conv/linear layers are zeroed.
        """
        for name, module in self.named_modules():
            if isinstance(module, nn.Conv2d):
                std = 0.01 if 'first' in name else 1.0 / module.weight.shape[1]
                nn.init.normal_(module.weight, 0, std)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(module.weight, 1)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0.0001)
                nn.init.constant_(module.running_mean, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
class ShuffleNetV1(nn.Module):
    """ShuffleNetV1 classifier assembled from ``ShuffleV1Block`` units.

    Layout: stride-2 3x3 stem conv, stride-2 max-pool, three stages of
    4, 8 and 4 blocks (first block of each stage has stride 2), 7x7
    average pooling and a bias-free linear classifier.

    Args:
        input_size: Accepted but not read by this class.
        n_class: Number of output classes.
        model_size: One of '0.5x', '1.0x', '1.5x', '2.0x'.
        group: Group count forwarded to every block; 3 or 8.

    Raises:
        AssertionError: If ``group`` is None.
        NotImplementedError: If ``group`` or ``model_size`` is unsupported.
    """

    def __init__(self, input_size=224, n_class=1000, model_size='2.0x', group=None):
        super(ShuffleNetV1, self).__init__()
        print('model size is ', model_size)

        assert group is not None

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size

        # Widths per (group, size): [unused, stem, stage2, stage3, stage4].
        channel_plans = {
            3: {
                '0.5x': [-1, 12, 120, 240, 480],
                '1.0x': [-1, 24, 240, 480, 960],
                '1.5x': [-1, 24, 360, 720, 1440],
                '2.0x': [-1, 48, 480, 960, 1920],
            },
            8: {
                '0.5x': [-1, 16, 192, 384, 768],
                '1.0x': [-1, 24, 384, 768, 1536],
                '1.5x': [-1, 24, 576, 1152, 2304],
                '2.0x': [-1, 48, 768, 1536, 3072],
            },
        }
        # An unsupported group used to fall through silently and fail later
        # with an unrelated AttributeError; reject it up front instead.
        if group not in channel_plans:
            raise NotImplementedError(
                'unsupported group {!r}; expected 3 or 8'.format(group))
        if model_size not in channel_plans[group]:
            raise NotImplementedError(
                'unsupported model_size {!r}'.format(model_size))
        self.stage_out_channels = channel_plans[group][model_size]

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel),
            nn.ReLU(inplace=True),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        features = []
        for idxstage, numrepeat in enumerate(self.stage_repeats):
            output_channel = self.stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                stride = 2 if i == 0 else 1
                # Only the very first block of the network is flagged.
                first_group = idxstage == 0 and i == 0
                features.append(ShuffleV1Block(input_channel, output_channel,
                                               group=group, first_group=first_group,
                                               mid_channels=output_channel // 4, ksize=3, stride=stride))
                input_channel = output_channel
        self.features = nn.Sequential(*features)

        self.globalpool = nn.AvgPool2d(7)

        self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
        self._initialize_weights()

    def forward(self, x):
        """Run the backbone and classifier; returns one row of scores per sample."""
        x = self.first_conv(x)
        x = self.maxpool(x)
        x = self.features(x)

        x = self.globalpool(x)
        x = x.contiguous().view(-1, self.stage_out_channels[-1])
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialize conv, batch-norm and linear layers in place.

        Conv weights are drawn from N(0, std) with std 0.01 when the module
        name contains 'first', else ``1 / weight.shape[1]``. Batch-norm gets
        weight 1, bias 1e-4 and zeroed running mean. Linear weights use
        std 0.01. Existing biases of conv/linear layers are zeroed.
        """
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'first' in name:
                    nn.init.normal_(m.weight, 0, 0.01)
                else:
                    nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Shufflenet(nn.Module): 6 | 7 | def __init__(self, inp, oup, mid_channels, *, ksize, stride): 8 | super(Shufflenet, self).__init__() 9 | self.stride = stride 10 | assert stride in [1, 2] 11 | assert ksize in [3, 5, 7] 12 | 13 | self.base_mid_channel = mid_channels 14 | self.ksize = ksize 15 | pad = ksize // 2 16 | self.pad = pad 17 | self.inp = inp 18 | 19 | outputs = oup - inp 20 | 21 | branch_main = [ 22 | # pw 23 | nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False), 24 | nn.BatchNorm2d(mid_channels), 25 | nn.ReLU(inplace=True), 26 | # dw 27 | nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False), 28 | nn.BatchNorm2d(mid_channels), 29 | # pw-linear 30 | nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False), 31 | nn.BatchNorm2d(outputs), 32 | nn.ReLU(inplace=True), 33 | ] 34 | self.branch_main = nn.Sequential(*branch_main) 35 | 36 | if stride == 2: 37 | branch_proj = [ 38 | # dw 39 | nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False), 40 | nn.BatchNorm2d(inp), 41 | # pw-linear 42 | nn.Conv2d(inp, inp, 1, 1, 0, bias=False), 43 | nn.BatchNorm2d(inp), 44 | nn.ReLU(inplace=True), 45 | ] 46 | self.branch_proj = nn.Sequential(*branch_proj) 47 | 48 | def forward(self, old_x): 49 | if self.stride==1: 50 | x_proj, x = channel_shuffle(old_x) 51 | return torch.cat((x_proj, self.branch_main(x)), 1) 52 | elif self.stride==2: 53 | x_proj = old_x 54 | x = old_x 55 | return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) 56 | 57 | class Shuffle_Xception(nn.Module): 58 | 59 | def __init__(self, inp, oup, mid_channels, *, stride): 60 | super(Shuffle_Xception, self).__init__() 61 | 62 | assert stride in [1, 2] 63 | 64 | self.base_mid_channel = mid_channels 65 | self.stride = stride 66 | self.ksize = 3 67 | self.pad = 1 68 | self.inp = inp 69 | outputs = oup - inp 70 | 71 | 
def channel_shuffle(x):
    """Split a 4-D tensor into its even- and odd-indexed channels.

    Args:
        x: Tensor of shape (batch, channels, height, width); the channel
            count must be divisible by 4.

    Returns:
        A pair ``(even, odd)``, each of shape
        (batch, channels // 2, height, width).
    """
    n, c, h, w = x.data.size()
    assert (c % 4 == 0)
    half = c // 2
    # Group consecutive channel pairs, then bring the pair index to the front.
    pairs = x.reshape(n * half, 2, h * w).permute(1, 0, 2)
    halves = pairs.reshape(2, -1, half, h, w)
    even, odd = halves[0], halves[1]
    return even, odd
class ShuffleNetV2DetNAS(nn.Module):
    """Classification network built from searched ShuffleNetV2 blocks.

    ``model_size`` selects a per-block choice list (``architecture``), the
    number of blocks per stage and the channel widths. The first block of
    every stage uses stride 2. The head is a 1x1 conv, 2-D dropout
    (p=0.2), 7x7 average pooling and an ``FC`` layer.

    Args:
        n_class: Number of output classes.
        model_size: One of 'COCO_FPN_3.8G', 'COCO_FPN_1.3G',
            'COCO_FPN_300M', 'COCO_RetinaNet_300M', 'VOC_FPN_300M',
            'VOC_RetinaNet_300M'.

    Raises:
        NotImplementedError: For any other ``model_size``.
    """

    def __init__(self, n_class=1000, model_size='VOC_FPN_300M'):
        super(ShuffleNetV2DetNAS, self).__init__()
        print('Model size is {}.'.format(model_size))

        small_repeats = [4, 4, 8, 4]
        small_channels = [-1, 16, 64, 160, 320, 640, 1024]
        large_arch = [0, 0, 3, 1, 2, 1, 0, 2, 0, 3, 1, 2, 3, 3, 2, 0, 2, 1, 1, 3,
                      2, 0, 2, 2, 2, 1, 3, 1, 0, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3]
        # (name, architecture, stage_repeats, stage_out_channels)
        presets = (
            ('COCO_FPN_3.8G', large_arch, [8, 8, 16, 8], [-1, 72, 172, 432, 864, 1728, 1728]),
            ('COCO_FPN_1.3G', large_arch, [8, 8, 16, 8], [-1, 48, 96, 240, 480, 960, 1024]),
            ('COCO_FPN_300M',
             [2, 1, 2, 0, 2, 1, 1, 2, 3, 3, 1, 3, 0, 0, 3, 1, 3, 1, 3, 2],
             small_repeats, small_channels),
            ('COCO_RetinaNet_300M',
             [2, 3, 1, 1, 3, 2, 1, 3, 3, 1, 1, 1, 3, 3, 2, 0, 3, 3, 3, 3],
             small_repeats, small_channels),
            ('VOC_FPN_300M',
             [2, 1, 0, 3, 1, 3, 0, 3, 2, 0, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1],
             small_repeats, small_channels),
            ('VOC_RetinaNet_300M',
             [1, 3, 0, 0, 2, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 0, 2, 3, 1, 1],
             small_repeats, small_channels),
        )
        for preset_name, architecture, stage_repeats, stage_out_channels in presets:
            if model_size == preset_name:
                break
        else:
            raise NotImplementedError

        stem_width = stage_out_channels[1]
        self.first_conv = ConvBNReLU(in_channel=3, out_channel=stem_width, k_size=3, stride=2, padding=1, gaussian_init=True)

        blocks = list()
        in_channels = stem_width
        block_no = 0  # running index into `architecture`
        for id_stage, repeats in enumerate(stage_repeats, start=1):
            out_channels = stage_out_channels[id_stage + 1]
            for id_repeat in range(repeats):
                # Block names look like "1a", "1b", ..., "2a", ...
                prefix = str(id_stage) + chr(ord('a') + id_repeat)
                stride = 2 if id_repeat == 0 else 1
                blocks.append(ShuffleNetV2BlockSearched(prefix, in_channels=in_channels, out_channels=out_channels,
                                                        stride=stride, base_mid_channels=out_channels // 2, i_th=block_no,
                                                        architecture=architecture))
                in_channels = out_channels
                block_no += 1
        self.features = nn.Sequential(*blocks)

        head_width = stage_out_channels[-1]
        self.last_conv = ConvBNReLU(in_channel=in_channels, out_channel=head_width, k_size=1, stride=1, padding=0)
        self.drop_out = nn.Dropout2d(p=0.2)
        self.global_pool = nn.AvgPool2d(7)
        self.fc = FC(in_channels=head_width, out_channels=n_class)
        self._initialize_weights()

    def _initialize_weights(self):
        """Initialize conv, batch-norm and linear layers in place.

        Conv weights are drawn from N(0, std) with std 0.01 when the module
        name contains 'first', else ``1 / weight.shape[1]``. Batch-norm gets
        weight 1, bias 1e-4 and zeroed running mean. Linear weights use
        std 0.01. Existing biases of conv/linear layers are zeroed.
        """
        for name, module in self.named_modules():
            if isinstance(module, nn.Conv2d):
                std = 0.01 if 'first' in name else 1.0 / module.weight.shape[1]
                nn.init.normal_(module.weight, 0, std)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(module.weight, 1)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0.0001)
                nn.init.constant_(module.running_mean, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Run the backbone and head; returns one row of scores per sample."""
        for stage in (self.first_conv, self.features, self.last_conv, self.drop_out):
            x = stage(x)
        pooled = self.global_pool(x)
        # Flatten using the batch size of the pre-pool tensor.
        return self.fc(pooled.view(x.size(0), -1))
class ShuffleNetV2_OneShot(nn.Module):
    """Single-path network assembled from a per-block choice list.

    There are four stages of 4, 4, 8 and 4 blocks. ``architecture[i]``
    picks the i-th block type (0/1/2: ``Shufflenet`` with a 3/5/7 kernel,
    3: ``Shuffle_Xception``) and ``channels_scales[i]`` scales that block's
    hidden width relative to half its output width.

    Args:
        input_size: Must be a multiple of 32; otherwise unused here.
        n_class: Number of output classes.
        architecture: Sequence of block choices, one per block.
        channels_scales: Sequence of width multipliers, one per block.

    Raises:
        AssertionError: On an invalid ``input_size``, missing
            ``architecture``/``channels_scales``, or when ``architecture``
            has entries left over after all blocks are built.
        NotImplementedError: On a block choice outside 0..3.
    """

    def __init__(self, input_size=224, n_class=1000, architecture=None, channels_scales=None):
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0
        assert architecture is not None and channels_scales is not None

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        # Stem.
        width = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, width, 3, 2, 1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
        )

        blocks = []
        cursor = 0  # position in architecture / channels_scales
        for stage_idx, repeats in enumerate(self.stage_repeats):
            stage_width = self.stage_out_channels[stage_idx + 2]

            for position in range(repeats):
                # The first block of a stage downsamples and takes the full
                # input; later blocks are built with half the width as input.
                if position == 0:
                    inp, outp, stride = width, stage_width, 2
                else:
                    inp, outp, stride = width // 2, stage_width, 1

                choice = architecture[cursor]
                mid_channels = int((outp // 2) * channels_scales[cursor])
                cursor += 1

                if choice == 0:
                    label, ksize = 'Shuffle3x3', 3
                elif choice == 1:
                    label, ksize = 'Shuffle5x5', 5
                elif choice == 2:
                    label, ksize = 'Shuffle7x7', 7
                elif choice == 3:
                    label, ksize = 'Xception', None
                else:
                    raise NotImplementedError
                print(label)

                if ksize is None:
                    block = Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride)
                else:
                    block = Shufflenet(inp, outp, mid_channels=mid_channels, ksize=ksize, stride=stride)
                blocks.append(block)
                width = stage_width

        assert cursor == len(architecture)
        self.features = nn.Sequential(*blocks)

        # Head.
        head_width = self.stage_out_channels[-1]
        self.conv_last = nn.Sequential(
            nn.Conv2d(width, head_width, 1, 1, 0, bias=False),
            nn.BatchNorm2d(head_width),
            nn.ReLU(inplace=True),
        )
        self.globalpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(nn.Linear(head_width, n_class, bias=False))
        self._initialize_weights()

    def forward(self, x):
        """Run the backbone and classifier; returns one row of scores per sample."""
        for stage in (self.first_conv, self.features, self.conv_last, self.globalpool, self.dropout):
            x = stage(x)
        flat = x.contiguous().view(-1, self.stage_out_channels[-1])
        return self.classifier(flat)

    def _initialize_weights(self):
        """Initialize conv, batch-norm and linear layers in place.

        Conv weights are drawn from N(0, std) with std 0.01 when the module
        name contains 'first', else ``1 / weight.shape[1]``. Batch-norm gets
        weight 1, bias 1e-4 and zeroed running mean. Linear weights use
        std 0.01. Existing biases of conv/linear layers are zeroed.
        """
        for name, module in self.named_modules():
            if isinstance(module, nn.Conv2d):
                std = 0.01 if 'first' in name else 1.0 / module.weight.shape[1]
                nn.init.normal_(module.weight, 0, std)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(module.weight, 1)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0.0001)
                nn.init.constant_(module.running_mean, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
if __name__ == "__main__":
    # Smoke test: build one fixed architecture and push a random batch through it.
    architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
    scale_list = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6]
    scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
    # Translate each per-block scale id into its multiplier.
    channels_scales = [scale_list[scale_id] for scale_id in scale_ids]
    model = ShuffleNetV2_OneShot(architecture=architecture, channels_scales=channels_scales)

    test_data = torch.rand(5, 3, 224, 224)
    test_outputs = model(test_data)
    print(test_outputs.size())
self.stage_out_channels[idxstage+2] 38 | 39 | activation = 'HS' if idxstage >= 1 else 'ReLU' 40 | useSE = 'True' if idxstage >= 2 else False 41 | 42 | for i in range(numrepeat): 43 | if i == 0: 44 | inp, outp, stride = input_channel, output_channel, 2 45 | else: 46 | inp, outp, stride = input_channel // 2, output_channel, 1 47 | 48 | blockIndex = architecture[archIndex] 49 | archIndex += 1 50 | if blockIndex == 0: 51 | print('Shuffle3x3') 52 | self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=3, stride=stride, 53 | activation=activation, useSE=useSE)) 54 | elif blockIndex == 1: 55 | print('Shuffle5x5') 56 | self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=5, stride=stride, 57 | activation=activation, useSE=useSE)) 58 | elif blockIndex == 2: 59 | print('Shuffle7x7') 60 | self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=7, stride=stride, 61 | activation=activation, useSE=useSE)) 62 | elif blockIndex == 3: 63 | print('Xception') 64 | self.features.append(Shuffle_Xception(inp, outp, base_mid_channels=outp // 2, stride=stride, 65 | activation=activation, useSE=useSE)) 66 | else: 67 | raise NotImplementedError 68 | input_channel = output_channel 69 | assert archIndex == len(architecture) 70 | self.features = nn.Sequential(*self.features) 71 | 72 | self.conv_last = nn.Sequential( 73 | nn.Conv2d(input_channel, 1280, 1, 1, 0, bias=False), 74 | nn.BatchNorm2d(1280), 75 | HS() 76 | ) 77 | self.globalpool = nn.AvgPool2d(7) 78 | self.LastSE = SELayer(1280) 79 | self.fc = nn.Sequential( 80 | nn.Linear(1280, 1280, bias=False), 81 | HS(), 82 | ) 83 | self.dropout = nn.Dropout(0.2) 84 | self.classifier = nn.Sequential(nn.Linear(1280, n_class, bias=False)) 85 | self._initialize_weights() 86 | 87 | def forward(self, x): 88 | x = self.first_conv(x) 89 | x = self.features(x) 90 | x = self.conv_last(x) 91 | 92 | x = self.globalpool(x) 93 | x = self.LastSE(x) 94 | 95 | x = x.contiguous().view(-1, 1280) 
96 | 97 | x = self.fc(x) 98 | x = self.dropout(x) 99 | x = self.classifier(x) 100 | return x 101 | 102 | def _initialize_weights(self): 103 | for name, m in self.named_modules(): 104 | if isinstance(m, nn.Conv2d): 105 | if 'first' in name or 'SE' in name: 106 | nn.init.normal_(m.weight, 0, 0.01) 107 | else: 108 | nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) 109 | if m.bias is not None: 110 | nn.init.constant_(m.bias, 0) 111 | elif isinstance(m, nn.BatchNorm2d): 112 | nn.init.constant_(m.weight, 1) 113 | if m.bias is not None: 114 | nn.init.constant_(m.bias, 0.0001) 115 | nn.init.constant_(m.running_mean, 0) 116 | elif isinstance(m, nn.BatchNorm1d): 117 | nn.init.constant_(m.weight, 1) 118 | if m.bias is not None: 119 | nn.init.constant_(m.bias, 0.0001) 120 | nn.init.constant_(m.running_mean, 0) 121 | elif isinstance(m, nn.Linear): 122 | nn.init.normal_(m.weight, 0, 0.01) 123 | if m.bias is not None: 124 | nn.init.constant_(m.bias, 0) 125 | 126 | if __name__ == "__main__": 127 | architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] 128 | model = ShuffleNetV2_Plus(architecture=architecture) 129 | # print(model) 130 | 131 | test_data = torch.rand(5, 3, 224, 224) 132 | test_outputs = model(test_data) 133 | print(test_outputs.size()) 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ShuffleNet Series 2 | ShuffleNet Series by Megvii Research. 3 | 4 | ## Introduction 5 | This repository contains the following ShuffleNet series models: 6 | - ShuffleNetV1: [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083) 7 | - ShuffleNetV2: [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164) 8 | - ShuffleNetV2+: A strengthen version of ShuffleNetV2. 
9 | - ShuffleNetV2.Large: A deeper version based on ShuffleNetV2 with 10G+ FLOPs. 10 | - ShuffleNetV2.ExLarge: A deeper version based on ShuffleNetV2 with 40G+ FLOPs. 11 | - OneShot: [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) 12 | - DetNAS: [DetNAS: Backbone Search for Object Detection](https://arxiv.org/abs/1903.10979) 13 | 14 | ## Trained Models 15 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo) 16 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24) 17 | 18 | ## Details 19 | 20 | ### ShuffleNetV2+ 21 | The following is the comparison between ShuffleNetV2+ and [MobileNetV3](https://arxiv.org/pdf/1905.02244). Details can be seen in [ShuffleNetV2+](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B). 22 | 23 | | Model | FLOPs | #Params | Top-1 | Top-5 | 24 | |:------------------------|:---------:|:---------:|:---------:|:---------:| 25 | ShuffleNetV2+ Large | 360M | 6.7M | **22.9** | 6.7 | 26 | MobileNetV3 Large 224/1.25 | 356M | 7.5M | 23.4 | - | 27 | ShuffleNetV2+ Medium | 222M | 5.6M | **24.3** | 7.4 | 28 | MobileNetV3 Large 224/1.0 | 217M | 5.4M | 24.8 | - | 29 | ShuffleNetV2+ Small | 156M | 5.1M | **25.9** | 8.3 | 30 | MobileNetV3 Large 224/0.75 | 155M | 4.0M | 26.7 | - | 31 | 32 | ### ShuffleNetV2 33 | The following is the comparison between ShuffleNetV2 and [MobileNetV2](https://arxiv.org/abs/1801.04381). Details can be seen in [ShuffleNetV2](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2). 
34 | 35 | | Model | FLOPs | #Params | Top-1 | Top-5 | 36 | | :--------------------- | :---: | :------: | :----------: | :------: | 37 | | ShuffleNetV2 2.0x | 591M | 7.4M | **25.0** | 7.6 | 38 | | MobileNetV2 (1.4) | 585M | 6.9M | 25.3 | - | 39 | | ShuffleNetV2 1.5x | 299M | 3.5M | **27.4** | 9.4 | 40 | | MobileNetV2 | 300M | 3.4M | 28.0 | - | 41 | | ShuffleNetV2 1.0x | 146M | 2.3M | 30.6 | 11.1 | 42 | | ShuffleNetV2 0.5x | 41M | 1.4M | 38.9 | 17.4 | 43 | 44 | ### ShuffleNetV2.Large 45 | The following is the comparison between ShuffleNetV2.Large and [SENet](https://arxiv.org/abs/1709.01507). Details can be seen in [ShuffleNetV2.Large](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2.Large). 46 | 47 | | Model | FLOPs | #Params | Top-1 | Top-5 | 48 | | :--------------------- | :---: | :------: | :---: | :---: | 49 | | ShuffleNetV2.Large | 12.7G | 140.7M | **18.56** | 4.48 | 50 | | SENet | 20.7G | - | 18.68 | 4.47 | 51 | 52 | ### ShuffleNetV2.ExLarge 53 | The following is the result of ShuffleNetV2.ExLarge. Details can be seen in [ShuffleNetV2.ExLarge](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2.ExLarge). 54 | 55 | | Model | FLOPs | #Params | Top-1 | Top-5 | 56 | | :--------------------- | :---: | :------: | :---: | :---: | 57 | | ShuffleNetV2.ExLarge | 46.2G | 254.7M | 15.52 | 2.9 | 58 | 59 | 60 | 61 | ### ShuffleNetV1 62 | The following is the comparison between ShuffleNetV1 and [MobileNetV1](https://arxiv.org/abs/1704.04861). Details can be seen in [ShuffleNetV1](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1). 
63 | 64 | | Model | FLOPs | #Params | Top-1 | Top-5 | 65 | |:------------------------|:---------:|:---------:|:---------:|:---------:| 66 | ShuffleNetV1 2.0x (group=3)| 524M | 5.4M | **25.9** | 8.6 | 67 | ShuffleNetV1 2.0x (group=8)| 522M | 6.5M | 27.1 | 9.2 | 68 | 1.0 MobileNetV1-224 | 569M | 4.2M | 29.4 | - | 69 | ShuffleNetV1 1.5x (group=3)| 292M | 3.4M | **28.4** | 9.8 | 70 | ShuffleNetV1 1.5x (group=8)| 290M | 4.3M | 29.0 | 10.4 | 71 | 0.75 MobileNetV1-224 | 325M | 2.6M | 31.6 | - | 72 | ShuffleNetV1 1.0x (group=3)| 138M | 1.9M | 32.2 | 12.3 | 73 | ShuffleNetV1 1.0x (group=8)| 138M | 2.4M | **32.0** | 13.6 | 74 | 0.5 MobileNetV1-224 | 149M | 1.3M | 36.3 | - | 75 | ShuffleNetV1 0.5x (group=3)| 38M | 0.7M | 42.7 | 20.0 | 76 | ShuffleNetV1 0.5x (group=8)| 40M | 1.0M | **41.2** | 19.0 | 77 | 0.25 MobileNetV1-224 | 41M | 0.5M | 49.4 | - | 78 | 79 | 80 | ### OneShot 81 | The following is the comparison between Single Path One-Shot NAS and other NAS counterparts. Details can be seen in [OneShot](https://github.com/megvii-model/ShuffleNet-Series/tree/master/OneShot). 82 | 83 | | Model | FLOPs | #Params | Top-1 | Top-5 | 84 | | :--------------------- | :---: | :------: | :---: | :---: | 85 | | OneShot | 328M | 3.4M | **25.1** | 8.0 | 86 | | NASNET-A| 564M | 5.3M | 26.0 | 8.4 | 87 | | PNASNET| 588M | 5.1M | 25.8 | 8.1 | 88 | | MnasNet| 317M | 4.2M | 26.0 | 8.2 | 89 | | DARTS| 574M| 4.7M | 26.7 | 8.7 | 90 | | FBNet-B| 295M| 4.5M | 25.9 | - | 91 | 92 | ### DetNAS 93 | The following is the performance of DetNAS backbones on ImageNet, compared with ResNet. Backbone details can be seen in [DetNAS](https://github.com/megvii-model/ShuffleNet-Series/tree/master/DetNAS). 
import torch
import torch.nn as nn


class SELayer(nn.Module):
    """Squeeze-and-excitation gate with a hard-sigmoid (``clamp(a + 3, 0, 6) / 6``).

    Args:
        inplanes: Number of input (and output) channels; the bottleneck has
            ``inplanes // 4`` channels.
        isTensor: ``True`` for (N, C, H, W) input (global pooling followed by
            1x1 convs), ``False`` for already-pooled (N, C) input (linear
            layers).
    """

    def __init__(self, inplanes, isTensor=True):
        super(SELayer, self).__init__()
        if isTensor:
            # if the input is (N, C, H, W)
            self.SE_opr = nn.Sequential(
                nn.AdaptiveAvgPool2d(1),
                nn.Conv2d(inplanes, inplanes // 4, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(inplanes // 4),
                nn.ReLU(inplace=True),
                nn.Conv2d(inplanes // 4, inplanes, kernel_size=1, stride=1, bias=False),
            )
        else:
            # if the input is (N, C): there is nothing to pool, and a 2-D
            # pooling layer in front of nn.Linear cannot work on (N, C) data.
            # nn.Identity keeps the sub-module indices (and so the
            # state_dict keys) of the remaining layers unchanged.
            self.SE_opr = nn.Sequential(
                nn.Identity(),
                nn.Linear(inplanes, inplanes // 4, bias=False),
                nn.BatchNorm1d(inplanes // 4),
                nn.ReLU(inplace=True),
                nn.Linear(inplanes // 4, inplanes, bias=False),
            )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the gate computed from ``x``."""
        atten = self.SE_opr(x)
        atten = torch.clamp(atten + 3, 0, 6) / 6
        return x * atten


class HS(nn.Module):
    """Hard-swish activation: ``x * clamp(x + 3, 0, 6) / 6``."""

    def __init__(self):
        super(HS, self).__init__()

    def forward(self, inputs):
        clip = torch.clamp(inputs + 3, 0, 6) / 6
        return inputs * clip


def _make_activation(activation):
    """Return a new in-place ReLU for 'ReLU', otherwise a new HS module."""
    if activation == 'ReLU':
        return nn.ReLU(inplace=True)
    return HS()


def _make_branch_proj(inp, ksize, stride, pad, activation):
    """Build the shortcut branch: depthwise conv + BN, 1x1 conv + BN, activation."""
    return nn.Sequential(
        # dw
        nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        # pw-linear
        nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
        nn.BatchNorm2d(inp),
        _make_activation(activation),
    )


class Shufflenet(nn.Module):
    """ShuffleNetV2 unit with a configurable depthwise kernel size.

    With ``stride == 1`` the input (``2 * inp`` channels) is split by
    ``channel_shuffle``; one half passes through untouched, the other goes
    through ``branch_main``. With ``stride == 2`` the whole input feeds both
    ``branch_proj`` and ``branch_main``. The two results are concatenated on
    the channel axis, giving ``oup`` channels.

    Args:
        inp: Channels entering ``branch_main``.
        oup: Channels of the block output.
        base_mid_channels: Width of the bottleneck; must equal ``oup // 2``.
        ksize: Depthwise kernel size, one of 3, 5, 7.
        stride: 1 or 2.
        activation: ``'ReLU'`` selects ReLU, anything else selects ``HS``.
        useSE: Append an ``SELayer`` to ``branch_main`` (not allowed with ReLU).
    """

    def __init__(self, inp, oup, base_mid_channels, *, ksize, stride, activation, useSE):
        super(Shufflenet, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        assert ksize in [3, 5, 7]
        assert base_mid_channels == oup // 2

        self.base_mid_channel = base_mid_channels
        self.ksize = ksize
        pad = ksize // 2
        self.pad = pad
        self.inp = inp

        # The shortcut contributes `inp` channels; branch_main supplies the rest.
        outputs = oup - inp

        if activation == 'ReLU':
            assert not useSE, 'This model should not have SE with ReLU'

        branch_main = [
            # pw
            nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            _make_activation(activation),
            # dw
            nn.Conv2d(base_mid_channels, base_mid_channels, ksize, stride, pad,
                      groups=base_mid_channels, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            # pw-linear
            nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False),
            nn.BatchNorm2d(outputs),
            _make_activation(activation),
        ]
        if activation != 'ReLU' and useSE:
            branch_main.append(SELayer(outputs))
        self.branch_main = nn.Sequential(*branch_main)

        if stride == 2:
            self.branch_proj = _make_branch_proj(inp, ksize, stride, pad, activation)
        else:
            self.branch_proj = None

    def forward(self, old_x):
        if self.stride == 1:
            x_proj, x = channel_shuffle(old_x)
            return torch.cat((x_proj, self.branch_main(x)), 1)
        # stride == 2 (enforced by the assert in __init__)
        return torch.cat((self.branch_proj(old_x), self.branch_main(old_x)), 1)


class Shuffle_Xception(nn.Module):
    """ShuffleNetV2 unit whose main branch stacks three 3x3 depthwise convs.

    Branch wiring and arguments match ``Shufflenet`` (without ``ksize``). Only
    the first depthwise conv carries ``stride``. The other two use stride 1 so
    that, for ``stride == 2``, ``branch_main`` and ``branch_proj`` produce the
    same spatial size and can be concatenated.
    """

    def __init__(self, inp, oup, base_mid_channels, *, stride, activation, useSE):
        super(Shuffle_Xception, self).__init__()

        assert stride in [1, 2]
        assert base_mid_channels == oup // 2

        self.base_mid_channel = base_mid_channels
        self.stride = stride
        self.ksize = 3
        self.pad = 1
        self.inp = inp
        outputs = oup - inp

        branch_main = [
            # dw (the only conv in this branch that downsamples)
            nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
            nn.BatchNorm2d(inp),
            # pw
            nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            _make_activation(activation),
            # dw
            nn.Conv2d(base_mid_channels, base_mid_channels, 3, 1, 1,
                      groups=base_mid_channels, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            # pw
            nn.Conv2d(base_mid_channels, base_mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            _make_activation(activation),
            # dw
            nn.Conv2d(base_mid_channels, base_mid_channels, 3, 1, 1,
                      groups=base_mid_channels, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            # pw
            nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False),
            nn.BatchNorm2d(outputs),
            _make_activation(activation),
        ]

        if useSE:
            assert activation != 'ReLU'
            branch_main.append(SELayer(outputs))

        self.branch_main = nn.Sequential(*branch_main)

        if self.stride == 2:
            self.branch_proj = _make_branch_proj(inp, 3, stride, 1, activation)
        else:
            # Always define the attribute, as Shufflenet does.
            self.branch_proj = None

    def forward(self, old_x):
        if self.stride == 1:
            x_proj, x = channel_shuffle(old_x)
            return torch.cat((x_proj, self.branch_main(x)), 1)
        # stride == 2 (enforced by the assert in __init__)
        return torch.cat((self.branch_proj(old_x), self.branch_main(old_x)), 1)


def channel_shuffle(x):
    """Split a (N, C, H, W) tensor into its even- and odd-indexed channels.

    Returns:
        A pair ``(even, odd)`` of tensors of shape (N, C // 2, H, W), where
        ``even`` holds channels 0, 2, 4, ... and ``odd`` channels 1, 3, 5, ...

    Raises:
        AssertionError: If C is not a multiple of 4.
    """
    batchsize, num_channels, height, width = x.shape
    assert (num_channels % 4 == 0)
    x = x.reshape(batchsize * num_channels // 2, 2, height * width)
    x = x.permute(1, 0, 2)
    x = x.reshape(2, -1, num_channels // 2, height, width)
    return x[0], x[1]
import torch
import torch.nn as nn


class Conv_BN_ReLU(nn.Module):
    """Bias-free Conv2d optionally followed by BatchNorm2d and ReLU.

    The ``bn`` and ``relu`` sub-modules are always constructed; ``has_bn`` and
    ``has_relu`` only decide whether ``forward`` applies them.
    """

    def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1, has_bn=True, has_relu=True):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=k_size,
                              stride=stride, padding=padding,
                              groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(out_channel, eps=1e-9)
        self.has_bn = has_bn

        self.has_relu = has_relu
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv, then BN and ReLU if the corresponding flags are set."""
        x = self.conv(x)
        if self.has_bn:
            x = self.bn(x)
        if self.has_relu:
            x = self.relu(x)
        return x


class ShuffleV2Block(nn.Module):
    """Residual ShuffleNetV2 unit with grouped 3x3 conv and optional SE gate.

    The input is always split in two by ``channel_shuffle``. One half goes
    through ``branch_main`` (1x1 conv, grouped 3x3 conv, 1x1 conv). The other
    half is the shortcut, passed through a 3x3 ``proj`` conv when ``has_proj``
    is set. Without ``proj`` the main branch also gets an identity residual
    from its own input. The two halves are concatenated, so the block outputs
    ``2 * out_channels`` channels when ``has_proj`` is set.
    """
    def __init__(self, in_channels, out_channels, stride, groups, has_proj=False, has_se=False):
        super(ShuffleV2Block, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        self.has_proj = has_proj
        self.has_se = has_se
        self.relu = nn.ReLU(inplace=True)

        if stride == 2:
            # 1x1 conv applied to the concatenated output of stride-2 blocks;
            # it has stride 1, the spatial reduction is done by proj and the
            # 3x3 conv in branch_main.
            self.down = Conv_BN_ReLU(out_channels * 2, out_channels * 2, k_size=1, stride=1, padding=0)

        if has_proj:
            self.proj = Conv_BN_ReLU(in_channels, out_channels, k_size=3, stride=stride, padding=1, has_bn=True, has_relu=False)

        self.branch_main = nn.Sequential(
            Conv_BN_ReLU(in_channels, out_channels, k_size=1, stride=1, padding=0, has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=stride, padding=1, groups=groups, has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=1, stride=1, padding=0, has_bn=True, has_relu=False),
        )

        if has_se:
            self.se_globalpool = nn.AdaptiveAvgPool2d(output_size=1)
            # The two linear layers are registered both as attributes and
            # inside se_block, so they appear under both names in state_dict.
            self.se_fc1 = nn.Linear(out_channels, out_channels)
            self.se_fc2 = nn.Linear(out_channels, out_channels)
            se_block = [
                self.se_fc1,
                nn.ReLU(inplace=True),
                self.se_fc2,
                nn.Sigmoid(),
            ]
            self.se_block = nn.Sequential(*se_block)

    def forward(self, old_x):
        """Run the block on ``old_x`` (N, C, H, W) and return the fused output."""
        proj, x = self.channel_shuffle(old_x)
        # Keep the main-branch input for the identity residual below.
        x_proj = x
        if self.has_proj:
            proj = self.proj(proj)

        x = self.branch_main(x)

        if self.has_se:
            # Pool to (N, C), compute per-channel gates, broadcast back to 4-D.
            se_scale = self.se_globalpool(x).view(x.size(0), -1)
            se_scale = self.se_block(se_scale).unsqueeze(-1).unsqueeze(-1)
            x = x * se_scale

        if not self.has_proj:
            # Identity residual; requires branch_main to preserve the shape.
            x = x_proj + x

        x = self.relu(torch.cat((proj, x), dim=1))

        if self.stride == 2:
            x = self.down(x)

        return x

    def channel_shuffle(self, x):
        """Split (N, C, H, W) into its even- and odd-indexed channel halves.

        C must be a multiple of 4 (asserted).
        """
        batchsize, num_channels, height, width = x.data.size()
        assert (num_channels % 4 == 0)
        x = x.reshape(batchsize * num_channels // 2, 2, height * width)
        x = x.permute(1, 0, 2)
        x = x.reshape(2, -1, num_channels // 2, height, width)
        return x[0], x[1]


class ExtraLabelPredict(nn.Module):
    """Auxiliary classification head attached to an intermediate feature map.

    Max-pools the input (3x3, stride 2), applies a 1x1 and a 3x3
    ``Conv_BN_ReLU``, global-average-pools and classifies with a linear layer.
    """
    def __init__(self, in_channels, out_channels, num_classes=1000):
        super(ExtraLabelPredict, self).__init__()
        self.num_classes = num_classes
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv = nn.Sequential(
            Conv_BN_ReLU(in_channels, out_channels, 1, 1, 0),
            Conv_BN_ReLU(out_channels, out_channels, 3, 1, 1)
        )
        self.globalpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(out_channels, num_classes)

    def forward(self, inputs):
        """Return logits of shape (N, num_classes)."""
        inputs = self.maxpool(inputs)
        inputs = self.conv(inputs)
        inputs = self.globalpool(inputs)
        # Flatten (N, C, 1, 1) to (N, C) for the linear layer.
        inputs = inputs.view(inputs.size(0), -1)
        inputs = self.fc(inputs)
        return inputs


class ShuffleNetV2(nn.Module):
    """ShuffleNetV2.Large classifier with three auxiliary heads.

    Only ``model_size='large'`` is implemented; any other value raises
    ``NotImplementedError``. In training mode ``forward`` returns the main
    logits together with the three auxiliary predictions; in eval mode it
    returns the main logits only.
    """
    def __init__(self, n_class=1000, model_size='large'):
        super(ShuffleNetV2, self).__init__()

        # Overwritten just below for the only supported model size.
        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if model_size == 'large':
            # Per stage: name prefix, number of blocks, half of the stage's
            # output width, and whether the first block uses stride 2.
            self.pre = [2, 3, 4, 5]
            self.stage_repeats = [10, 10, 23, 10]
            self.mid_outputs = [64, 128, 256, 512]
            self.enable_stride = [False, True, True, True]
        else:
            raise NotImplementedError

        self.first_conv = nn.Sequential(
            Conv_BN_ReLU(3, 64, k_size=3, stride=2, padding=1),
            Conv_BN_ReLU(64, 64, k_size=3, stride=1, padding=1),
            Conv_BN_ReLU(64, 128, k_size=3, stride=1, padding=1),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = nn.ModuleList()
        # Channels entering each block branch: half of the 128-channel stem
        # output, because every block splits its input in two.
        input_channel = 64
        if model_size == 'large':
            for p, s, o, es in zip(self.pre, self.stage_repeats, self.mid_outputs, self.enable_stride):
                feature = nn.Sequential()
                for i in range(s):
                    # Blocks are named "<stage><letter>", e.g. "2a", "2b", ...
                    prefix = "{}{}".format(p, chr(ord("a") + i))
                    # Only the first block of a stride-enabled stage downsamples,
                    # and only the first block of a stage has a projection.
                    stride = 1 if not es or i > 0 else 2
                    has_proj = False if i > 0 else True
                    feature.add_module(prefix, ShuffleV2Block(input_channel, o * 2, stride, groups=8, has_proj=has_proj, has_se=True))
                    input_channel = o * 2
                self.features.append(feature)
                # Auxiliary heads on the outputs of the first three stages.
                if p == 2:
                    self.predict_56 = ExtraLabelPredict(in_channels=256, out_channels=256)
                elif p == 3:
                    self.predict_28 = ExtraLabelPredict(in_channels=512, out_channels=512)
                elif p == 4:
                    self.predict_14 = ExtraLabelPredict(in_channels=1024, out_channels=1024)

        self.conv_last = Conv_BN_ReLU(input_channel * 2, 1280, 3, 1, 1)
        # Fixed 7x7 window: matches the 7x7 final map noted in forward().
        self.globalpool = nn.AvgPool2d(7)
        if self.model_size == 'large':
            self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1280, n_class)

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise every conv, batch-norm and linear layer in place.

        Convs whose module name contains 'first' get N(0, 0.01); other convs
        get N(0, 1 / in_channels_per_group). Batch-norm layers get weight 1,
        bias 1e-4 and a zeroed running mean. Linear layers get N(0, 0.01)
        weights and zero bias.
        """
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'first' in name:
                    nn.init.normal_(m.weight, 0, 0.01)
                else:
                    nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return logits, plus the auxiliary predictions when training.

        The shape comments below are the original author's notes for a batch
        of five 224x224 images.
        """
        x = self.first_conv(x)
        x = self.maxpool(x)
        # 5 * 128 * 56 * 56

        x = self.features[0](x)
        # 5 * 256 * 56 * 56
        if self.training:
            predict_56 = self.predict_56(x)

        x = self.features[1](x)
        # 5 * 512 * 28 * 28
        if self.training:
            predict_28 = self.predict_28(x)

        x = self.features[2](x)
        # 5 * 1024 * 14 * 14
        if self.training:
            predict_14 = self.predict_14(x)

        x = self.features[3](x)
        # 5 * 2048 * 7 * 7

        x = self.conv_last(x)
        x = self.globalpool(x)
        if self.model_size == 'large':
            x = self.dropout(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        if self.training:
            # Loss is scaled by 1.0, 0.7, 0.5, 0.3
            return x, predict_14, predict_28, predict_56
        else:
            return x


def create_network():
    """Build a ShuffleNetV2 with its default arguments."""
    model = ShuffleNetV2()
    return model


if __name__ == "__main__":
    create_network()
import torch
import torch.nn as nn


class Conv_BN_ReLU(nn.Module):
    """Bias-free Conv2d optionally followed by BatchNorm2d and ReLU.

    Unlike a plain stack, ``bn`` and ``relu`` are only created when the
    corresponding flag is set. With ``gaussian_init`` the conv weight is drawn
    from N(0, 0.01) at construction.
    """

    def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1,
                 has_bn=True, has_relu=True, gaussian_init=False):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=k_size,
                              stride=stride, padding=padding,
                              groups=groups, bias=False)
        if gaussian_init:
            nn.init.normal_(self.conv.weight.data, 0, 0.01)

        if has_bn:
            self.bn = nn.BatchNorm2d(out_channel)

        self.has_bn = has_bn
        self.has_relu = has_relu
        if has_relu:
            self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv, then BN and ReLU if the corresponding flags are set."""
        x = self.conv(x)
        if self.has_bn:
            x = self.bn(x)
        if self.has_relu:
            x = self.relu(x)
        return x


class FC(nn.Module):
    """Linear layer whose weight is drawn from N(0, 0.01) at construction."""
    def __init__(self, in_channels, out_channels):
        super(FC, self).__init__()
        self.fc = nn.Linear(in_channels, out_channels)
        nn.init.normal_(self.fc.weight.data, 0, 0.01)

    def forward(self, x):
        return self.fc(x)


class ExtraLabelPredict(nn.Module):
    """Auxiliary classification head attached to an intermediate feature map.

    Max-pools the input (3x3, stride 2), applies a 1x1 and a 3x3
    ``Conv_BN_ReLU``, global-average-pools and classifies with a linear layer.
    """
    def __init__(self, in_channels, out_channels, num_classes=1000):
        super(ExtraLabelPredict, self).__init__()
        self.num_classes = num_classes
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv = nn.Sequential(
            Conv_BN_ReLU(in_channels, out_channels, 1, 1, 0),
            Conv_BN_ReLU(out_channels, out_channels, 3, 1, 1)
        )
        self.globalpool = nn.AdaptiveAvgPool2d(output_size=1)
        self.fc = nn.Linear(out_channels, num_classes)

    def forward(self, inputs):
        """Return logits of shape (N, num_classes)."""
        inputs = self.maxpool(inputs)
        inputs = self.conv(inputs)
        inputs = self.globalpool(inputs)
        # Flatten (N, C, 1, 1) to (N, C) for the linear layer.
        inputs = inputs.view(inputs.size(0), -1)
        inputs = self.fc(inputs)
        return inputs


class ShuffleV2Block(nn.Module):
    """ShuffleNetV2 unit with grouped and depthwise 3x3 convs and an SE gate.

    With ``has_proj`` the whole input feeds both the 3x3 ``proj`` shortcut
    (``out_channels - mid_channels`` outputs) and ``branch_main``
    (``mid_channels`` outputs). Without it the input is split by
    ``channel_shuffle``; one half is passed through unchanged and the other
    goes through ``branch_main`` with an identity residual followed by ReLU.
    The two parts are concatenated on the channel axis.
    """
    def __init__(self, in_channels, out_channels, mid_channels, stride, groups, has_proj=False, has_se=False):
        super(ShuffleV2Block, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        self.has_proj = has_proj
        self.has_se = has_se
        self.relu = nn.ReLU(inplace=True)

        if has_proj:
            self.proj = Conv_BN_ReLU(in_channels, out_channels - mid_channels, k_size=3, stride=stride, padding=1,
                                     has_bn=True, has_relu=True)

        # 1x1 expand, grouped 3x3 (carries the stride), depthwise 3x3, 1x1 reduce.
        self.branch_main = nn.Sequential(
            Conv_BN_ReLU(in_channels, out_channels, k_size=1, stride=1, padding=0,
                         has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=stride, padding=1, groups=groups,
                         has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=1, padding=1, groups=out_channels,
                         has_bn=True, has_relu=False),
            Conv_BN_ReLU(out_channels, mid_channels, k_size=1, stride=1, padding=0,
                         has_bn=True, has_relu=False),
        )

        if has_se:
            self.se_globalpool = nn.AdaptiveAvgPool2d(output_size=1)
            # The two FC layers are registered both as attributes and inside
            # se_block, so they appear under both names in state_dict.
            self.se_fc1 = FC(mid_channels, mid_channels // 4)
            self.se_fc2 = FC(mid_channels // 4, mid_channels)
            se_block = [
                self.se_fc1,
                nn.ReLU(inplace=True),
                self.se_fc2,
                nn.Sigmoid(),
            ]
            self.se_block = nn.Sequential(*se_block)

    def forward(self, old_x):
        """Run the block on ``old_x`` (N, C, H, W) and return the fused output."""
        if self.has_proj:
            proj, x = old_x, old_x
        else:
            proj, x = self.channel_shuffle(old_x)
        # Keep the main-branch input for the identity residual below.
        x_proj = x
        if self.has_proj:
            proj = self.proj(proj)

        x = self.branch_main(x)

        if self.has_se:
            # Pool to (N, C), compute per-channel gates, broadcast back to 4-D.
            se_scale = self.se_globalpool(x).view(x.size(0), -1)
            se_scale = self.se_block(se_scale).unsqueeze(-1).unsqueeze(-1)
            x = x * se_scale

        if not self.has_proj:
            # Identity residual; requires branch_main to preserve the shape.
            x = self.relu(x_proj + x)

        x = torch.cat((proj, x), dim=1)

        return x

    def channel_shuffle(self, x):
        """Split (N, C, H, W) into its even- and odd-indexed channel halves.

        C must be a multiple of 4 (asserted).
        """
        batchsize, num_channels, height, width = x.data.size()
        assert (num_channels % 4 == 0)
        x = x.reshape(batchsize * num_channels // 2, 2, height * width)
        x = x.permute(1, 0, 2)
        x = x.reshape(2, -1, num_channels // 2, height, width)
        return x[0], x[1]


class ShuffleNetV2(nn.Module):
    """ShuffleNetV2.ExLarge classifier with three auxiliary heads.

    Only ``model_size='ExLarge'`` is implemented; any other value raises
    ``NotImplementedError``. In training mode ``forward`` returns the main
    logits together with the three auxiliary predictions; in eval mode it
    returns the main logits only.
    """
    def __init__(self, n_class=1000, model_size='ExLarge'):
        super(ShuffleNetV2, self).__init__()

        # Overwritten just below for the only supported model size.
        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if model_size == 'ExLarge':
            # Per stage: name prefix, number of blocks, output width, and
            # whether the first block uses stride 2.
            self.pre = [2, 3, 4, 5]
            self.stage_repeats = [8, 16, 36, 10]
            self.outputs = [320, 640, 1280, 2560]
            self.enable_stride = [False, True, True, True]
        else:
            raise NotImplementedError

        self.first_conv = nn.Sequential(
            Conv_BN_ReLU(3, 64, k_size=3, stride=2, padding=1),
            Conv_BN_ReLU(64, 128, k_size=3, stride=1, padding=1),
            Conv_BN_ReLU(128, 256, k_size=3, stride=1, padding=1),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = nn.ModuleList()
        input_channel = 256
        if model_size == 'ExLarge':
            for p, s, o, es in zip(self.pre, self.stage_repeats, self.outputs, self.enable_stride):
                feature = []
                for i in range(s):
                    # NOTE(review): prefix is computed but not used in this block.
                    prefix = "{}{}".format(p, str(i))
                    # Only the first block of a stride-enabled stage downsamples,
                    # and only the first block of a stage has a projection.
                    stride = 1 if not es or i > 0 else 2
                    has_proj = False if i > 0 else True
                    feature.append(ShuffleV2Block(in_channels=input_channel, out_channels=o, mid_channels=o // 2,
                                                  stride=stride, groups=16, has_proj=has_proj, has_se=True))
                    # Later blocks split their o-channel input in two.
                    input_channel = o // 2
                # Each stage ends with a 1x1 conv over the full stage width.
                feature.append(Conv_BN_ReLU(o, o, k_size=1, stride=1, padding=0))
                input_channel = o
                feature = nn.Sequential(*feature)
                self.features.append(feature)
                # Auxiliary heads on the outputs of the first three stages.
                if p == 2:
                    self.predict_56 = ExtraLabelPredict(in_channels=320, out_channels=256)
                elif p == 3:
                    self.predict_28 = ExtraLabelPredict(in_channels=640, out_channels=512)
                elif p == 4:
                    self.predict_14 = ExtraLabelPredict(in_channels=1280, out_channels=1024)

        # Fixed 7x7 window: matches the 7x7 final map noted in forward().
        self.globalpool = nn.AvgPool2d(7)
        if self.model_size == 'ExLarge':
            self.dropout = nn.Dropout(0.2)
        self.fc = FC(2560, n_class)

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise every conv, batch-norm and linear layer in place.

        Convs whose module name contains 'first' get N(0, 0.01); other convs
        get N(0, 1 / in_channels_per_group). Batch-norm layers get weight 1,
        bias 1e-4 and a zeroed running mean. Linear layers get N(0, 0.01)
        weights and zero bias.
        """
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'first' in name:
                    nn.init.normal_(m.weight, 0, 0.01)
                else:
                    nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return logits, plus the auxiliary predictions when training.

        The shape comments below are the original author's notes for a single
        224x224 image.
        """
        x = self.first_conv(x)
        x = self.maxpool(x)
        # 1 * 256 * 56 * 56

        x = self.features[0](x)
        # 1 * 320 * 56 * 56
        if self.training:
            predict_56 = self.predict_56(x)

        x = self.features[1](x)
        # 1 * 640 * 28 * 28
        if self.training:
            predict_28 = self.predict_28(x)

        x = self.features[2](x)
        # 1 * 1280 * 14 * 14
        if self.training:
            predict_14 = self.predict_14(x)

        x = self.features[3](x)
        # 1 * 2560 * 7 * 7

        x = self.globalpool(x)
        if self.model_size == 'ExLarge':
            x = self.dropout(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        if self.training:
            # Loss is scaled by 1.0, 0.7, 0.5, 0.3
            return x, predict_14, predict_28, predict_56
        else:
            return x


def create_network():
    """Build a ShuffleNetV2 with its default arguments."""
    model = ShuffleNetV2()
    return model


if __name__ == "__main__":
    create_network()
def create_spatial_conv2d_group_bn_relu(prefix, in_channels, out_channels, kernel_size, stride, padding=0, dilation=1, groups=1,
                                        bias=False, has_bn=True, has_relu=True, channel_shuffle=False, has_spatial_conv=True, has_spatial_conv_bn=True,
                                        conv_name_fun=None, bn_name_fun=None, bn_training=True, fix_weights=False):
    """Build ``[depthwise conv -> BN] -> 1x1 conv -> [BN] -> [ReLU]`` as an ``nn.Sequential``.

    Sub-module names (they become state-dict key prefixes):

    * 1x1 conv:        ``conv_name_fun(prefix)`` if given, else ``prefix``
    * depthwise conv:  ``<1x1 conv name> + '_s'``
    * depthwise BN:    ``<depthwise name> + '_bn'``
    * output BN:       ``bn_name_fun(prefix)`` if given, else ``'bn_' + prefix``
    * ReLU:            ``'relu' + prefix``

    ``kernel_size``, ``stride``, ``padding`` and ``dilation`` apply to the
    depthwise conv only; the pointwise conv is always 1x1 / stride 1 and is
    the only layer that uses ``groups``.  ``channel_shuffle``,
    ``bn_training`` and ``fix_weights`` are accepted for signature
    compatibility but have no effect in this implementation.

    Raises:
        AssertionError: if ``in_channels`` or ``out_channels`` is not
            divisible by ``groups``.
    """
    pointwise_name = conv_name_fun(prefix) if conv_name_fun else prefix

    stages = []
    if has_spatial_conv:
        depthwise_name = pointwise_name + '_s'
        # groups == in_channels makes this a depthwise (per-channel) conv.
        stages.append((depthwise_name, nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
                                                 kernel_size=kernel_size, stride=stride, padding=padding,
                                                 dilation=dilation, groups=in_channels, bias=bias)))
        if has_spatial_conv_bn:
            stages.append((depthwise_name + '_bn', nn.BatchNorm2d(in_channels)))

    assert in_channels % groups == 0
    assert out_channels % groups == 0

    stages.append((pointwise_name, nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                             kernel_size=1, stride=1, padding=0,
                                             groups=groups, bias=bias)))
    if has_bn:
        output_bn_name = bn_name_fun(prefix) if bn_name_fun else 'bn_' + prefix
        stages.append((output_bn_name, nn.BatchNorm2d(out_channels)))
    if has_relu:
        stages.append(('relu' + prefix, nn.ReLU(inplace=True)))

    layer = nn.Sequential()
    for stage_name, stage in stages:
        layer.add_module(stage_name, stage)
    return layer


def conv1x1_dwconv_conv1x1(prefix, in_channels, out_channels, mid_channels, kernel_size, stride, bn_training=True):
    """Build the ShuffleNet-style branch: 1x1 conv-BN-ReLU, then depthwise conv-BN + 1x1 conv-BN.

    ``mid_channels`` is truncated to ``int``.  Only the depthwise conv uses
    ``kernel_size`` / ``stride`` (with "same"-style padding
    ``kernel_size // 2``); the final 1x1 conv has no ReLU.
    """
    mid_channels = int(mid_channels)

    # Settings common to both halves of the branch.
    shared = dict(
        groups=1,
        has_bn=True,
        channel_shuffle=False,
        conv_name_fun=lambda p: 'interstellar' + p,
        bn_name_fun=lambda p: 'bn' + p,
        bn_training=bn_training,
    )

    # No spatial conv here, so kernel_size is irrelevant (-1 is a placeholder).
    reduce_1x1 = create_spatial_conv2d_group_bn_relu(
        prefix=prefix + '_branch2a', in_channels=in_channels, out_channels=mid_channels,
        kernel_size=-1, stride=1, padding=0, has_relu=True,
        has_spatial_conv=False, has_spatial_conv_bn=False, **shared)
    depthwise_then_1x1 = create_spatial_conv2d_group_bn_relu(
        prefix=prefix + '_branch2b', in_channels=mid_channels, out_channels=out_channels,
        kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, has_relu=False,
        has_spatial_conv=True, has_spatial_conv_bn=True, **shared)

    return nn.Sequential(reduce_1x1, depthwise_then_1x1)
class ConvBNReLU(nn.Module):
    """``nn.Conv2d`` (always created with a bias) followed by optional BatchNorm2d and ReLU."""

    def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1,
                 has_bn=True, has_relu=True, gaussian_init=False):
        """
        Args:
            has_bn: create and apply ``self.bn``; the attribute is absent when false.
            has_relu: apply ``self.relu`` in ``forward`` (the module itself is always created).
            gaussian_init: re-draw the conv weights from N(0, 0.01).
        """
        super().__init__()
        self.has_bn = has_bn
        self.has_relu = has_relu

        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=k_size,
                              stride=stride, padding=padding,
                              groups=groups, bias=True)
        if gaussian_init:
            nn.init.normal_(self.conv.weight.data, 0, 0.01)
        if has_bn:
            self.bn = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.conv(x)
        if self.has_bn:
            out = self.bn(out)
        return self.relu(out) if self.has_relu else out


class FC(nn.Module):
    """A single ``nn.Linear`` whose weights are drawn from N(0, 0.01)."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        linear = nn.Linear(in_channels, out_channels)
        nn.init.normal_(linear.weight.data, 0, 0.01)
        self.fc = linear

    def forward(self, x):
        return self.fc(x)


def channel_shuffle2(x):
    """Split a 4-D tensor into its even-indexed and odd-indexed channels.

    Adjacent channels are paired up; the first element of every pair goes
    to the first output and the second element to the second output, so
    each output has ``channels // 2`` channels.

    Raises:
        AssertionError: if the channel count (``x.shape[1]``) is not a multiple of 4.
    """
    channels = x.shape[1]
    assert channels % 4 == 0

    half = channels // 2
    spatial = (x.shape[2], x.shape[3])

    # (N*C/2, 2, H*W): dim 1 indexes the position inside each channel pair.
    pairs = x.reshape(x.shape[0] * channels // 2, 2, spatial[0] * spatial[1])
    # Bring the pair position to the front, then restore (N, C/2, H, W).
    halves = pairs.permute(1, 0, 2).reshape(2, -1, half, *spatial)
    return halves[0], halves[1]


class ShuffleNetV2BlockSearched(nn.Module):
    """ShuffleNetV2 unit whose main-branch operator is picked from ``blocks_key`` by index."""

    def __init__(self, prefix, in_channels, out_channels, stride, base_mid_channels, i_th, architecture):
        """
        Args:
            i_th: position of this block inside ``architecture``.
            architecture: sequence of indices into ``blocks_key``.
        """
        super().__init__()
        op_name = blocks_key[architecture[i_th]]
        # Kernel size is the first digit after the underscore, e.g. 'shufflenet_5x5' -> 5.
        self.ksize = int(op_name.split('_')[1][0])
        self.stride = stride

        if self.stride == 2:
            # Down-sampling unit: the branch sees the full input and supplies the
            # channels that the projected shortcut (in_channels wide) does not.
            branch_in, branch_out = in_channels, out_channels - in_channels
        else:
            # Otherwise each path handles half of the channels.
            branch_in, branch_out = in_channels // 2, out_channels // 2
        self.conv = Blocks[op_name](prefix + '_' + op_name, branch_in, branch_out,
                                    base_mid_channels, stride, True)

        if stride > 1:
            self.proj_conv = create_spatial_conv2d_group_bn_relu(
                prefix + '_proj', in_channels, in_channels, self.ksize,
                stride, self.ksize // 2,
                has_bn=True, has_relu=True, channel_shuffle=False,
                has_spatial_conv=True, has_spatial_conv_bn=True,
                conv_name_fun=lambda p: 'interstellar' + p,
                bn_name_fun=lambda p: 'bn' + p)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x_in):
        if self.stride == 1:
            shortcut, branch = channel_shuffle2(x_in)
        else:
            # Both paths consume the whole input; the shortcut is projected.
            shortcut = self.proj_conv(x_in)
            branch = x_in
        branch = self.relu(self.conv(branch))
        return torch.cat((shortcut, branch), dim=1)
class ToBGRTensor(object):
    """Turn an H x W x C image into a float32 C x H x W tensor with reversed channel order.

    Accepts a ``numpy.ndarray`` or a ``PIL.Image.Image``.  Pixel values are
    only cast to float; no scaling or normalisation is applied.
    """

    def __call__(self, img):
        if not isinstance(img, np.ndarray):
            assert isinstance(img, PIL.Image.Image)
            img = np.asarray(img)
        # Reverse the channel axis (RGB -> BGR), then HWC -> CHW.
        chw = np.transpose(img[:, :, ::-1], [2, 0, 1])
        # from_numpy cannot wrap the negatively-strided view, so copy first.
        return torch.from_numpy(np.ascontiguousarray(chw)).float()


class DataIterator(object):
    """Endless batch source: restarts the wrapped loader whenever it is exhausted."""

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = enumerate(self.dataloader)

    def next(self):
        """Return ``(data[0], data[1])`` of the next batch, wrapping around at the end.

        Only exhaustion (``StopIteration``) triggers a restart.  Any other
        exception raised while loading a batch now propagates instead of
        being swallowed by a silent restart of the loader.  An empty
        loader still surfaces as ``StopIteration`` from the second attempt.
        """
        try:
            _, data = next(self.iterator)
        except StopIteration:
            self.iterator = enumerate(self.dataloader)
            _, data = next(self.iterator)
        return data[0], data[1]


def _str2bool(text):
    """argparse ``type=`` callable: map 'true'/'false'-style strings to ``bool``.

    ``type=bool`` would treat every non-empty string (including "False")
    as True.  The empty string stays False, as it was with ``bool('')``.
    """
    lowered = text.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(text))


def get_args():
    """Parse the command line (``sys.argv``) for the OneShot training script.

    Returns:
        argparse.Namespace with the options defined below; dashes in option
        names become underscores (``--batch-size`` -> ``args.batch_size``).
    """
    parser = argparse.ArgumentParser("ShuffleNetV2_OneShot")
    parser.add_argument('--eval', default=False, action='store_true')
    parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
    parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
    parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
    parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
    parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
    parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')

    # "--auto-continue False" now really disables resuming.
    parser.add_argument('--auto-continue', type=_str2bool, default=True, help='auto continue')
    parser.add_argument('--display-interval', type=int, default=20, help='display interval')
    parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
    parser.add_argument('--save-interval', type=int, default=10000, help='save interval')

    parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
    parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')

    return parser.parse_args()
OpencvResize(256), 124 | transforms.CenterCrop(224), 125 | ToBGRTensor(), 126 | ])), 127 | batch_size=200, shuffle=False, 128 | num_workers=1, pin_memory=use_gpu 129 | ) 130 | val_dataprovider = DataIterator(val_loader) 131 | print('load data successfully') 132 | 133 | architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] 134 | scale_list = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6] 135 | scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3] 136 | channels_scales = [] 137 | for i in range(len(scale_ids)): 138 | channels_scales.append(scale_list[scale_ids[i]]) 139 | model = ShuffleNetV2_OneShot(architecture=architecture, channels_scales=channels_scales) 140 | 141 | optimizer = torch.optim.SGD(get_parameters(model), 142 | lr=args.learning_rate, 143 | momentum=args.momentum, 144 | weight_decay=args.weight_decay) 145 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 146 | 147 | if use_gpu: 148 | model = nn.DataParallel(model) 149 | loss_function = criterion_smooth.cuda() 150 | device = torch.device("cuda") 151 | else: 152 | loss_function = criterion_smooth 153 | device = torch.device("cpu") 154 | 155 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 156 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 157 | 158 | model = model.to(device) 159 | 160 | all_iters = 0 161 | if args.auto_continue: 162 | lastest_model, iters = get_lastest_model() 163 | if lastest_model is not None: 164 | all_iters = iters 165 | checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu') 166 | model.load_state_dict(checkpoint['state_dict'], strict=True) 167 | print('load from checkpoint') 168 | for i in range(iters): 169 | scheduler.step() 170 | 171 | args.optimizer = optimizer 172 | args.loss_function = loss_function 173 | args.scheduler = scheduler 174 | args.train_dataprovider = train_dataprovider 175 | args.val_dataprovider = val_dataprovider 176 | 177 | if args.eval: 178 
def adjust_bn_momentum(model, iters):
    """Set ``momentum = 1 / iters`` on every ``nn.BatchNorm2d`` inside *model*.

    ``train`` calls this with its 1-based iteration counter when
    ``bn_process`` is true.  Other normalisation layer types are left
    untouched.
    """
    momentum = 1 / iters
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.momentum = momentum


def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` optimisation steps and return the updated global step.

    Args:
        model: network to optimise (put into ``train()`` mode here).
        device: device that batches are moved to.
        args: namespace carrying ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        bn_process: when true, BatchNorm2d momentum is reset to ``1 / i`` at
            the i-th iteration of this call.
        all_iters: global iteration counter on entry (``None`` counts as 0).

    Returns:
        The global iteration counter after this call.

    Every ``display_interval`` global steps a line with the mean Top-1 /
    Top-5 error and mean step time since the previous report is logged.
    The means are taken over the iterations actually accumulated, so the
    first report of a call that starts off the ``display_interval`` grid is
    no longer under-reported.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    if all_iters is None:
        all_iters = 0

    window_start = time.time()
    window_iters = 0  # iterations accumulated since the last report
    top1_err_sum, top5_err_sum = 0.0, 0.0

    model.train()
    for iters in range(1, val_interval + 1):
        # NOTE(review): the scheduler is stepped before optimizer.step(), as in
        # the original schedule; recent PyTorch warns about this order.  Left
        # unchanged so the learning-rate sequence stays the same.
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        window_iters += 1

        fetch_start = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - fetch_start

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        top1_err_sum += 1 - prec1.item() / 100
        top5_err_sum += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            # The rate the scheduler just wrote into the optimizer; avoids the
            # deprecated scheduler.get_lr() call outside of step().
            current_lr = optimizer.param_groups[0]['lr']
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, current_lr, loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(top1_err_sum / window_iters) + \
                        'Top-5 err = {:.6f},\t'.format(top5_err_sum / window_iters) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - window_start) / window_iters)
            logging.info(printInfo)
            window_start = time.time()
            window_iters = 0
            top1_err_sum, top5_err_sum = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
            }, all_iters)

    return all_iters


def validate(model, device, args, *, all_iters=None):
    """Evaluate *model* on 250 validation batches and log loss / Top-1 / Top-5 error.

    Batches are drawn from ``args.val_dataprovider``; ``all_iters`` is used
    only to label the log line.  Nothing is returned.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    # NOTE(review): fixed batch count; presumably sized for one pass over the
    # validation set at the loader's batch size -- confirm against main().
    max_val_iters = 250
    started = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - started)
    logging.info(logInfo)
class OpencvResize(object):
    """Resize a PIL image with OpenCV so that its shorter side equals ``size``."""

    def __init__(self, size=256):
        self.size = size

    def __call__(self, img):
        assert isinstance(img, PIL.Image.Image)
        # (H, W, 3) RGB -> BGR, as a contiguous copy for OpenCV.
        bgr = np.ascontiguousarray(np.asarray(img)[:, :, ::-1])
        H, W, _ = bgr.shape
        # cv2.resize takes (width, height); the longer side is rounded to nearest.
        if H < W:
            target_size = (int(self.size / H * W + 0.5), self.size)
        else:
            target_size = (self.size, int(self.size / W * H + 0.5))
        resized = cv2.resize(bgr, target_size, interpolation=cv2.INTER_LINEAR)
        # Back to RGB before handing the result to PIL.
        rgb = np.ascontiguousarray(resized[:, :, ::-1])
        return Image.fromarray(rgb)


class ToBGRTensor(object):
    """Turn an H x W x C image into a float32 C x H x W tensor with reversed channel order.

    Accepts a ``numpy.ndarray`` or a ``PIL.Image.Image``.  Pixel values are
    only cast to float; no scaling or normalisation is applied.
    """

    def __call__(self, img):
        if not isinstance(img, np.ndarray):
            assert isinstance(img, PIL.Image.Image)
            img = np.asarray(img)
        # Reverse the channel axis (RGB -> BGR), then HWC -> CHW.
        chw = np.transpose(img[:, :, ::-1], [2, 0, 1])
        # from_numpy cannot wrap the negatively-strided view, so copy first.
        return torch.from_numpy(np.ascontiguousarray(chw)).float()


class DataIterator(object):
    """Endless batch source: restarts the wrapped loader whenever it is exhausted."""

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = enumerate(self.dataloader)

    def next(self):
        """Return ``(data[0], data[1])`` of the next batch, wrapping around at the end.

        Only exhaustion (``StopIteration``) triggers a restart.  Any other
        exception raised while loading a batch now propagates instead of
        being swallowed by a silent restart of the loader.
        """
        try:
            _, data = next(self.iterator)
        except StopIteration:
            self.iterator = enumerate(self.dataloader)
            _, data = next(self.iterator)
        return data[0], data[1]


def _str2bool(text):
    """argparse ``type=`` callable: map 'true'/'false'-style strings to ``bool``.

    ``type=bool`` would treat every non-empty string (including "False")
    as True.  The empty string stays False, as it was with ``bool('')``.
    """
    lowered = text.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(text))


def get_args():
    """Parse the command line (``sys.argv``) for the ShuffleNetV2 training script.

    Returns:
        argparse.Namespace with the options defined below; dashes in option
        names become underscores (``--model-size`` -> ``args.model_size``).
    """
    # The program name was a copy-paste leftover ("ShuffleNetV2_Plus").
    parser = argparse.ArgumentParser("ShuffleNetV2")
    parser.add_argument('--eval', default=False, action='store_true')
    parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
    parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
    parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
    parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
    parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
    parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')

    # "--auto-continue False" now really disables resuming.
    parser.add_argument('--auto-continue', type=_str2bool, default=True, help='auto continue')
    parser.add_argument('--display-interval', type=int, default=20, help='display interval')
    parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
    parser.add_argument('--save-interval', type=int, default=10000, help='save interval')

    parser.add_argument('--model-size', type=str, default='1.5x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model')

    parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
    parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')

    return parser.parse_args()
| logging.getLogger().addHandler(fh) 102 | 103 | use_gpu = False 104 | if torch.cuda.is_available(): 105 | use_gpu = True 106 | 107 | assert os.path.exists(args.train_dir) 108 | train_dataset = datasets.ImageFolder( 109 | args.train_dir, 110 | transforms.Compose([ 111 | transforms.RandomResizedCrop(224), 112 | transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 113 | transforms.RandomHorizontalFlip(0.5), 114 | ToBGRTensor(), 115 | ]) 116 | ) 117 | train_loader = torch.utils.data.DataLoader( 118 | train_dataset, batch_size=args.batch_size, shuffle=True, 119 | num_workers=1, pin_memory=use_gpu) 120 | train_dataprovider = DataIterator(train_loader) 121 | 122 | assert os.path.exists(args.val_dir) 123 | val_loader = torch.utils.data.DataLoader( 124 | datasets.ImageFolder(args.val_dir, transforms.Compose([ 125 | OpencvResize(256), 126 | transforms.CenterCrop(224), 127 | ToBGRTensor(), 128 | ])), 129 | batch_size=200, shuffle=False, 130 | num_workers=1, pin_memory=use_gpu 131 | ) 132 | val_dataprovider = DataIterator(val_loader) 133 | print('load data successfully') 134 | 135 | architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] 136 | model = ShuffleNetV2(model_size=args.model_size) 137 | 138 | optimizer = torch.optim.SGD(get_parameters(model), 139 | lr=args.learning_rate, 140 | momentum=args.momentum, 141 | weight_decay=args.weight_decay) 142 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 143 | 144 | if use_gpu: 145 | model = nn.DataParallel(model) 146 | loss_function = criterion_smooth.cuda() 147 | device = torch.device("cuda") 148 | else: 149 | loss_function = criterion_smooth 150 | device = torch.device("cpu") 151 | 152 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 153 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 154 | 155 | model = model.to(device) 156 | 157 | all_iters = 0 158 | if args.auto_continue: 159 | lastest_model, iters = get_lastest_model() 160 
def adjust_bn_momentum(model, iters):
    """Set ``momentum = 1 / iters`` on every ``nn.BatchNorm2d`` inside *model*.

    ``train`` calls this with its 1-based iteration counter when
    ``bn_process`` is true.  Other normalisation layer types are left
    untouched.
    """
    momentum = 1 / iters
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.momentum = momentum


def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` optimisation steps and return the updated global step.

    Args:
        model: network to optimise (put into ``train()`` mode here).
        device: device that batches are moved to.
        args: namespace carrying ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        bn_process: when true, BatchNorm2d momentum is reset to ``1 / i`` at
            the i-th iteration of this call.
        all_iters: global iteration counter on entry (``None`` counts as 0).

    Returns:
        The global iteration counter after this call.

    Every ``display_interval`` global steps a line with the mean Top-1 /
    Top-5 error and mean step time since the previous report is logged.
    The means are taken over the iterations actually accumulated, so the
    first report of a call that starts off the ``display_interval`` grid is
    no longer under-reported.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    if all_iters is None:
        all_iters = 0

    window_start = time.time()
    window_iters = 0  # iterations accumulated since the last report
    top1_err_sum, top5_err_sum = 0.0, 0.0

    model.train()
    for iters in range(1, val_interval + 1):
        # NOTE(review): the scheduler is stepped before optimizer.step(), as in
        # the original schedule; recent PyTorch warns about this order.  Left
        # unchanged so the learning-rate sequence stays the same.
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        window_iters += 1

        fetch_start = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - fetch_start

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        top1_err_sum += 1 - prec1.item() / 100
        top5_err_sum += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            # The rate the scheduler just wrote into the optimizer; avoids the
            # deprecated scheduler.get_lr() call outside of step().
            current_lr = optimizer.param_groups[0]['lr']
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, current_lr, loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(top1_err_sum / window_iters) + \
                        'Top-5 err = {:.6f},\t'.format(top5_err_sum / window_iters) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - window_start) / window_iters)
            logging.info(printInfo)
            window_start = time.time()
            window_iters = 0
            top1_err_sum, top5_err_sum = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
            }, all_iters)

    return all_iters
def load_checkpoint(net, checkpoint):
    """Strictly load *checkpoint* into *net*, reconciling the ``module.`` key prefix.

    Args:
        net: the target module, either plain or wrapped (e.g. in
            ``nn.DataParallel``, whose state-dict keys start with ``module.``).
        checkpoint: a state dict, or a dict holding one under ``'state_dict'``.

    Each key is matched against the keys *net* actually exposes: it is kept
    as-is when it already matches, gets ``module.`` prepended when that is
    what *net* expects, or has a leading ``module.`` stripped when *net* is
    not wrapped.  Previously the prefix was always forced on, so loading
    into an unwrapped model (the CPU path) failed.

    Raises:
        RuntimeError: from ``load_state_dict(strict=True)`` when keys are
            still missing or unexpected after remapping.
    """
    from collections import OrderedDict

    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    prefix = 'module.'
    expected = set(net.state_dict().keys())

    remapped = OrderedDict()
    for key in checkpoint:
        if key in expected:
            target = key
        elif prefix + key in expected:
            target = prefix + key
        elif key.startswith(prefix) and key[len(prefix):] in expected:
            target = key[len(prefix):]
        else:
            # Unknown key: keep it so that strict loading reports it.
            target = key
        remapped[target] = checkpoint[key]

    net.load_state_dict(remapped, strict=True)
class DataIterator(object):
    """Endless batch source: restarts the wrapped loader whenever it is exhausted."""

    def __init__(self, dataloader):
        self.dataloader = dataloader
        self.iterator = enumerate(self.dataloader)

    def next(self):
        """Return ``(data[0], data[1])`` of the next batch, wrapping around at the end.

        Only exhaustion (``StopIteration``) triggers a restart.  Any other
        exception raised while loading a batch now propagates instead of
        being swallowed by a silent restart of the loader.  An empty
        loader still surfaces as ``StopIteration`` from the second attempt.
        """
        try:
            _, data = next(self.iterator)
        except StopIteration:
            self.iterator = enumerate(self.dataloader)
            _, data = next(self.iterator)
        return data[0], data[1]
def main():
    """Script entry point: build data, model and optimiser, then evaluate or train.

    Steps, in order:

    1. Parse the command line and set up logging to stdout and to a
       timestamped file under ``./log``.
    2. Build the training and validation ``ImageFolder`` loaders.
    3. Build the model, SGD optimiser, label-smoothing loss and a linearly
       decaying learning-rate schedule.
    4. With ``--auto-continue``, restore the latest checkpoint's weights and
       replay the scheduler up to the saved iteration.
    5. With ``--eval``: load ``--eval-resume``, validate and exit.
       Otherwise alternate ``train`` / ``validate`` until ``--total-iters``
       is reached, run one more pass with ``bn_process=True``, validate and
       save the result with the ``'bnps-'`` tag.

    Raises:
        FileNotFoundError: if ``--train-dir`` or ``--val-dir`` does not exist.
        SystemExit: after evaluation in ``--eval`` mode.
    """
    args = get_args()

    # ---- logging: stdout plus a timestamped file under ./log ----
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    os.makedirs('./log', exist_ok=True)  # no check-then-create race
    fh = logging.FileHandler('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = torch.cuda.is_available()

    # ---- data ----
    # Explicit errors instead of asserts, which vanish under ``python -O``.
    for dataset_dir in (args.train_dir, args.val_dir):
        if not os.path.exists(dataset_dir):
            raise FileNotFoundError('dataset directory not found: {}'.format(dataset_dir))

    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            ToBGRTensor(),
        ])
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=1, pin_memory=use_gpu)
    train_dataprovider = DataIterator(train_loader)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.val_dir, transforms.Compose([
            OpencvResize(256),
            transforms.CenterCrop(224),
            ToBGRTensor(),
        ])),
        batch_size=200, shuffle=False,
        num_workers=1, pin_memory=use_gpu
    )
    val_dataprovider = DataIterator(val_loader)
    print('load data successfully')

    # ---- model, optimiser, loss, schedule ----
    model = ShuffleNetV2(model_size=args.model_size)

    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Honour --label-smooth (default 0.1) instead of a hard-coded 0.1.
    criterion_smooth = CrossEntropyLabelSmooth(1000, args.label_smooth)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    # Linear decay from the initial rate to 0 at total_iters, then 0.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lambda step: (1.0 - step / args.total_iters) if step <= args.total_iters else 0,
        last_epoch=-1)

    model = model.to(device)

    # ---- optional resume ----
    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            # Only model weights are read from the checkpoint; the schedule
            # position is rebuilt by replaying the scheduler.
            for _ in range(iters):
                scheduler.step()

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    # ---- evaluation only ----
    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
            load_checkpoint(model, checkpoint)
            validate(model, device, args, all_iters=all_iters)
        # sys.exit rather than the interactive-only ``exit`` helper.
        sys.exit(0)

    # ---- training ----
    while all_iters < args.total_iters:
        all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)

    # Final pass with bn_process=True; 1280000 is presumably the approximate
    # training-set size, i.e. about one epoch -- TODO confirm.
    all_iters = train(model, device, args, val_interval=int(1280000 / args.batch_size), bn_process=True, all_iters=all_iters)
    validate(model, device, args, all_iters=all_iters)
    save_checkpoint({'state_dict': model.state_dict(), }, args.total_iters, tag='bnps-')


def adjust_bn_momentum(model, iters):
    """Set ``momentum = 1 / iters`` on every ``nn.BatchNorm2d`` inside *model*.

    Other normalisation layer types are left untouched.
    """
    momentum = 1 / iters
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.momentum = momentum
'state_dict': model.state_dict(), 237 | }, all_iters) 238 | 239 | return all_iters 240 | 241 | def validate(model, device, args, *, all_iters=None): 242 | objs = AvgrageMeter() 243 | top1 = AvgrageMeter() 244 | top5 = AvgrageMeter() 245 | 246 | loss_function = args.loss_function 247 | val_dataprovider = args.val_dataprovider 248 | 249 | model.eval() 250 | max_val_iters = 250 251 | t1 = time.time() 252 | with torch.no_grad(): 253 | for _ in range(1, max_val_iters + 1): 254 | data, target = val_dataprovider.next() 255 | target = target.type(torch.LongTensor) 256 | data, target = data.to(device), target.to(device) 257 | 258 | output = model(data) 259 | loss = loss_function(output, target) 260 | 261 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 262 | n = data.size(0) 263 | objs.update(loss.item(), n) 264 | top1.update(prec1.item(), n) 265 | top5.update(prec5.item(), n) 266 | 267 | logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \ 268 | 'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \ 269 | 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \ 270 | 'val_time = {:.6f}'.format(time.time() - t1) 271 | logging.info(logInfo) 272 | 273 | def load_checkpoint(net, checkpoint): 274 | from collections import OrderedDict 275 | 276 | temp = OrderedDict() 277 | if 'state_dict' in checkpoint: 278 | checkpoint = dict(checkpoint['state_dict']) 279 | for k in checkpoint: 280 | k2 = 'module.'+k if not k.startswith('module.') else k 281 | temp[k2] = checkpoint[k] 282 | 283 | net.load_state_dict(temp, strict=True) 284 | 285 | if __name__ == "__main__": 286 | main() 287 | 288 | -------------------------------------------------------------------------------- /ShuffleNetV2+/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import argparse 5 | import torch.nn as nn 6 | import torchvision.transforms as transforms 7 | import torchvision.datasets as datasets 8 | 
import cv2 9 | import numpy as np 10 | import PIL 11 | from PIL import Image 12 | import time 13 | import logging 14 | import argparse 15 | from network import ShuffleNetV2_Plus 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters 17 | 18 | class OpencvResize(object): 19 | 20 | def __init__(self, size=256): 21 | self.size = size 22 | 23 | def __call__(self, img): 24 | assert isinstance(img, PIL.Image.Image) 25 | img = np.asarray(img) # (H,W,3) RGB 26 | img = img[:,:,::-1] # 2 BGR 27 | img = np.ascontiguousarray(img) 28 | H, W, _ = img.shape 29 | target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5)) 30 | img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) 31 | img = img[:,:,::-1] # 2 RGB 32 | img = np.ascontiguousarray(img) 33 | img = Image.fromarray(img) 34 | return img 35 | 36 | class ToBGRTensor(object): 37 | 38 | def __call__(self, img): 39 | assert isinstance(img, (np.ndarray, PIL.Image.Image)) 40 | if isinstance(img, PIL.Image.Image): 41 | img = np.asarray(img) 42 | img = img[:,:,::-1] # 2 BGR 43 | img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) 44 | img = np.ascontiguousarray(img) 45 | img = torch.from_numpy(img).float() 46 | return img 47 | 48 | class DataIterator(object): 49 | 50 | def __init__(self, dataloader): 51 | self.dataloader = dataloader 52 | self.iterator = enumerate(self.dataloader) 53 | 54 | def next(self): 55 | try: 56 | _, data = next(self.iterator) 57 | except Exception: 58 | self.iterator = enumerate(self.dataloader) 59 | _, data = next(self.iterator) 60 | return data[0], data[1] 61 | 62 | 63 | def get_args(): 64 | parser = argparse.ArgumentParser("ShuffleNetV2_Plus") 65 | parser.add_argument('--eval', default=False, action='store_true') 66 | parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model') 67 | parser.add_argument('--batch-size', type=int, 
default=1024, help='batch size') 68 | parser.add_argument('--total-iters', type=int, default=450000, help='total iters') 69 | parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate') 70 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum') 71 | parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay') 72 | parser.add_argument('--save', type=str, default='./models', help='path for saving trained models') 73 | parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing') 74 | 75 | parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue') 76 | parser.add_argument('--display-interval', type=int, default=20, help='display interval') 77 | parser.add_argument('--val-interval', type=int, default=10000, help='val interval') 78 | parser.add_argument('--save-interval', type=int, default=10000, help='save interval') 79 | 80 | parser.add_argument('--model-size', type=str, default='Large', choices=['Small', 'Medium', 'Large'], help='size of the model') 81 | 82 | parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset') 83 | parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset') 84 | 85 | args = parser.parse_args() 86 | return args 87 | 88 | def main(): 89 | args = get_args() 90 | 91 | # Log 92 | log_format = '[%(asctime)s] %(message)s' 93 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 94 | format=log_format, datefmt='%d %I:%M:%S') 95 | t = time.time() 96 | local_time = time.localtime(t) 97 | if not os.path.exists('./log'): 98 | os.mkdir('./log') 99 | fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))) 100 | fh.setFormatter(logging.Formatter(log_format)) 101 | logging.getLogger().addHandler(fh) 102 | 103 | use_gpu = False 104 | if torch.cuda.is_available(): 105 | use_gpu = True 106 
| 107 | assert os.path.exists(args.train_dir) 108 | train_dataset = datasets.ImageFolder( 109 | args.train_dir, 110 | transforms.Compose([ 111 | transforms.RandomResizedCrop(224), 112 | transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 113 | transforms.RandomHorizontalFlip(0.5), 114 | ToBGRTensor(), 115 | ]) 116 | ) 117 | train_loader = torch.utils.data.DataLoader( 118 | train_dataset, batch_size=args.batch_size, shuffle=True, 119 | num_workers=1, pin_memory=use_gpu) 120 | train_dataprovider = DataIterator(train_loader) 121 | 122 | assert os.path.exists(args.val_dir) 123 | val_loader = torch.utils.data.DataLoader( 124 | datasets.ImageFolder(args.val_dir, transforms.Compose([ 125 | OpencvResize(256), 126 | transforms.CenterCrop(224), 127 | ToBGRTensor(), 128 | ])), 129 | batch_size=200, shuffle=False, 130 | num_workers=1, pin_memory=use_gpu 131 | ) 132 | val_dataprovider = DataIterator(val_loader) 133 | print('load data successfully') 134 | 135 | architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2] 136 | model = ShuffleNetV2_Plus(architecture=architecture, model_size=args.model_size) 137 | 138 | optimizer = torch.optim.SGD(get_parameters(model), 139 | lr=args.learning_rate, 140 | momentum=args.momentum, 141 | weight_decay=args.weight_decay) 142 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 143 | 144 | if use_gpu: 145 | model = nn.DataParallel(model) 146 | loss_function = criterion_smooth.cuda() 147 | device = torch.device("cuda") 148 | else: 149 | loss_function = criterion_smooth 150 | device = torch.device("cpu") 151 | 152 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 153 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 154 | 155 | model = model.to(device) 156 | 157 | all_iters = 0 158 | if args.auto_continue: 159 | lastest_model, iters = get_lastest_model() 160 | if lastest_model is not None: 161 | all_iters = iters 162 | checkpoint = 
torch.load(lastest_model, map_location=None if use_gpu else 'cpu') 163 | model.load_state_dict(checkpoint['state_dict'], strict=True) 164 | print('load from checkpoint') 165 | for i in range(iters): 166 | scheduler.step() 167 | 168 | args.optimizer = optimizer 169 | args.loss_function = loss_function 170 | args.scheduler = scheduler 171 | args.train_dataprovider = train_dataprovider 172 | args.val_dataprovider = val_dataprovider 173 | 174 | if args.eval: 175 | if args.eval_resume is not None: 176 | checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu') 177 | load_checkpoint(model, checkpoint) 178 | validate(model, device, args, all_iters=all_iters) 179 | exit(0) 180 | 181 | while all_iters < args.total_iters: 182 | all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters) 183 | validate(model, device, args, all_iters=all_iters) 184 | all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters) 185 | validate(model, device, args, all_iters=all_iters) 186 | save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-') 187 | 188 | 189 | def adjust_bn_momentum(model, iters): 190 | for m in model.modules(): 191 | if isinstance(m, nn.BatchNorm2d): 192 | m.momentum = 1 / iters 193 | 194 | def train(model, device, args, *, val_interval, bn_process=False, all_iters=None): 195 | 196 | optimizer = args.optimizer 197 | loss_function = args.loss_function 198 | scheduler = args.scheduler 199 | train_dataprovider = args.train_dataprovider 200 | 201 | t1 = time.time() 202 | Top1_err, Top5_err = 0.0, 0.0 203 | model.train() 204 | for iters in range(1, val_interval + 1): 205 | scheduler.step() 206 | if bn_process: 207 | adjust_bn_momentum(model, iters) 208 | 209 | all_iters += 1 210 | d_st = time.time() 211 | data, target = train_dataprovider.next() 212 | target = target.type(torch.LongTensor) 213 | data, target = 
data.to(device), target.to(device) 214 | data_time = time.time() - d_st 215 | 216 | output = model(data) 217 | loss = loss_function(output, target) 218 | optimizer.zero_grad() 219 | loss.backward() 220 | optimizer.step() 221 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 222 | 223 | Top1_err += 1 - prec1.item() / 100 224 | Top5_err += 1 - prec5.item() / 100 225 | 226 | if all_iters % args.display_interval == 0: 227 | printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \ 228 | 'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \ 229 | 'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \ 230 | 'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval) 231 | logging.info(printInfo) 232 | t1 = time.time() 233 | Top1_err, Top5_err = 0.0, 0.0 234 | 235 | if all_iters % args.save_interval == 0: 236 | save_checkpoint({ 237 | 'state_dict': model.state_dict(), 238 | }, all_iters) 239 | 240 | return all_iters 241 | 242 | def validate(model, device, args, *, all_iters=None): 243 | objs = AvgrageMeter() 244 | top1 = AvgrageMeter() 245 | top5 = AvgrageMeter() 246 | 247 | loss_function = args.loss_function 248 | val_dataprovider = args.val_dataprovider 249 | 250 | model.eval() 251 | max_val_iters = 250 252 | t1 = time.time() 253 | with torch.no_grad(): 254 | for _ in range(1, max_val_iters + 1): 255 | data, target = val_dataprovider.next() 256 | target = target.type(torch.LongTensor) 257 | data, target = data.to(device), target.to(device) 258 | 259 | output = model(data) 260 | loss = loss_function(output, target) 261 | 262 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 263 | n = data.size(0) 264 | objs.update(loss.item(), n) 265 | top1.update(prec1.item(), n) 266 | top5.update(prec5.item(), n) 267 | 268 | logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \ 269 | 'Top-1 err = {:.6f},\t'.format(1 - 
top1.avg / 100) + \ 270 | 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \ 271 | 'val_time = {:.6f}'.format(time.time() - t1) 272 | logging.info(logInfo) 273 | 274 | def load_checkpoint(net, checkpoint): 275 | from collections import OrderedDict 276 | 277 | temp = OrderedDict() 278 | if 'state_dict' in checkpoint: 279 | checkpoint = dict(checkpoint['state_dict']) 280 | for k in checkpoint: 281 | k2 = 'module.'+k if not k.startswith('module.') else k 282 | temp[k2] = checkpoint[k] 283 | 284 | net.load_state_dict(temp, strict=True) 285 | 286 | if __name__ == "__main__": 287 | main() 288 | 289 | -------------------------------------------------------------------------------- /ShuffleNetV1/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import argparse 5 | import torch.nn as nn 6 | import torchvision.transforms as transforms 7 | import torchvision.datasets as datasets 8 | import cv2 9 | import numpy as np 10 | import PIL 11 | from PIL import Image 12 | import time 13 | import logging 14 | import argparse 15 | from network import ShuffleNetV1 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters 17 | 18 | class OpencvResize(object): 19 | 20 | def __init__(self, size=256): 21 | self.size = size 22 | 23 | def __call__(self, img): 24 | assert isinstance(img, PIL.Image.Image) 25 | img = np.asarray(img) # (H,W,3) RGB 26 | img = img[:,:,::-1] # 2 BGR 27 | img = np.ascontiguousarray(img) 28 | H, W, _ = img.shape 29 | target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5)) 30 | img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) 31 | img = img[:,:,::-1] # 2 RGB 32 | img = np.ascontiguousarray(img) 33 | img = Image.fromarray(img) 34 | return img 35 | 36 | class ToBGRTensor(object): 37 | 38 | def __call__(self, img): 39 | assert isinstance(img, 
(np.ndarray, PIL.Image.Image)) 40 | if isinstance(img, PIL.Image.Image): 41 | img = np.asarray(img) 42 | img = img[:,:,::-1] # 2 BGR 43 | img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) 44 | img = np.ascontiguousarray(img) 45 | img = torch.from_numpy(img).float() 46 | return img 47 | 48 | class DataIterator(object): 49 | 50 | def __init__(self, dataloader): 51 | self.dataloader = dataloader 52 | self.iterator = enumerate(self.dataloader) 53 | 54 | def next(self): 55 | try: 56 | _, data = next(self.iterator) 57 | except Exception: 58 | self.iterator = enumerate(self.dataloader) 59 | _, data = next(self.iterator) 60 | return data[0], data[1] 61 | 62 | def get_args(): 63 | parser = argparse.ArgumentParser("ShuffleNetV1") 64 | parser.add_argument('--eval', default=False, action='store_true') 65 | parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model') 66 | parser.add_argument('--batch-size', type=int, default=1024, help='batch size') 67 | parser.add_argument('--total-iters', type=int, default=300000, help='total iters') 68 | parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate') 69 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum') 70 | parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay') 71 | parser.add_argument('--save', type=str, default='./models', help='path for saving trained models') 72 | parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing') 73 | 74 | 75 | parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue') 76 | parser.add_argument('--display-interval', type=int, default=20, help='display interval') 77 | parser.add_argument('--val-interval', type=int, default=10000, help='val interval') 78 | parser.add_argument('--save-interval', type=int, default=10000, help='save interval') 79 | 80 | 81 | parser.add_argument('--group', type=int, default=3, help='group 
number') 82 | parser.add_argument('--model-size', type=str, default='2.0x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model') 83 | 84 | parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset') 85 | parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset') 86 | 87 | args = parser.parse_args() 88 | return args 89 | 90 | def main(): 91 | args = get_args() 92 | 93 | # Log 94 | log_format = '[%(asctime)s] %(message)s' 95 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 96 | format=log_format, datefmt='%d %I:%M:%S') 97 | t = time.time() 98 | local_time = time.localtime(t) 99 | if not os.path.exists('./log'): 100 | os.mkdir('./log') 101 | fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))) 102 | fh.setFormatter(logging.Formatter(log_format)) 103 | logging.getLogger().addHandler(fh) 104 | 105 | use_gpu = False 106 | if torch.cuda.is_available(): 107 | use_gpu = True 108 | 109 | assert os.path.exists(args.train_dir) 110 | train_dataset = datasets.ImageFolder( 111 | args.train_dir, 112 | transforms.Compose([ 113 | transforms.RandomResizedCrop(224), 114 | transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 115 | transforms.RandomHorizontalFlip(0.5), 116 | ToBGRTensor(), 117 | ]) 118 | ) 119 | train_loader = torch.utils.data.DataLoader( 120 | train_dataset, batch_size=args.batch_size, shuffle=True, 121 | num_workers=1, pin_memory=use_gpu) 122 | train_dataprovider = DataIterator(train_loader) 123 | 124 | assert os.path.exists(args.val_dir) 125 | val_loader = torch.utils.data.DataLoader( 126 | datasets.ImageFolder(args.val_dir, transforms.Compose([ 127 | OpencvResize(256), 128 | transforms.CenterCrop(224), 129 | ToBGRTensor(), 130 | ])), 131 | batch_size=200, shuffle=False, 132 | num_workers=1, pin_memory=use_gpu 133 | ) 134 | val_dataprovider = DataIterator(val_loader) 135 | 
print('load data successfully') 136 | 137 | model = ShuffleNetV1(group=args.group, model_size=args.model_size) 138 | 139 | optimizer = torch.optim.SGD(get_parameters(model), 140 | lr=args.learning_rate, 141 | momentum=args.momentum, 142 | weight_decay=args.weight_decay) 143 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 144 | 145 | if use_gpu: 146 | model = nn.DataParallel(model) 147 | loss_function = criterion_smooth.cuda() 148 | device = torch.device("cuda") 149 | else: 150 | loss_function = criterion_smooth 151 | device = torch.device("cpu") 152 | 153 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 154 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 155 | 156 | model = model.to(device) 157 | 158 | all_iters = 0 159 | if args.auto_continue: 160 | lastest_model, iters = get_lastest_model() 161 | if lastest_model is not None: 162 | all_iters = iters 163 | checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu') 164 | model.load_state_dict(checkpoint['state_dict'], strict=True) 165 | print('load from checkpoint') 166 | for i in range(iters): 167 | scheduler.step() 168 | 169 | args.optimizer = optimizer 170 | args.loss_function = loss_function 171 | args.scheduler = scheduler 172 | args.train_dataprovider = train_dataprovider 173 | args.val_dataprovider = val_dataprovider 174 | 175 | if args.eval: 176 | if args.eval_resume is not None: 177 | checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu') 178 | load_checkpoint(model, checkpoint) 179 | validate(model, device, args, all_iters=all_iters) 180 | exit(0) 181 | 182 | while all_iters < args.total_iters: 183 | all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters) 184 | validate(model, device, args, all_iters=all_iters) 185 | all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters) 186 | 
validate(model, device, args, all_iters=all_iters) 187 | save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-') 188 | torch.save(model.state_dict(), 'model.mdl') 189 | 190 | def adjust_bn_momentum(model, iters): 191 | for m in model.modules(): 192 | if isinstance(m, nn.BatchNorm2d): 193 | m.momentum = 1 / iters 194 | 195 | def train(model, device, args, *, val_interval, bn_process=False, all_iters=None): 196 | 197 | optimizer = args.optimizer 198 | loss_function = args.loss_function 199 | scheduler = args.scheduler 200 | train_dataprovider = args.train_dataprovider 201 | 202 | t1 = time.time() 203 | Top1_err, Top5_err = 0.0, 0.0 204 | model.train() 205 | for iters in range(1, val_interval + 1): 206 | scheduler.step() 207 | if bn_process: 208 | adjust_bn_momentum(model, iters) 209 | 210 | all_iters += 1 211 | d_st = time.time() 212 | data, target = train_dataprovider.next() 213 | target = target.type(torch.LongTensor) 214 | data, target = data.to(device), target.to(device) 215 | data_time = time.time() - d_st 216 | 217 | output = model(data) 218 | loss = loss_function(output, target) 219 | optimizer.zero_grad() 220 | loss.backward() 221 | optimizer.step() 222 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 223 | 224 | Top1_err += 1 - prec1.item() / 100 225 | Top5_err += 1 - prec5.item() / 100 226 | 227 | if all_iters % args.display_interval == 0: 228 | printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \ 229 | 'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \ 230 | 'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \ 231 | 'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval) 232 | logging.info(printInfo) 233 | t1 = time.time() 234 | Top1_err, Top5_err = 0.0, 0.0 235 | 236 | if all_iters % args.save_interval == 0: 237 | save_checkpoint({ 238 | 'state_dict': model.state_dict(), 239 | 
}, all_iters) 240 | 241 | return all_iters 242 | 243 | def validate(model, device, args, *, all_iters=None): 244 | objs = AvgrageMeter() 245 | top1 = AvgrageMeter() 246 | top5 = AvgrageMeter() 247 | 248 | loss_function = args.loss_function 249 | val_dataprovider = args.val_dataprovider 250 | 251 | model.eval() 252 | max_val_iters = 250 253 | t1 = time.time() 254 | with torch.no_grad(): 255 | for _ in range(1, max_val_iters + 1): 256 | data, target = val_dataprovider.next() 257 | target = target.type(torch.LongTensor) 258 | data, target = data.to(device), target.to(device) 259 | 260 | output = model(data) 261 | loss = loss_function(output, target) 262 | 263 | prec1, prec5 = accuracy(output, target, topk=(1, 5)) 264 | n = data.size(0) 265 | objs.update(loss.item(), n) 266 | top1.update(prec1.item(), n) 267 | top5.update(prec5.item(), n) 268 | 269 | logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \ 270 | 'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \ 271 | 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \ 272 | 'val_time = {:.6f}'.format(time.time() - t1) 273 | logging.info(logInfo) 274 | 275 | def load_checkpoint(net, checkpoint): 276 | from collections import OrderedDict 277 | 278 | temp = OrderedDict() 279 | if 'state_dict' in checkpoint: 280 | checkpoint = dict(checkpoint['state_dict']) 281 | for k in checkpoint: 282 | k2 = 'module.'+k if not k.startswith('module.') else k 283 | temp[k2] = checkpoint[k] 284 | 285 | net.load_state_dict(temp, strict=True) 286 | 287 | if __name__ == "__main__": 288 | main() 289 | 290 | -------------------------------------------------------------------------------- /DetNAS/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | import time 6 | import logging 7 | import argparse 8 | import torchvision.transforms as transforms 9 | import torchvision.datasets as datasets 10 | import cv2 11 
| import numpy as np 12 | import PIL 13 | from PIL import Image 14 | from network import ShuffleNetV2DetNAS 15 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters 16 | 17 | class OpencvResize(object): 18 | 19 | def __init__(self, size=256): 20 | self.size = size 21 | 22 | def __call__(self, img): 23 | assert isinstance(img, PIL.Image.Image) 24 | img = np.asarray(img) # (H,W,3) RGB 25 | img = img[:,:,::-1] # 2 BGR 26 | img = np.ascontiguousarray(img) 27 | H, W, _ = img.shape 28 | target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5)) 29 | img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) 30 | img = img[:,:,::-1] # 2 RGB 31 | img = np.ascontiguousarray(img) 32 | img = Image.fromarray(img) 33 | return img 34 | 35 | class ToBGRTensor(object): 36 | 37 | def __call__(self, img): 38 | assert isinstance(img, (np.ndarray, PIL.Image.Image)) 39 | if isinstance(img, PIL.Image.Image): 40 | img = np.asarray(img) 41 | img = img[:,:,::-1] # 2 BGR 42 | img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W) 43 | img = np.ascontiguousarray(img) 44 | img = torch.from_numpy(img).float() 45 | return img 46 | 47 | class DataIterator(object): 48 | 49 | def __init__(self, dataloader): 50 | self.dataloader = dataloader 51 | self.iterator = enumerate(self.dataloader) 52 | 53 | def next(self): 54 | try: 55 | _, data = next(self.iterator) 56 | except Exception: 57 | self.iterator = enumerate(self.dataloader) 58 | _, data = next(self.iterator) 59 | return data[0], data[1] 60 | 61 | 62 | def get_args(): 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument('--eval', default=False, action='store_true') 65 | parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model') 66 | parser.add_argument('--batch-size', type=int, default=1024, help='batch size') 67 | parser.add_argument('--total-iters', type=int, 
default=300000, help='total iters') 68 | parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate') 69 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum') 70 | parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay') 71 | parser.add_argument('--save', type=str, default='./models', help='path for saving trained models') 72 | parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing') 73 | 74 | parser.add_argument('--auto-continue', default=False, action='store_true', help='report frequency') 75 | parser.add_argument('--display-interval', type=int, default=20, help='report frequency') 76 | parser.add_argument('--val-interval', type=int, default=10000, help='report frequency') 77 | parser.add_argument('--save-interval', type=int, default=10000, help='report frequency') 78 | 79 | parser.add_argument('--model-size', type=str, default='VOC_FPN_300M', 80 | choices=['COCO_FPN_300M', 81 | 'COCO_FPN_1.3G', 82 | 'COCO_FPN_3.8G', 83 | 'COCO_RetinaNet_300M', 84 | 'VOC_FPN_300M', 85 | 'VOC_RetinaNet_300M'], 86 | help='size of the model') 87 | 88 | parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset') 89 | parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset') 90 | 91 | args = parser.parse_args() 92 | return args 93 | 94 | 95 | def main(): 96 | args = get_args() 97 | 98 | # Log 99 | log_format = '[%(asctime)s] %(message)s' 100 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%d %I:%M:%S') 101 | t = time.time() 102 | local_time = time.localtime(t) 103 | if not os.path.exists('./log'): 104 | os.mkdir('./log') 105 | fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))) 106 | fh.setFormatter(logging.Formatter(log_format)) 107 | logging.getLogger().addHandler(fh) 108 | 109 | use_gpu = 
False 110 | if torch.cuda.is_available(): 111 | use_gpu = True 112 | 113 | assert os.path.exists(args.train_dir) 114 | train_dataset = datasets.ImageFolder( 115 | args.train_dir, 116 | transforms.Compose([ 117 | transforms.RandomResizedCrop(224), 118 | transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 119 | transforms.RandomHorizontalFlip(0.5), 120 | ToBGRTensor(), 121 | ]) 122 | ) 123 | train_loader = torch.utils.data.DataLoader( 124 | train_dataset, batch_size=args.batch_size, shuffle=True, 125 | num_workers=1, pin_memory=use_gpu) 126 | train_dataprovider = DataIterator(train_loader) 127 | 128 | assert os.path.exists(args.val_dir) 129 | val_loader = torch.utils.data.DataLoader( 130 | datasets.ImageFolder(args.val_dir, transforms.Compose([ 131 | OpencvResize(256), 132 | transforms.CenterCrop(224), 133 | ToBGRTensor(), 134 | ])), 135 | batch_size=200, shuffle=False, 136 | num_workers=1, pin_memory=use_gpu 137 | ) 138 | val_dataprovider = DataIterator(val_loader) 139 | print('load data successfully') 140 | 141 | model = ShuffleNetV2DetNAS(model_size=args.model_size) 142 | if args.eval: 143 | if args.eval_resume is not None: 144 | checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu') 145 | print('==> Resuming from checkpoint..') 146 | load_checkpoint(model, checkpoint) 147 | 148 | optimizer = torch.optim.SGD(get_parameters(model), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 149 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 150 | 151 | if use_gpu: 152 | model = nn.DataParallel(model) 153 | loss_function = criterion_smooth.cuda() 154 | device = torch.device("cuda") 155 | else: 156 | loss_function = criterion_smooth 157 | device = torch.device("cpu") 158 | 159 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 160 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 161 | 162 | model = model.to(device) 163 | 164 | all_iters = 0 
def train(model, device, args, *, val_interval, all_iters=None):
    """Run ``val_interval`` optimisation steps and return the new step count.

    The optimizer, loss function, LR scheduler and training data provider
    are all read from ``args``. A progress line is logged whenever the
    global step is a multiple of ``args.display_interval`` and
    ``save_checkpoint`` is called whenever it is a multiple of
    ``args.save_interval``.

    :param model: network being trained; invoked as ``model(data)``
    :param device: device each batch is moved to
    :param args: namespace providing ``optimizer``, ``loss_function``,
        ``scheduler``, ``train_dataprovider``, ``display_interval`` and
        ``save_interval``
    :param val_interval: number of steps performed by this call
    :param all_iters: global step counter before this call
    :return: global step counter after this call
    """
    opt = args.optimizer
    criterion = args.loss_function
    lr_sched = args.scheduler
    provider = args.train_dataprovider

    window_start = time.time()
    top1_sum = 0.0
    top5_sum = 0.0
    model.train()
    for _ in range(val_interval):
        # The scheduler is advanced before the optimizer step, as in the
        # original loop; the order is kept on purpose.
        lr_sched.step()
        all_iters += 1

        fetch_start = time.time()
        data, target = provider.next()
        target = target.type(torch.LongTensor)
        data = data.to(device)
        target = target.to(device)
        data_time = time.time() - fetch_start

        output = model(data)
        loss = criterion(output, target)
        opt.zero_grad()
        loss.backward()
        opt.step()

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        # accuracy() values are divided by 100 here, i.e. treated as percentages.
        top1_sum += 1 - prec1.item() / 100
        top5_sum += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            pieces = [
                'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, lr_sched.get_lr()[0], loss.item()),
                'Top-1 err = {:.6f},\t'.format(top1_sum / args.display_interval),
                'Top-5 err = {:.6f},\t'.format(top5_sum / args.display_interval),
                'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - window_start) / args.display_interval),
            ]
            logging.info(''.join(pieces))
            # Start a fresh reporting window.
            window_start = time.time()
            top1_sum = 0.0
            top5_sum = 0.0

        if all_iters % args.save_interval == 0:
            snapshot = {
                'state_dict': model.state_dict(),
                'optimizer_state_dict': args.optimizer.state_dict(),
                'lr_scheduler_state_dict': args.scheduler.state_dict(),
            }
            save_checkpoint(snapshot, all_iters)

    return all_iters
def load_checkpoint(net, checkpoint):
    """Copy tensors from ``checkpoint`` into ``net``, matching them by name.

    ``checkpoint`` may be a plain name -> tensor mapping or a dict that
    stores such a mapping under ``'state_dict'``. A leading ``'module.'``
    (the prefix ``nn.DataParallel`` adds to every key) is stripped before
    matching. Parameters and buffers of ``net`` that have no entry in the
    checkpoint are left untouched and their names are printed, except for
    names containing ``'predict'``.

    The mapping passed in is never modified.

    :param net: module whose ``named_parameters()`` / ``named_buffers()``
        are filled in
    :param checkpoint: loaded checkpoint (see above)
    """
    prefix = 'module.'
    state = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint

    # Build a new mapping instead of popping/re-inserting keys while looping
    # over the same dict: that raises RuntimeError ("dictionary keys changed
    # during iteration") or skips keys. Only a real leading prefix is
    # stripped, not any key that merely contains the word "module".
    state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state.items()
    }

    # Parameters first, then buffers, as before.
    for named_tensors in (net.named_parameters(), net.named_buffers()):
        for name, tensor in named_tensors:
            if name in state:
                tensor.data = state[name].data
            elif 'predict' not in name:
                # Report entries the checkpoint does not provide.
                print(name)
class DataIterator(object):
    """Endless batch source that restarts its dataloader when it runs out.

    ``next()`` returns the first two fields of each batch yielded by the
    wrapped dataloader and transparently begins a new pass once the current
    one is exhausted.
    """

    def __init__(self, dataloader):
        """
        :param dataloader: any re-iterable object yielding indexable batches
        """
        self.dataloader = dataloader
        # Kept as enumerate() so `iterator` still yields (index, batch) pairs.
        self.iterator = enumerate(self.dataloader)

    def _restart(self):
        """Begin a fresh pass over the dataloader and return its first batch."""
        self.iterator = enumerate(self.dataloader)
        _, batch = next(self.iterator)
        return batch

    def next(self):
        """Return ``(batch[0], batch[1])`` for the next batch.

        The end of a pass (``StopIteration``) silently wraps around. Any
        other error raised by the dataloader is logged with its traceback
        and then handled the same way, so the retry behaviour is unchanged
        but failures are no longer invisible. An error raised by the
        restarted pass itself propagates to the caller.
        """
        try:
            _, batch = next(self.iterator)
        except StopIteration:
            # Normal end of a pass over the data.
            batch = self._restart()
        except Exception:
            # Previously swallowed silently; keep retrying but leave a trace.
            logging.exception('DataIterator: dataloader raised; restarting it')
            batch = self._restart()
        return batch[0], batch[1]
def get_mean(path='./ImageNet_1000_scale224_mean.xml'):
    """Load the 224x224 mean image stored in an XML file.

    The text of the first ``<data>`` element inside the first ``<MeanImg>``
    element is read as a flat list of whitespace-separated numbers, reshaped
    to (224, 224, 3) and rearranged to channel-first layout.

    :param path: XML file to read; the default is the location that used to
        be hard-coded
    :return: float64 array of shape (1, 3, 224, 224); the channel order is
        BGR according to the original inline comment
    :raises ValueError: if a value is not <= 255 (this used to be an
        ``assert``, which is skipped under ``python -O``), or if the number
        of values does not match 224*224*3
    """
    from xml.dom.minidom import parse
    import numpy as np

    root = parse(path).documentElement
    mean_img = root.getElementsByTagName('MeanImg')[0]
    data_node = mean_img.getElementsByTagName('data')[0]
    text = data_node.childNodes[0].data

    # split() with no argument splits on any run of whitespace, so values
    # separated by newlines or tabs parse as well as space-separated ones.
    mean = np.array([float(token) for token in text.split()])
    if not (mean <= 255).all():
        raise ValueError('mean image contains values that are not <= 255')

    mean = mean.reshape((224, 224, 3))  # (H, W, C)
    mean = np.transpose(mean, [2, 0, 1])  # (C, H, W)
    return mean[np.newaxis, ...]
default=20, help='report frequency') 149 | parser.add_argument('--val-interval', type=int, default=10000, help='report frequency') 150 | parser.add_argument('--save-interval', type=int, default=10000, help='report frequency') 151 | 152 | parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset') 153 | parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset') 154 | 155 | args = parser.parse_args() 156 | return args 157 | 158 | 159 | def main(): 160 | args = get_args() 161 | 162 | # Log 163 | log_format = '[%(asctime)s] %(message)s' 164 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%d %I:%M:%S') 165 | t = time.time() 166 | local_time = time.localtime(t) 167 | if not os.path.exists('./log'): 168 | os.mkdir('./log') 169 | fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t))) 170 | fh.setFormatter(logging.Formatter(log_format)) 171 | logging.getLogger().addHandler(fh) 172 | 173 | use_gpu = False 174 | if torch.cuda.is_available(): 175 | use_gpu = True 176 | 177 | assert os.path.exists(args.train_dir) 178 | train_dataset = datasets.ImageFolder( 179 | args.train_dir, 180 | transforms.Compose([ 181 | transforms.RandomResizedCrop(224), 182 | transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), 183 | transforms.RandomHorizontalFlip(0.5), 184 | transforms.ToTensor(), 185 | ]) 186 | ) 187 | train_loader = torch.utils.data.DataLoader( 188 | train_dataset, batch_size=args.batch_size, shuffle=True, 189 | num_workers=1, pin_memory=use_gpu) 190 | train_dataprovider = DataIterator(train_loader) 191 | 192 | assert os.path.exists(args.val_dir) 193 | val_loader = torch.utils.data.DataLoader( 194 | datasets.ImageFolder(args.val_dir, transforms.Compose([ 195 | OpencvResize(256), 196 | transforms.CenterCrop(224), 197 | ToBGRTensor(), 198 | ])), 199 | batch_size=200, shuffle=False, 200 | 
num_workers=1, pin_memory=use_gpu 201 | ) 202 | val_dataprovider = DataIterator(val_loader) 203 | print('load data successfully') 204 | 205 | model = ShuffleNetV2() 206 | if args.eval: 207 | if args.eval_resume is not None: 208 | checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu') 209 | print('==> Resuming from checkpoint..') 210 | load_checkpoint(model, checkpoint) 211 | 212 | optimizer = torch.optim.SGD(get_parameters(model), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 213 | criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1) 214 | 215 | if use_gpu: 216 | model = nn.DataParallel(model) 217 | loss_function = criterion_smooth.cuda() 218 | device = torch.device("cuda") 219 | else: 220 | loss_function = criterion_smooth 221 | device = torch.device("cpu") 222 | 223 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, 224 | lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1) 225 | 226 | model = model.to(device) 227 | 228 | all_iters = 0 229 | if args.auto_continue: 230 | lastest_model, iters = get_lastest_model() 231 | if lastest_model is not None: 232 | all_iters = iters 233 | checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu') 234 | model.load_state_dict(checkpoint['state_dict'], strict=True) 235 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 236 | scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict']) 237 | print('load from checkpoint') 238 | 239 | args.optimizer = optimizer 240 | args.loss_function = loss_function 241 | args.scheduler = scheduler 242 | args.train_dataprovider = train_dataprovider 243 | args.val_dataprovider = val_dataprovider 244 | 245 | if args.eval: 246 | if args.eval_resume is not None: 247 | validate(model, device, args, all_iters=all_iters) 248 | else: 249 | while all_iters < args.total_iters: 250 | all_iters = train(model, device, args, val_interval=args.val_interval, 
def train(model, device, args, *, val_interval, all_iters=None):
    """Run ``val_interval`` optimisation steps and return the new step count.

    Each batch is moved to ``device``, passed through ``Lighting`` and
    ``ColorNormalize``, and has its channel axis reversed (RGB -> BGR)
    before being fed to the model. The model is expected to return four
    outputs; the loss is their weighted sum (1.0, 0.7, 0.5, 0.3) and the
    reported accuracy is computed on the first output only.

    A progress line is logged whenever the global step is a multiple of
    ``args.display_interval`` and ``save_checkpoint`` is called whenever it
    is a multiple of ``args.save_interval``.

    :param model: network being trained
    :param device: device the batch is moved to
    :param args: namespace providing ``optimizer``, ``loss_function``,
        ``scheduler``, ``train_dataprovider``, ``display_interval`` and
        ``save_interval``
    :param val_interval: number of steps performed by this call
    :param all_iters: global step counter before this call
    :return: global step counter after this call
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    # These helpers only hold constants (the random jitter is drawn inside
    # Lighting.__call__), so build them once rather than on every step.
    lighting = Lighting(alphastd=0.1)
    normalize = ColorNormalize()

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    model.train()
    for _ in range(val_interval):
        # NOTE(review): the scheduler is stepped before the optimizer; PyTorch
        # >= 1.1 documents the opposite order. Left unchanged because swapping
        # it shifts the LR schedule by one step -- confirm before changing.
        scheduler.step()
        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)  # (N,3,H,W) RGB 0~1
        data = normalize(lighting(data))
        # Reverse the channel axis on the device itself instead of copying
        # the batch to the CPU, through numpy, and back again.
        data = data.flip(1)  # 2 BGR
        data_time = time.time() - d_st

        output_7, output_14, output_28, output_56 = model(data)
        loss = 1.0 * loss_function(output_7, target) + 0.7 * loss_function(output_14, target) + \
               0.5 * loss_function(output_28, target) + 0.3 * loss_function(output_56, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output_7, target, topk=(1, 5))

        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval)
            logging.info(printInfo)
            # Start a fresh reporting window.
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint({'state_dict': model.state_dict(),
                             'optimizer_state_dict': args.optimizer.state_dict(),
                             'lr_scheduler_state_dict': args.scheduler.state_dict()},
                            all_iters)

    return all_iters
checkpoint: 356 | if 'predict' not in name: 357 | print(name) 358 | else: 359 | param.data = checkpoint[name].data 360 | for name, buffer in net.named_buffers(): 361 | if name not in checkpoint: 362 | if 'predict' not in name: 363 | print(name) 364 | else: 365 | buffer.data = checkpoint[name].data 366 | 367 | 368 | if __name__ == "__main__": 369 | main() 370 | 371 | --------------------------------------------------------------------------------