├── LICENSE
├── ShuffleNetV2.ExLarge
    ├── README.md
    ├── utils.py
    ├── network.py
    └── eval.py
├── ShuffleNetV2.Large
    ├── README.md
    ├── utils.py
    ├── network.py
    └── train.py
├── OneShot
    ├── README.md
    ├── utils.py
    ├── blocks.py
    ├── network.py
    └── train.py
├── ShuffleNetV2+
    ├── README.md
    ├── utils.py
    ├── network.py
    ├── blocks.py
    └── train.py
├── ShuffleNetV2
    ├── README.md
    ├── blocks.py
    ├── utils.py
    ├── network.py
    └── train.py
├── DetNAS
    ├── README.md
    ├── utils.py
    ├── network.py
    ├── blocks.py
    └── train.py
├── ShuffleNetV1
    ├── blocks.py
    ├── utils.py
    ├── README.md
    ├── network.py
    └── train.py
└── README.md


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Megvii Technology
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.ExLarge/README.md:
--------------------------------------------------------------------------------
 1 | # ShuffleNetV2.ExLarge
 2 | 
 3 | This repository contains a PyTorch implementation of ShuffleNetV2.ExLarge, an extra-large version of ShuffleNetV2.
 4 | 
 5 | ## Requirements
 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 8 | 
 9 | ## Usage
10 | We used an external training dataset to achieve this result, so you do not need to re-train the model.
11 | 
12 | This is the evaluation script:
13 | ```shell
14 | python eval.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
15 | ```
16 | 
17 | 
18 | ## Trained Models
19 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
20 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
21 | 
22 | 
23 | ## Results
24 | 
25 | | Model                  | FLOPs | #Params   | Top-1     | Top-5 |
26 | | :--------------------- | :---: | :------:  | :---:     | :---: |
27 | | ShuffleNetV2.ExLarge     | 46.2G | 254.7M    | 15.52 | 2.9  |
28 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.Large/README.md:
--------------------------------------------------------------------------------
 1 | # ShuffleNetV2.Large
 2 | 
 3 | This repository contains a PyTorch implementation of ShuffleNetV2.Large, a deeper version of ShuffleNetV2.
 4 | 
 5 | ## Requirements
 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 8 | 
 9 | ## Usage
10 | Train:
11 | ```shell
12 | python train.py --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
13 | ```
14 | Eval:
15 | ```shell
16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
17 | ```
18 | 
19 | 
20 | ## Trained Models
21 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
22 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
23 | 
24 | 
25 | ## Results
26 | 
27 | | Model                  | FLOPs | #Params   | Top-1     | Top-5 |
28 | | :--------------------- | :---: | :------:  | :---:     | :---: |
29 | | ShuffleNetV2.Large     | 12.7G | 140.7M    | **18.56** | 4.48  |
30 | | SEnet                  | 20.7G |    -      | 18.68     | 4.47  |
31 | 


--------------------------------------------------------------------------------
/OneShot/README.md:
--------------------------------------------------------------------------------
 1 | # [One-Shot NAS](https://arxiv.org/abs/1904.00420)
 2 | This repository contains PyTorch implementations of the networks searched by Single Path One-Shot NAS.
 3 | 
 4 | 
 5 | ## Requirements
 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 8 | 
 9 | ## Usage
10 | Train:
11 | ```shell
12 | python train.py --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
13 | ```
14 | Eval:
15 | ```shell
16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
17 | ```
18 |   
19 | 
20 | 
21 | ## Trained Models
22 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
23 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
24 | 
25 | 
26 | 
27 | ## Results
28 | 
29 | 
30 | | Model                  | FLOPs | #Params   | Top-1 | Top-5 |
31 | | :--------------------- | :---: | :------:  | :---: | :---: |
32 | |    OneShot |  328M |  3.4M |  **25.1**   |   8.0   |
33 | |    NASNET-A|  564M |  5.3M |  26.0   |   8.4   |
34 | |    PNASNET|  588M |  5.1M |  25.8   |   8.1   |
35 | |    MnasNet|  317M |  4.2M |  26.0   |  8.2   |
36 | |    DARTS|  574M|  4.7M |  26.7   |   8.7  |
37 | |    FBNet-B|  295M|  4.5M |  25.9   |   -   |
38 |     
39 | ## Citation
40 | If you use these models in your research, please cite:
41 | 
42 | 
43 |     @article{guo2019single,
44 |             title={Single path one-shot neural architecture search with uniform sampling},
45 |             author={Guo, Zichao and Zhang, Xiangyu and Mu, Haoyuan and Heng, Wen and Liu, Zechun and Wei, Yichen and Sun, Jian},
46 |             journal={arXiv preprint arXiv:1904.00420},
47 |             year={2019}
48 |     }
49 | 


--------------------------------------------------------------------------------
/ShuffleNetV2+/README.md:
--------------------------------------------------------------------------------
 1 | # ShuffleNetV2+
 2 | 
 3 | This repository contains a PyTorch implementation of ShuffleNetV2+, a strengthened version of ShuffleNetV2 that adds Hard-Swish, Hard-Sigmoid and [SE](https://arxiv.org/abs/1709.01507) modules.
 4 | 
 5 | 
 6 | 
 7 | ## Requirements
 8 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 9 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
10 | 
11 | ## Usage
12 | Train:
13 | ```shell
14 | python train.py --model-size Large --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
15 | ```
16 | Eval:
17 | ```shell
18 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size Large --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
19 | ```
20 | 
21 | 
22 | ## Trained Models
23 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
24 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
25 | 
26 | 
27 | ## Results
28 | 
29 | The following is a comparison with MobileNetV3 in [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244).
30 | 
31 | |    Model                 |  FLOPs    |   #Params |   Top-1   |   Top-5   |
32 | |:------------------------|:---------:|:---------:|:---------:|:---------:|
33 | ShuffleNetV2+ Large        |   360M     |	6.7M    |      **22.9**    |       6.7   |
34 | MobileNetV3 Large 224/1.25       |   356M     |	7.5M    |      23.4    |       -   |
35 | ShuffleNetV2+ Medium       |   222M     |	5.6M    |      **24.3**    |       7.4    |
36 | MobileNetV3 Large 224/1.0       |   217M     |	5.4M    |      24.8    |       -    |
37 | ShuffleNetV2+ Small        |   156M     |	5.1M    |      **25.9**    |       8.3    |
38 | MobileNetV3 Large 224/0.75        |   155M     |	4.0M    |      26.7    |       -    |
39 | 
40 | 


--------------------------------------------------------------------------------
/ShuffleNetV2/README.md:
--------------------------------------------------------------------------------
 1 | # [ShuffleNetV2](https://arxiv.org/pdf/1807.11164.pdf)
 2 | This repository contains ShuffleNetV2 implementation by Pytorch.
 3 | 
 4 | 
 5 | ## Requirements
 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 8 | 
 9 | ## Usage
10 | Train:
11 | ```shell
12 | python train.py --model-size 1.5x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
13 | ```
14 | Eval:
15 | ```shell
16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size 1.5x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
17 | ```
18 | 
19 | 
20 | ## Trained Models
21 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
22 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
23 | 
24 | 
25 | ## Results
26 | 
27 | | Model                   | FLOPs | #Params  | Top-1        | Top-5     |
28 | | :--------------------- | :---: | :------: | :----------: | :------:  |
29 | |    ShuffleNetV2 2.0x    | 591M  |     7.4M |     **25.0** 	|     7.6   |
30 | | MobileNetV2 (1.4) | 585M | 6.9M | 25.3 | - |
31 | |    ShuffleNetV2 1.5x    | 299M  |     3.5M |     **27.4** 	|     9.4   | 
32 | | MobileNetV2 | 300M | 3.4M | 28.0 | - | 
33 | |    ShuffleNetV2 1.0x    | 146M  |     2.3M |     30.6 	|    11.1   |   
34 | |    ShuffleNetV2 0.5x    |  41M  |     1.4M |     38.9 	|    17.4   |
35 | 
36 | 
37 | 
38 | 
39 | ## Citation
40 | If you use these models in your research, please cite:
41 | 
42 | 
43 |     @inproceedings{ma2018shufflenet, 
44 |                 title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},  
45 |                 author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},  
46 |                 booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},  
47 |                 pages={116--131}, 
48 |                 year={2018} 
49 |     }
50 | 


--------------------------------------------------------------------------------
/DetNAS/README.md:
--------------------------------------------------------------------------------
 1 | # DetNAS
 2 | 
 3 | This repository contains DetNAS backbone networks implementation by Pytorch.
 4 | 
 5 | ## Requirements
 6 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 7 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 8 | 
 9 | ## Usage
10 | Train:
11 | ```shell
12 | python train.py --model-size VOC_FPN_300M --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
13 | ```
14 | Eval:
15 | ```shell
16 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size VOC_FPN_300M --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
17 | ```
18 | 
19 | ## Trained Models
20 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
21 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
22 | 
23 | 
24 | 
25 | ## Results
26 | 
27 | | Model                  | FLOPs| #Params| Top-1    | Top-5 |         mAP*       |
28 | | :------------          | :---:| :-----:| :---:    | :---: | :--------------:   |
29 | |300M (VOC, RetinaNet)	 | 300M	|  3.5M  |  25.4	|  8.1  |       80.1         |
30 | |300M (VOC, FPN)	     | 300M	|  3.7M	 |  25.9    |  8.3  |       81.5         |
31 | |300M (COCO, RetinaNet)  | 300M	|  3.7M  |  26.0    |  8.4  |       33.3         |
32 | |300M (COCO, FPN) 	     | 300M	|  3.5M  |  26.2    |  8.4  |       36.4         |
33 | |1.3G (COCO, FPN)	     | 1.3G	|  10.4M | **22.8** |  6.5  |       40.0         |
34 | |3.8G (COCO, FPN)        | 3.8G	|  29.5M | **21.6** |  6.3  |     **42.0**       |
35 | |ResNet50 (COCO, FPN)    | 3.8G	|  -     |  23.9    |  7.1  |       37.3         |
36 | |ResNet101 (COCO, FPN)   | 7.6G	|  -     |  22.6    |  6.4  |       40.0         |
37 | 
38 | * More about DetNAS in [Link](https://github.com/megvii-model/DetNAS).
39 | 
40 | ## Citation
41 | If you use these models in your research, please cite:
42 | 
43 | 
44 |     @misc{chen2019detnas,
45 |         title={DetNAS: Backbone Search for Object Detection},
46 |         author={Chen, Yukang and Yang, Tong and Zhang, Xiangyu and Meng, Gaofeng and Xiao, Xinyu and Sun, Jian},
47 |         year={2019},
48 |         booktitle = {NeurIPS},
49 |     }
50 | 


--------------------------------------------------------------------------------
/ShuffleNetV2/blocks.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class ShuffleV2Block(nn.Module):
 5 |     def __init__(self, inp, oup, mid_channels, *, ksize, stride):
 6 |         super(ShuffleV2Block, self).__init__()
 7 |         self.stride = stride
 8 |         assert stride in [1, 2]
 9 | 
10 |         self.mid_channels = mid_channels
11 |         self.ksize = ksize
12 |         pad = ksize // 2
13 |         self.pad = pad
14 |         self.inp = inp
15 | 
16 |         outputs = oup - inp
17 | 
18 |         branch_main = [
19 |             # pw
20 |             nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
21 |             nn.BatchNorm2d(mid_channels),
22 |             nn.ReLU(inplace=True),
23 |             # dw
24 |             nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False),
25 |             nn.BatchNorm2d(mid_channels),
26 |             # pw-linear
27 |             nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False),
28 |             nn.BatchNorm2d(outputs),
29 |             nn.ReLU(inplace=True),
30 |         ]
31 |         self.branch_main = nn.Sequential(*branch_main)
32 | 
33 |         if stride == 2:
34 |             branch_proj = [
35 |                 # dw
36 |                 nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
37 |                 nn.BatchNorm2d(inp),
38 |                 # pw-linear
39 |                 nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
40 |                 nn.BatchNorm2d(inp),
41 |                 nn.ReLU(inplace=True),
42 |             ]
43 |             self.branch_proj = nn.Sequential(*branch_proj)
44 |         else:
45 |             self.branch_proj = None
46 | 
47 |     def forward(self, old_x):
48 |         if self.stride==1:
49 |             x_proj, x = self.channel_shuffle(old_x)
50 |             return torch.cat((x_proj, self.branch_main(x)), 1)
51 |         elif self.stride==2:
52 |             x_proj = old_x
53 |             x = old_x
54 |             return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
55 | 
56 |     def channel_shuffle(self, x):
57 |         batchsize, num_channels, height, width = x.data.size()
58 |         assert (num_channels % 4 == 0)
59 |         x = x.reshape(batchsize * num_channels // 2, 2, height * width)
60 |         x = x.permute(1, 0, 2)
61 |         x = x.reshape(2, -1, num_channels // 2, height, width)
62 |         return x[0], x[1]
63 | 


--------------------------------------------------------------------------------
/ShuffleNetV1/blocks.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class ShuffleV1Block(nn.Module):
 6 |     def __init__(self, inp, oup, *, group, first_group, mid_channels, ksize, stride):
 7 |         super(ShuffleV1Block, self).__init__()
 8 |         self.stride = stride
 9 |         assert stride in [1, 2]
10 | 
11 |         self.mid_channels = mid_channels
12 |         self.ksize = ksize
13 |         pad = ksize // 2
14 |         self.pad = pad
15 |         self.inp = inp
16 |         self.group = group
17 | 
18 |         if stride == 2:
19 |             outputs = oup - inp
20 |         else:
21 |             outputs = oup
22 | 
23 |         branch_main_1 = [
24 |             # pw
25 |             nn.Conv2d(inp, mid_channels, 1, 1, 0, groups=1 if first_group else group, bias=False),
26 |             nn.BatchNorm2d(mid_channels),
27 |             nn.ReLU(inplace=True),
28 |             # dw
29 |             nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False),
30 |             nn.BatchNorm2d(mid_channels),
31 |         ]
32 |         branch_main_2 = [
33 |             # pw-linear
34 |             nn.Conv2d(mid_channels, outputs, 1, 1, 0, groups=group, bias=False),
35 |             nn.BatchNorm2d(outputs),
36 |         ]
37 |         self.branch_main_1 = nn.Sequential(*branch_main_1)
38 |         self.branch_main_2 = nn.Sequential(*branch_main_2)
39 | 
40 |         if stride == 2:
41 |             self.branch_proj = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
42 | 
43 |     def forward(self, old_x):
44 |         x = old_x
45 |         x_proj = old_x
46 |         x = self.branch_main_1(x)
47 |         if self.group > 1:
48 |             x = self.channel_shuffle(x)
49 |         x = self.branch_main_2(x)
50 |         if self.stride == 1:
51 |             return F.relu(x + x_proj)
52 |         elif self.stride == 2:
53 |             return torch.cat((self.branch_proj(x_proj), F.relu(x)), 1)
54 | 
55 |     def channel_shuffle(self, x):
56 |         batchsize, num_channels, height, width = x.data.size()
57 |         assert num_channels % self.group == 0
58 |         group_channels = num_channels // self.group
59 |         
60 |         x = x.reshape(batchsize, group_channels, self.group, height, width)
61 |         x = x.permute(0, 2, 1, 3, 4)
62 |         x = x.reshape(batchsize, num_channels, height, width)
63 | 
64 |         return x
65 | 


--------------------------------------------------------------------------------
/DetNAS/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes set by ``reset``/``update``: ``val`` (last value seen),
    ``sum`` (weighted total), ``cnt`` (total weight) and ``avg``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dim 1, so rows are
            samples and columns are classes.
        target: tensor holding one class index per row of ``output``.
        topk: sequence of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the non-contiguous layout of the transposed
        # `pred`, so `.view(-1)` raises a RuntimeError on recent PyTorch
        # releases; `.reshape(-1)` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
    """Save ``state`` as ``./models/<tag>checkpoint-<iters>.pth.tar``.

    ``iters`` is zero-padded to six digits in the file name. The
    ``./models`` directory is created when it does not exist.
    """
    # exist_ok avoids the check-then-create race when several processes
    # try to save at the same time.
    os.makedirs("./models", exist_ok=True)
    filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
    torch.save(state, filename)
60 | 
def get_lastest_model():
    """Locate the most recent checkpoint under ``./models``.

    ``./models`` is created if it is missing.

    Returns:
        ``(path, iters)`` for the file whose name carries the largest
        iteration number (the first run of digits in the name), or
        ``(None, 0)`` when no file name contains a number.
    """
    os.makedirs('./models', exist_ok=True)
    best = None
    for name in os.listdir('./models/'):
        numbers = re.findall(r'\d+', name)
        if not numbers:
            # A stray file without digits cannot be a checkpoint; skipping
            # it avoids an IndexError on numbers[0].
            continue
        # Compare numerically: a plain string sort would put
        # 'checkpoint-1000000' before 'checkpoint-999999'. Ties fall back
        # to the name, matching the previous sort-and-take-last choice.
        candidate = (int(numbers[0]), name)
        if best is None or candidate > best:
            best = candidate
    if best is None:
        return None, 0
    iters, lastest_model = best
    return './models/' + lastest_model, iters
71 | 
72 | 
def get_parameters(model):
    """Split the parameters of ``model`` into two optimizer groups.

    Parameters whose name contains ``'weight'`` and that have more than one
    dimension go into the first group, which carries no ``weight_decay``
    override. Every other parameter goes into the second group, which sets
    ``weight_decay`` to 0.
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        is_multi_dim_weight = 'weight' in name and param.dim() > 1
        bucket = decayed if is_multi_dim_weight else undecayed
        bucket.append(param)
    # Every parameter must have landed in exactly one of the two groups.
    assert len(list(model.parameters())) == len(decayed) + len(undecayed)
    return [dict(params=decayed), dict(params=undecayed, weight_decay=0.)]
86 | 


--------------------------------------------------------------------------------
/ShuffleNetV1/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes set by ``reset``/``update``: ``val`` (last value seen),
    ``sum`` (weighted total), ``cnt`` (total weight) and ``avg``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dim 1, so rows are
            samples and columns are classes.
        target: tensor holding one class index per row of ``output``.
        topk: sequence of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the non-contiguous layout of the transposed
        # `pred`, so `.view(-1)` raises a RuntimeError on recent PyTorch
        # releases; `.reshape(-1)` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
    """Save ``state`` as ``./models/<tag>checkpoint-<iters>.pth.tar``.

    ``iters`` is zero-padded to six digits in the file name. The
    ``./models`` directory is created when it does not exist.
    """
    # exist_ok avoids the check-then-create race when several processes
    # try to save at the same time.
    os.makedirs("./models", exist_ok=True)
    filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
    torch.save(state, filename)
60 | 
def get_lastest_model():
    """Locate the most recent checkpoint under ``./models``.

    ``./models`` is created if it is missing.

    Returns:
        ``(path, iters)`` for the file whose name carries the largest
        iteration number (the first run of digits in the name), or
        ``(None, 0)`` when no file name contains a number.
    """
    os.makedirs('./models', exist_ok=True)
    best = None
    for name in os.listdir('./models/'):
        numbers = re.findall(r'\d+', name)
        if not numbers:
            # A stray file without digits cannot be a checkpoint; skipping
            # it avoids an IndexError on numbers[0].
            continue
        # Compare numerically: a plain string sort would put
        # 'checkpoint-1000000' before 'checkpoint-999999'. Ties fall back
        # to the name, matching the previous sort-and-take-last choice.
        candidate = (int(numbers[0]), name)
        if best is None or candidate > best:
            best = candidate
    if best is None:
        return None, 0
    iters, lastest_model = best
    return './models/' + lastest_model, iters
71 | 
72 | 
def get_parameters(model):
    """Split the parameters of ``model`` into two optimizer groups.

    Parameters whose name contains ``'weight'`` and that have more than one
    dimension go into the first group, which carries no ``weight_decay``
    override. Every other parameter goes into the second group, which sets
    ``weight_decay`` to 0.
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        is_multi_dim_weight = 'weight' in name and param.dim() > 1
        bucket = decayed if is_multi_dim_weight else undecayed
        bucket.append(param)
    # Every parameter must have landed in exactly one of the two groups.
    assert len(list(model.parameters())) == len(decayed) + len(undecayed)
    return [dict(params=decayed), dict(params=undecayed, weight_decay=0.)]
86 | 


--------------------------------------------------------------------------------
/ShuffleNetV2/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes set by ``reset``/``update``: ``val`` (last value seen),
    ``sum`` (weighted total), ``cnt`` (total weight) and ``avg``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dim 1, so rows are
            samples and columns are classes.
        target: tensor holding one class index per row of ``output``.
        topk: sequence of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the non-contiguous layout of the transposed
        # `pred`, so `.view(-1)` raises a RuntimeError on recent PyTorch
        # releases; `.reshape(-1)` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
    """Save ``state`` as ``./models/<tag>checkpoint-<iters>.pth.tar``.

    ``iters`` is zero-padded to six digits in the file name. The
    ``./models`` directory is created when it does not exist.
    """
    # exist_ok avoids the check-then-create race when several processes
    # try to save at the same time.
    os.makedirs("./models", exist_ok=True)
    filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
    torch.save(state, filename)
60 | 
def get_lastest_model():
    """Locate the most recent checkpoint under ``./models``.

    ``./models`` is created if it is missing.

    Returns:
        ``(path, iters)`` for the file whose name carries the largest
        iteration number (the first run of digits in the name), or
        ``(None, 0)`` when no file name contains a number.
    """
    os.makedirs('./models', exist_ok=True)
    best = None
    for name in os.listdir('./models/'):
        numbers = re.findall(r'\d+', name)
        if not numbers:
            # A stray file without digits cannot be a checkpoint; skipping
            # it avoids an IndexError on numbers[0].
            continue
        # Compare numerically: a plain string sort would put
        # 'checkpoint-1000000' before 'checkpoint-999999'. Ties fall back
        # to the name, matching the previous sort-and-take-last choice.
        candidate = (int(numbers[0]), name)
        if best is None or candidate > best:
            best = candidate
    if best is None:
        return None, 0
    iters, lastest_model = best
    return './models/' + lastest_model, iters
71 | 
72 | 
def get_parameters(model):
    """Split the parameters of ``model`` into two optimizer groups.

    Parameters whose name contains ``'weight'`` and that have more than one
    dimension go into the first group, which carries no ``weight_decay``
    override. Every other parameter goes into the second group, which sets
    ``weight_decay`` to 0.
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        is_multi_dim_weight = 'weight' in name and param.dim() > 1
        bucket = decayed if is_multi_dim_weight else undecayed
        bucket.append(param)
    # Every parameter must have landed in exactly one of the two groups.
    assert len(list(model.parameters())) == len(decayed) + len(undecayed)
    return [dict(params=decayed), dict(params=undecayed, weight_decay=0.)]
86 | 


--------------------------------------------------------------------------------
/OneShot/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes set by ``reset``/``update``: ``val`` (last value seen),
    ``sum`` (weighted total), ``cnt`` (total weight) and ``avg``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dim 1, so rows are
            samples and columns are classes.
        target: tensor holding one class index per row of ``output``.
        topk: sequence of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the non-contiguous layout of the transposed
        # `pred`, so `.view(-1)` raises a RuntimeError on recent PyTorch
        # releases; `.reshape(-1)` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
    """Save ``state`` as ``./models/<tag>checkpoint-<iters>.pth.tar``.

    ``iters`` is zero-padded to six digits in the file name. The
    ``./models`` directory is created when it does not exist.
    """
    # exist_ok avoids the check-then-create race when several processes
    # try to save at the same time.
    os.makedirs("./models", exist_ok=True)
    filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
    torch.save(state, filename)
60 | 
61 | 
def get_lastest_model():
    """Locate the most recent checkpoint under ``./models``.

    ``./models`` is created if it is missing.

    Returns:
        ``(path, iters)`` for the file whose name carries the largest
        iteration number (the first run of digits in the name), or
        ``(None, 0)`` when no file name contains a number.
    """
    os.makedirs('./models', exist_ok=True)
    best = None
    for name in os.listdir('./models/'):
        numbers = re.findall(r'\d+', name)
        if not numbers:
            # A stray file without digits cannot be a checkpoint; skipping
            # it avoids an IndexError on numbers[0].
            continue
        # Compare numerically: a plain string sort would put
        # 'checkpoint-1000000' before 'checkpoint-999999'. Ties fall back
        # to the name, matching the previous sort-and-take-last choice.
        candidate = (int(numbers[0]), name)
        if best is None or candidate > best:
            best = candidate
    if best is None:
        return None, 0
    iters, lastest_model = best
    return './models/' + lastest_model, iters
72 | 
73 | 
def get_parameters(model):
    """Split the parameters of ``model`` into two optimizer groups.

    Parameters whose name contains ``'weight'`` and that have more than one
    dimension go into the first group, which carries no ``weight_decay``
    override. Every other parameter goes into the second group, which sets
    ``weight_decay`` to 0.
    """
    decayed, undecayed = [], []
    for name, param in model.named_parameters():
        is_multi_dim_weight = 'weight' in name and param.dim() > 1
        bucket = decayed if is_multi_dim_weight else undecayed
        bucket.append(param)
    # Every parameter must have landed in exactly one of the two groups.
    assert len(list(model.parameters())) == len(decayed) + len(undecayed)
    return [dict(params=decayed), dict(params=undecayed, weight_decay=0.)]
87 | 


--------------------------------------------------------------------------------
/ShuffleNetV2+/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
    """Running weighted average of a stream of values.

    Attributes set by ``reset``/``update``: ``val`` (last value seen),
    ``sum`` (weighted total), ``cnt`` (total weight) and ``avg``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` as seen ``n`` times and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for each k in ``topk``.

    Args:
        output: score tensor; the top-k is taken along dim 1, so rows are
            samples and columns are classes.
        target: tensor holding one class index per row of ``output``.
        topk: sequence of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the non-contiguous layout of the transposed
        # `pred`, so `.view(-1)` raises a RuntimeError on recent PyTorch
        # releases; `.reshape(-1)` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
	"""Serialize ``state`` to ``./models/<tag>checkpoint-<iters>.pth.tar``.

	Args:
		state: object handed to ``torch.save``.
		iters: iteration counter, zero-padded to six digits in the name.
		tag: optional prefix placed in front of ``checkpoint-``.
	"""
	# exist_ok avoids the check-then-create race when several processes
	# try to create the directory at the same time.
	os.makedirs("./models", exist_ok=True)
	filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
	torch.save(state, filename)
60 | 
61 | 
def get_lastest_model():
	"""Locate the last checkpoint (by sorted file name) under ``./models``.

	The directory is created when it does not exist yet.

	Returns:
		``(path, iters)`` where ``iters`` is the first run of digits in the
		chosen file name, or ``(None, 0)`` when the directory is empty.
	"""
	model_dir = './models'
	if not os.path.exists(model_dir):
		os.mkdir(model_dir)
	candidates = sorted(os.listdir('./models/'))
	if candidates == []:
		return None, 0
	newest = candidates[-1]
	numbers = re.findall(r'\d+', newest)
	return './models/' + newest, int(numbers[0])
72 | 
73 | 
def get_parameters(model):
	"""Split a model's parameters into two optimizer parameter groups.

	Parameters whose name contains ``'weight'`` and that have more than
	one dimension go into the first group; everything else goes into the
	second group, which carries ``weight_decay=0.``.

	Returns:
		A two-element list of parameter-group dicts.
	"""
	decayed, undecayed = [], []
	for name, param in model.named_parameters():
		takes_decay = 'weight' in name and param.dim() > 1
		bucket = decayed if takes_decay else undecayed
		bucket.append(param)
	# Sanity check: every parameter landed in exactly one group.
	total = len(list(model.parameters()))
	assert total == len(decayed) + len(undecayed)
	return [
		dict(params=decayed),
		dict(params=undecayed, weight_decay=0.),
	]
87 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.ExLarge/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
	"""Keeps the latest value plus a running weighted sum, count and mean."""

	def __init__(self):
		self.reset()

	def reset(self):
		"""Set every tracked statistic back to zero."""
		self.avg = self.sum = self.cnt = self.val = 0

	def update(self, val, n=1):
		"""Record ``val`` as observed ``n`` times and refresh the mean."""
		self.val = val
		weighted = val * n
		self.sum += weighted
		self.cnt += n
		self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
	"""Compute top-k accuracy, in percent, for each requested k.

	Args:
		output: score tensor with classes along dim 1.
		target: class indices, one per row of ``output``.
		topk: iterable of k values to evaluate.

	Returns:
		A list with one 0-dim float tensor per entry of ``topk``.
	"""
	maxk = max(topk)
	batch_size = target.size(0)

	# Indices of the maxk highest scores per row, transposed so that
	# row j holds the (j+1)-th best guess for every sample.
	_, pred = output.topk(maxk, 1, True, True)
	pred = pred.t()
	correct = pred.eq(target.view(1, -1).expand_as(pred))

	res = []
	for k in topk:
		# ``correct`` is derived from a transposed tensor and may be
		# non-contiguous, in which case view(-1) raises for k > 1;
		# reshape(-1) handles both layouts.
		correct_k = correct[:k].reshape(-1).float().sum(0)
		res.append(correct_k.mul_(100.0 / batch_size))
	return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
	"""Serialize ``state`` to ``./models/<tag>checkpoint-<iters>.pth.tar``.

	Args:
		state: object handed to ``torch.save``.
		iters: iteration counter, zero-padded to six digits in the name.
		tag: optional prefix placed in front of ``checkpoint-``.
	"""
	# exist_ok avoids the check-then-create race when several processes
	# try to create the directory at the same time.
	os.makedirs("./models", exist_ok=True)
	filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
	torch.save(state, filename)
60 | 
def get_lastest_model():
	"""Locate the last checkpoint (by sorted file name) under ``./models``.

	The directory is created when it does not exist yet.

	Returns:
		``(path, iters)`` where ``iters`` is the first run of digits in the
		chosen file name, or ``(None, 0)`` when the directory is empty.
	"""
	model_dir = './models'
	if not os.path.exists(model_dir):
		os.mkdir(model_dir)
	candidates = sorted(os.listdir('./models/'))
	if candidates == []:
		return None, 0
	newest = candidates[-1]
	numbers = re.findall(r'\d+', newest)
	return './models/' + newest, int(numbers[0])
71 | 
72 | 
def get_parameters(model):
	"""Split a model's parameters into two optimizer parameter groups.

	Parameters whose name contains ``'weight'`` and that have more than
	one dimension go into the first group; everything else goes into the
	second group, which carries ``weight_decay=0.``.

	Returns:
		A two-element list of parameter-group dicts.
	"""
	decayed, undecayed = [], []
	for name, param in model.named_parameters():
		takes_decay = 'weight' in name and param.dim() > 1
		bucket = decayed if takes_decay else undecayed
		bucket.append(param)
	# Sanity check: every parameter landed in exactly one group.
	total = len(list(model.parameters()))
	assert total == len(decayed) + len(undecayed)
	return [
		dict(params=decayed),
		dict(params=undecayed, weight_decay=0.),
	]
86 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.Large/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | class CrossEntropyLabelSmooth(nn.Module):
 7 | 
 8 | 	def __init__(self, num_classes, epsilon):
 9 | 		super(CrossEntropyLabelSmooth, self).__init__()
10 | 		self.num_classes = num_classes
11 | 		self.epsilon = epsilon
12 | 		self.logsoftmax = nn.LogSoftmax(dim=1)
13 | 
14 | 	def forward(self, inputs, targets):
15 | 		log_probs = self.logsoftmax(inputs)
16 | 		targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
17 | 		targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
18 | 		loss = (-targets * log_probs).mean(0).sum()
19 | 		return loss
20 | 
21 | 
class AvgrageMeter(object):
	"""Keeps the latest value plus a running weighted sum, count and mean."""

	def __init__(self):
		self.reset()

	def reset(self):
		"""Set every tracked statistic back to zero."""
		self.avg = self.sum = self.cnt = self.val = 0

	def update(self, val, n=1):
		"""Record ``val`` as observed ``n`` times and refresh the mean."""
		self.val = val
		weighted = val * n
		self.sum += weighted
		self.cnt += n
		self.avg = self.sum / self.cnt
38 | 
39 | 
def accuracy(output, target, topk=(1,)):
	"""Compute top-k accuracy, in percent, for each requested k.

	Args:
		output: score tensor with classes along dim 1.
		target: class indices, one per row of ``output``.
		topk: iterable of k values to evaluate.

	Returns:
		A list with one 0-dim float tensor per entry of ``topk``.
	"""
	maxk = max(topk)
	batch_size = target.size(0)

	# Indices of the maxk highest scores per row, transposed so that
	# row j holds the (j+1)-th best guess for every sample.
	_, pred = output.topk(maxk, 1, True, True)
	pred = pred.t()
	correct = pred.eq(target.view(1, -1).expand_as(pred))

	res = []
	for k in topk:
		# ``correct`` is derived from a transposed tensor and may be
		# non-contiguous, in which case view(-1) raises for k > 1;
		# reshape(-1) handles both layouts.
		correct_k = correct[:k].reshape(-1).float().sum(0)
		res.append(correct_k.mul_(100.0 / batch_size))
	return res
53 | 
54 | 
def save_checkpoint(state, iters, tag=''):
	"""Serialize ``state`` to ``./models/<tag>checkpoint-<iters>.pth.tar``.

	Args:
		state: object handed to ``torch.save``.
		iters: iteration counter, zero-padded to six digits in the name.
		tag: optional prefix placed in front of ``checkpoint-``.
	"""
	# exist_ok avoids the check-then-create race when several processes
	# try to create the directory at the same time.
	os.makedirs("./models", exist_ok=True)
	filename = "./models/{}checkpoint-{:06}.pth.tar".format(tag, iters)
	torch.save(state, filename)
60 | 
def get_lastest_model():
	"""Locate the last checkpoint (by sorted file name) under ``./models``.

	The directory is created when it does not exist yet.

	Returns:
		``(path, iters)`` where ``iters`` is the first run of digits in the
		chosen file name, or ``(None, 0)`` when the directory is empty.
	"""
	model_dir = './models'
	if not os.path.exists(model_dir):
		os.mkdir(model_dir)
	candidates = sorted(os.listdir('./models/'))
	if candidates == []:
		return None, 0
	newest = candidates[-1]
	numbers = re.findall(r'\d+', newest)
	return './models/' + newest, int(numbers[0])
71 | 
72 | 
def get_parameters(model):
	"""Split a model's parameters into two optimizer parameter groups.

	Parameters whose name contains ``'weight'`` and that have more than
	one dimension go into the first group; everything else goes into the
	second group, which carries ``weight_decay=0.``.

	Returns:
		A two-element list of parameter-group dicts.
	"""
	decayed, undecayed = [], []
	for name, param in model.named_parameters():
		takes_decay = 'weight' in name and param.dim() > 1
		bucket = decayed if takes_decay else undecayed
		bucket.append(param)
	# Sanity check: every parameter landed in exactly one group.
	total = len(list(model.parameters()))
	assert total == len(decayed) + len(undecayed)
	return [
		dict(params=decayed),
		dict(params=undecayed, weight_decay=0.),
	]
86 | 


--------------------------------------------------------------------------------
/ShuffleNetV1/README.md:
--------------------------------------------------------------------------------
 1 | # [ShuffleNetV1](https://arxiv.org/pdf/1707.01083.pdf)
 2 | 
 3 | This repository contains a PyTorch implementation of ShuffleNetV1.
 4 | 
 5 | 
 6 | ## Requirements
 7 | Download the ImageNet dataset and move validation images to labeled subfolders. To do this, you can use the following script:
 8 | https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
 9 | 
10 | ## Usage
11 | Train:
12 | ```shell
13 | python train.py --model-size 2.0x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
14 | ```
15 | Eval:
16 | ```shell
17 | python train.py --eval --eval-resume YOUR_WEIGHT_PATH --model-size 2.0x --train-dir YOUR_TRAINDATASET_PATH --val-dir YOUR_VALDATASET_PATH
18 | ```
19 | 
20 | 
21 | ## Trained Models
22 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
23 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
24 | 
25 | 
26 | ## Results
27 | 
28 | |    Model                 |  FLOPs    |   #Params |   Top-1   |   Top-5   |
29 | |:------------------------|:---------:|:---------:|:---------:|:---------:|
30 | ShuffleNetV1 2.0x (group=3)|    524M    |	5.4M    |      **25.9**    |        8.6   |
31 | ShuffleNetV1 2.0x (group=8)|    522M    |   6.5M    |      27.1    |        9.2   |
32 | 1.0 MobileNetV1-224 |    569M    |   4.2M    |      29.4    |        -   |
33 | ShuffleNetV1 1.5x (group=3)|    292M    |	3.4M    |      **28.4**    |        9.8   |
34 | ShuffleNetV1 1.5x (group=8)|    290M    |   4.3M    |      29.0    |       10.4   |
35 | 0.75 MobileNetV1-224 |    325M    |   2.6M    |      31.6    |        -   |
36 | ShuffleNetV1 1.0x (group=3)|   138M     |	1.9M    |      32.2    |       12.3    |
37 | ShuffleNetV1 1.0x (group=8)|    138M    |   2.4M    |      **32.0**    |       13.6   |
38 | 0.5 MobileNetV1-224 |    149M    |   1.3M    |      36.3    |        -   |
39 | ShuffleNetV1 0.5x (group=3)|   38M      |	0.7M    |      42.7    |       20.0    |
40 | ShuffleNetV1 0.5x (group=8)|    40M     |   1.0M    |      **41.2**    |       19.0   |
41 | 0.25 MobileNetV1-224 |    41M    |   0.5M    |      49.4    |        -   |
42 | 
43 | 
44 | 
45 | ## Citation
46 | If you use these models in your research, please cite:
47 | 
48 | 
49 |     @inproceedings{zhang2018shufflenet,
50 |                 title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
51 |                 author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
52 |                 booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
53 |                 pages={6848--6856},
54 |                 year={2018}
55 |     }
56 | 
57 | 


--------------------------------------------------------------------------------
/ShuffleNetV2/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from blocks import ShuffleV2Block
  4 | 
  5 | class ShuffleNetV2(nn.Module):
  6 |     def __init__(self, input_size=224, n_class=1000, model_size='1.5x'):
  7 |         super(ShuffleNetV2, self).__init__()
  8 |         print('model size is ', model_size)
  9 | 
 10 |         self.stage_repeats = [4, 8, 4]
 11 |         self.model_size = model_size
 12 |         if model_size == '0.5x':
 13 |             self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
 14 |         elif model_size == '1.0x':
 15 |             self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
 16 |         elif model_size == '1.5x':
 17 |             self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
 18 |         elif model_size == '2.0x':
 19 |             self.stage_out_channels = [-1, 24, 244, 488, 976, 2048]
 20 |         else:
 21 |             raise NotImplementedError
 22 | 
 23 |         # building first layer
 24 |         input_channel = self.stage_out_channels[1]
 25 |         self.first_conv = nn.Sequential(
 26 |             nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
 27 |             nn.BatchNorm2d(input_channel),
 28 |             nn.ReLU(inplace=True),
 29 |         )
 30 | 
 31 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 32 | 
 33 |         self.features = []
 34 |         for idxstage in range(len(self.stage_repeats)):
 35 |             numrepeat = self.stage_repeats[idxstage]
 36 |             output_channel = self.stage_out_channels[idxstage+2]
 37 | 
 38 |             for i in range(numrepeat):
 39 |                 if i == 0:
 40 |                     self.features.append(ShuffleV2Block(input_channel, output_channel, 
 41 |                                                 mid_channels=output_channel // 2, ksize=3, stride=2))
 42 |                 else:
 43 |                     self.features.append(ShuffleV2Block(input_channel // 2, output_channel, 
 44 |                                                 mid_channels=output_channel // 2, ksize=3, stride=1))
 45 | 
 46 |                 input_channel = output_channel
 47 |                 
 48 |         self.features = nn.Sequential(*self.features)
 49 | 
 50 |         self.conv_last = nn.Sequential(
 51 |             nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
 52 |             nn.BatchNorm2d(self.stage_out_channels[-1]),
 53 |             nn.ReLU(inplace=True)
 54 |         )
 55 |         self.globalpool = nn.AvgPool2d(7)
 56 |         if self.model_size == '2.0x':
 57 |             self.dropout = nn.Dropout(0.2)
 58 |         self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
 59 |         self._initialize_weights()
 60 | 
 61 |     def forward(self, x):
 62 |         x = self.first_conv(x)
 63 |         x = self.maxpool(x)
 64 |         x = self.features(x)
 65 |         x = self.conv_last(x)
 66 | 
 67 |         x = self.globalpool(x)
 68 |         if self.model_size == '2.0x':
 69 |             x = self.dropout(x)
 70 |         x = x.contiguous().view(-1, self.stage_out_channels[-1])
 71 |         x = self.classifier(x)
 72 |         return x
 73 | 
 74 |     def _initialize_weights(self):
 75 |         for name, m in self.named_modules():
 76 |             if isinstance(m, nn.Conv2d):
 77 |                 if 'first' in name:
 78 |                     nn.init.normal_(m.weight, 0, 0.01)
 79 |                 else:
 80 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
 81 |                 if m.bias is not None:
 82 |                     nn.init.constant_(m.bias, 0)
 83 |             elif isinstance(m, nn.BatchNorm2d):
 84 |                 nn.init.constant_(m.weight, 1)
 85 |                 if m.bias is not None:
 86 |                     nn.init.constant_(m.bias, 0.0001)
 87 |                 nn.init.constant_(m.running_mean, 0)
 88 |             elif isinstance(m, nn.BatchNorm1d):
 89 |                 nn.init.constant_(m.weight, 1)
 90 |                 if m.bias is not None:
 91 |                     nn.init.constant_(m.bias, 0.0001)
 92 |                 nn.init.constant_(m.running_mean, 0)
 93 |             elif isinstance(m, nn.Linear):
 94 |                 nn.init.normal_(m.weight, 0, 0.01)
 95 |                 if m.bias is not None:
 96 |                     nn.init.constant_(m.bias, 0)
 97 | 
 98 | if __name__ == "__main__":
 99 |     model = ShuffleNetV2()
100 |     # print(model)
101 | 
102 |     test_data = torch.rand(5, 3, 224, 224)
103 |     test_outputs = model(test_data)
104 |     print(test_outputs.size())
105 | 


--------------------------------------------------------------------------------
/ShuffleNetV1/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from blocks import ShuffleV1Block
  4 | 
  5 | class ShuffleNetV1(nn.Module):
  6 |     def __init__(self, input_size=224, n_class=1000, model_size='2.0x', group=None):
  7 |         super(ShuffleNetV1, self).__init__()
  8 |         print('model size is ', model_size)
  9 | 
 10 |         assert group is not None
 11 | 
 12 |         self.stage_repeats = [4, 8, 4]
 13 |         self.model_size = model_size
 14 |         if group == 3:
 15 |             if model_size == '0.5x':
 16 |                 self.stage_out_channels = [-1, 12, 120, 240, 480]
 17 |             elif model_size == '1.0x':
 18 |                 self.stage_out_channels = [-1, 24, 240, 480, 960]
 19 |             elif model_size == '1.5x':
 20 |                 self.stage_out_channels = [-1, 24, 360, 720, 1440]
 21 |             elif model_size == '2.0x':
 22 |                 self.stage_out_channels = [-1, 48, 480, 960, 1920]
 23 |             else:
 24 |                 raise NotImplementedError
 25 |         elif group == 8:
 26 |             if model_size == '0.5x':
 27 |                 self.stage_out_channels = [-1, 16, 192, 384, 768]
 28 |             elif model_size == '1.0x':
 29 |                 self.stage_out_channels = [-1, 24, 384, 768, 1536]
 30 |             elif model_size == '1.5x':
 31 |                 self.stage_out_channels = [-1, 24, 576, 1152, 2304]
 32 |             elif model_size == '2.0x':
 33 |                 self.stage_out_channels = [-1, 48, 768, 1536, 3072]
 34 |             else:
 35 |                 raise NotImplementedError
 36 | 
 37 |         # building first layer
 38 |         input_channel = self.stage_out_channels[1]
 39 |         self.first_conv = nn.Sequential(
 40 |             nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
 41 |             nn.BatchNorm2d(input_channel),
 42 |             nn.ReLU(inplace=True),
 43 |         )
 44 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 45 | 
 46 |         self.features = []
 47 |         for idxstage in range(len(self.stage_repeats)):
 48 |             numrepeat = self.stage_repeats[idxstage]
 49 |             output_channel = self.stage_out_channels[idxstage+2]
 50 | 
 51 |             for i in range(numrepeat):
 52 |                 stride = 2 if i == 0 else 1
 53 |                 first_group = idxstage == 0 and i == 0
 54 |                 self.features.append(ShuffleV1Block(input_channel, output_channel,
 55 |                                             group=group, first_group=first_group,
 56 |                                             mid_channels=output_channel // 4, ksize=3, stride=stride))
 57 |                 input_channel = output_channel
 58 | 
 59 |         self.features = nn.Sequential(*self.features)
 60 | 
 61 |         self.globalpool = nn.AvgPool2d(7)
 62 | 
 63 |         self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
 64 |         self._initialize_weights()
 65 | 
 66 |     def forward(self, x):
 67 |         x = self.first_conv(x)
 68 |         x = self.maxpool(x)
 69 |         x = self.features(x)
 70 | 
 71 |         x = self.globalpool(x)
 72 |         x = x.contiguous().view(-1, self.stage_out_channels[-1])
 73 |         x = self.classifier(x)
 74 |         return x
 75 | 
 76 |     def _initialize_weights(self):
 77 |         for name, m in self.named_modules():
 78 |             if isinstance(m, nn.Conv2d):
 79 |                 if 'first' in name:
 80 |                     nn.init.normal_(m.weight, 0, 0.01)
 81 |                 else:
 82 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
 83 |                 if m.bias is not None:
 84 |                     nn.init.constant_(m.bias, 0)
 85 |             elif isinstance(m, nn.BatchNorm2d):
 86 |                 nn.init.constant_(m.weight, 1)
 87 |                 if m.bias is not None:
 88 |                     nn.init.constant_(m.bias, 0.0001)
 89 |                 nn.init.constant_(m.running_mean, 0)
 90 |             elif isinstance(m, nn.BatchNorm1d):
 91 |                 nn.init.constant_(m.weight, 1)
 92 |                 if m.bias is not None:
 93 |                     nn.init.constant_(m.bias, 0.0001)
 94 |                 nn.init.constant_(m.running_mean, 0)
 95 |             elif isinstance(m, nn.Linear):
 96 |                 nn.init.normal_(m.weight, 0, 0.01)
 97 |                 if m.bias is not None:
 98 |                     nn.init.constant_(m.bias, 0)
 99 | 
if __name__ == "__main__":
    # Smoke test: push a random batch through a group-3 model and print
    # the shape of the output.
    net = ShuffleNetV1(group=3)
    dummy_batch = torch.rand(5, 3, 224, 224)
    print(net(dummy_batch).size())
107 | 


--------------------------------------------------------------------------------
/OneShot/blocks.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
  5 | class Shufflenet(nn.Module):
  6 | 
  7 |     def __init__(self, inp, oup, mid_channels, *, ksize, stride):
  8 |         super(Shufflenet, self).__init__()
  9 |         self.stride = stride
 10 |         assert stride in [1, 2]
 11 |         assert ksize in [3, 5, 7]
 12 | 
 13 |         self.base_mid_channel = mid_channels
 14 |         self.ksize = ksize
 15 |         pad = ksize // 2
 16 |         self.pad = pad
 17 |         self.inp = inp
 18 | 
 19 |         outputs = oup - inp
 20 | 
 21 |         branch_main = [
 22 |             # pw
 23 |             nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
 24 |             nn.BatchNorm2d(mid_channels),
 25 |             nn.ReLU(inplace=True),
 26 |             # dw
 27 |             nn.Conv2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False),
 28 |             nn.BatchNorm2d(mid_channels),
 29 |             # pw-linear
 30 |             nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False),
 31 |             nn.BatchNorm2d(outputs),
 32 |             nn.ReLU(inplace=True),
 33 |         ]
 34 |         self.branch_main = nn.Sequential(*branch_main)
 35 | 
 36 |         if stride == 2:
 37 |             branch_proj = [
 38 |                 # dw
 39 |                 nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
 40 |                 nn.BatchNorm2d(inp),
 41 |                 # pw-linear
 42 |                 nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
 43 |                 nn.BatchNorm2d(inp),
 44 |                 nn.ReLU(inplace=True),
 45 |             ]
 46 |             self.branch_proj = nn.Sequential(*branch_proj)
 47 | 
 48 |     def forward(self, old_x):
 49 |         if self.stride==1:
 50 |             x_proj, x = channel_shuffle(old_x)
 51 |             return torch.cat((x_proj, self.branch_main(x)), 1)
 52 |         elif self.stride==2:
 53 |             x_proj = old_x
 54 |             x = old_x
 55 |             return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
 56 | 
 57 | class Shuffle_Xception(nn.Module):
 58 | 
 59 |     def __init__(self, inp, oup, mid_channels, *, stride):
 60 |         super(Shuffle_Xception, self).__init__()
 61 | 
 62 |         assert stride in [1, 2]
 63 | 
 64 |         self.base_mid_channel = mid_channels
 65 |         self.stride = stride
 66 |         self.ksize = 3
 67 |         self.pad = 1
 68 |         self.inp = inp
 69 |         outputs = oup - inp
 70 | 
 71 |         branch_main = [
 72 |             # dw
 73 |             nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
 74 |             nn.BatchNorm2d(inp),
 75 |             # pw
 76 |             nn.Conv2d(inp, mid_channels, 1, 1, 0, bias=False),
 77 |             nn.BatchNorm2d(mid_channels),
 78 |             nn.ReLU(inplace=True),
 79 |             # dw
 80 |             nn.Conv2d(mid_channels, mid_channels, 3, 1, 1, groups=mid_channels, bias=False),
 81 |             nn.BatchNorm2d(mid_channels),
 82 |             # pw
 83 |             nn.Conv2d(mid_channels, mid_channels, 1, 1, 0, bias=False),
 84 |             nn.BatchNorm2d(mid_channels),
 85 |             nn.ReLU(inplace=True),
 86 |             # dw
 87 |             nn.Conv2d(mid_channels, mid_channels, 3, 1, 1, groups=mid_channels, bias=False),
 88 |             nn.BatchNorm2d(mid_channels),
 89 |             # pw
 90 |             nn.Conv2d(mid_channels, outputs, 1, 1, 0, bias=False),
 91 |             nn.BatchNorm2d(outputs),
 92 |             nn.ReLU(inplace=True),
 93 |         ]
 94 | 
 95 |         self.branch_main = nn.Sequential(*branch_main)
 96 | 
 97 |         if self.stride == 2:
 98 |             branch_proj = [
 99 |                 # dw
100 |                 nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
101 |                 nn.BatchNorm2d(inp),
102 |                 # pw-linear
103 |                 nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
104 |                 nn.BatchNorm2d(inp),
105 |                 nn.ReLU(inplace=True),
106 |             ]
107 |             self.branch_proj = nn.Sequential(*branch_proj)
108 | 
109 |     def forward(self, old_x):
110 |         if self.stride==1:
111 |             x_proj, x = channel_shuffle(old_x)
112 |             return torch.cat((x_proj, self.branch_main(x)), 1)
113 |         elif self.stride==2:
114 |             x_proj = old_x
115 |             x = old_x
116 |             return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
117 | 
118 | 
def channel_shuffle(x):
    """Split a 4-D tensor into its even- and odd-indexed channels.

    Args:
        x: tensor of shape (batch, channels, height, width); the channel
            count must be divisible by 4.

    Returns:
        A pair of tensors, each of shape (batch, channels // 2, height,
        width): channels 0, 2, 4, ... and channels 1, 3, 5, ...
    """
    batch, channels, height, width = x.shape
    assert channels % 4 == 0
    # Group neighbouring channels into pairs, then bring the position
    # inside the pair to the front so that indexing it separates them.
    pairs = x.reshape(batch * channels // 2, 2, height * width)
    halves = pairs.permute(1, 0, 2).reshape(2, -1, channels // 2, height, width)
    first_half, second_half = halves[0], halves[1]
    return first_half, second_half
126 | 


--------------------------------------------------------------------------------
/DetNAS/network.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | from blocks import ConvBNReLU, FC, ShuffleNetV2BlockSearched
  3 | 
  4 | 
  5 | class ShuffleNetV2DetNAS(nn.Module):
  6 |     def __init__(self, n_class=1000, model_size='VOC_FPN_300M'):
  7 |         super(ShuffleNetV2DetNAS, self).__init__()
  8 |         print('Model size is {}.'.format(model_size))
  9 | 
 10 |         if model_size == 'COCO_FPN_3.8G':
 11 |             architecture = [0, 0, 3, 1, 2, 1, 0, 2, 0, 3, 1, 2, 3, 3, 2, 0, 2, 1, 1, 3,
 12 |                             2, 0, 2, 2, 2, 1, 3, 1, 0, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3]
 13 |             stage_repeats = [8, 8, 16, 8]
 14 |             stage_out_channels = [-1, 72, 172, 432, 864, 1728, 1728]
 15 |         elif model_size == 'COCO_FPN_1.3G':
 16 |             architecture = [0, 0, 3, 1, 2, 1, 0, 2, 0, 3, 1, 2, 3, 3, 2, 0, 2, 1, 1, 3,
 17 |                             2, 0, 2, 2, 2, 1, 3, 1, 0, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3]
 18 |             stage_repeats = [8, 8, 16, 8]
 19 |             stage_out_channels = [-1, 48, 96, 240, 480, 960, 1024]
 20 |         elif model_size == 'COCO_FPN_300M':
 21 |             architecture = [2, 1, 2, 0, 2, 1, 1, 2, 3, 3, 1, 3, 0, 0, 3, 1, 3, 1, 3, 2]
 22 |             stage_repeats = [4, 4, 8, 4]
 23 |             stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
 24 |         elif model_size == 'COCO_RetinaNet_300M':
 25 |             architecture = [2, 3, 1, 1, 3, 2, 1, 3, 3, 1, 1, 1, 3, 3, 2, 0, 3, 3, 3, 3]
 26 |             stage_repeats = [4, 4, 8, 4]
 27 |             stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
 28 |         elif model_size == 'VOC_FPN_300M':
 29 |             architecture = [2, 1, 0, 3, 1, 3, 0, 3, 2, 0, 1, 1, 3, 3, 3, 3, 3, 3, 3, 1]
 30 |             stage_repeats = [4, 4, 8, 4]
 31 |             stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
 32 |         elif model_size == 'VOC_RetinaNet_300M':
 33 |             architecture = [1, 3, 0, 0, 2, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 0, 2, 3, 1, 1]
 34 |             stage_repeats = [4, 4, 8, 4]
 35 |             stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
 36 |         else:
 37 |             raise NotImplementedError
 38 | 
 39 |         self.first_conv = ConvBNReLU(in_channel=3, out_channel=stage_out_channels[1], k_size=3, stride=2, padding=1, gaussian_init=True)
 40 | 
 41 |         self.features = list()
 42 | 
 43 |         in_channels = stage_out_channels[1]
 44 |         i_th = 0
 45 |         for id_stage in range(1, len(stage_repeats) + 1):
 46 |             out_channels = stage_out_channels[id_stage + 1]
 47 |             repeats = stage_repeats[id_stage - 1]
 48 |             for id_repeat in range(repeats):
 49 |                 prefix = str(id_stage) + chr(ord('a') + id_repeat)
 50 |                 stride = 1 if id_repeat > 0 else 2
 51 |                 self.features.append(ShuffleNetV2BlockSearched(prefix, in_channels=in_channels, out_channels=out_channels,
 52 |                                                                stride=stride, base_mid_channels=out_channels // 2, i_th=i_th,
 53 |                                                                architecture=architecture))
 54 |                 in_channels = out_channels
 55 |                 i_th += 1
 56 | 
 57 |         self.features = nn.Sequential(*self.features)
 58 | 
 59 |         self.last_conv = ConvBNReLU(in_channel=in_channels, out_channel=stage_out_channels[-1], k_size=1, stride=1, padding=0)
 60 |         self.drop_out = nn.Dropout2d(p=0.2)
 61 |         self.global_pool = nn.AvgPool2d(7)
 62 |         self.fc = FC(in_channels=stage_out_channels[-1], out_channels=n_class)
 63 |         self._initialize_weights()
 64 | 
 65 |     def _initialize_weights(self):
 66 |         for name, m in self.named_modules():
 67 |             if isinstance(m, nn.Conv2d):
 68 |                 if 'first' in name:
 69 |                     nn.init.normal_(m.weight, 0, 0.01)
 70 |                 else:
 71 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
 72 |                 if m.bias is not None:
 73 |                     nn.init.constant_(m.bias, 0)
 74 |             elif isinstance(m, nn.BatchNorm2d):
 75 |                 nn.init.constant_(m.weight, 1)
 76 |                 if m.bias is not None:
 77 |                     nn.init.constant_(m.bias, 0.0001)
 78 |                 nn.init.constant_(m.running_mean, 0)
 79 |             elif isinstance(m, nn.BatchNorm1d):
 80 |                 nn.init.constant_(m.weight, 1)
 81 |                 if m.bias is not None:
 82 |                     nn.init.constant_(m.bias, 0.0001)
 83 |                 nn.init.constant_(m.running_mean, 0)
 84 |             elif isinstance(m, nn.Linear):
 85 |                 nn.init.normal_(m.weight, 0, 0.01)
 86 |                 if m.bias is not None:
 87 |                     nn.init.constant_(m.bias, 0)
 88 | 
 89 |     def forward(self, x):
 90 |         x = self.first_conv(x)
 91 | 
 92 |         x = self.features(x)
 93 | 
 94 |         x = self.last_conv(x)
 95 |         x = self.drop_out(x)
 96 |         x = self.global_pool(x).view(x.size(0), -1)
 97 |         x = self.fc(x)
 98 |         return x
 99 | 
100 | 
def create_network():
    """Build the 'COCO_FPN_1.3G' model, print it and return it."""
    net = ShuffleNetV2DetNAS(model_size='COCO_FPN_1.3G')
    print(net)
    return net
105 | 
106 | 
if __name__ == "__main__":
    # Running this file directly builds the network via create_network(),
    # which also prints it.
    create_network()
109 | 
110 | 


--------------------------------------------------------------------------------
/OneShot/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from blocks import Shufflenet, Shuffle_Xception
  4 | 
  5 | class ShuffleNetV2_OneShot(nn.Module):
  6 |     def __init__(self, input_size=224, n_class=1000, architecture=None, channels_scales=None):
  7 |         super(ShuffleNetV2_OneShot, self).__init__()
  8 | 
  9 |         assert input_size % 32 == 0
 10 |         assert architecture is not None and channels_scales is not None
 11 | 
 12 |         self.stage_repeats = [4, 4, 8, 4]
 13 |         self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
 14 | 
 15 |         # building first layer
 16 |         input_channel = self.stage_out_channels[1]
 17 |         self.first_conv = nn.Sequential(
 18 |             nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
 19 |             nn.BatchNorm2d(input_channel),
 20 |             nn.ReLU(inplace=True),
 21 |         )
 22 | 
 23 |         self.features = []
 24 |         archIndex = 0
 25 |         for idxstage in range(len(self.stage_repeats)):
 26 |             numrepeat = self.stage_repeats[idxstage]
 27 |             output_channel = self.stage_out_channels[idxstage+2]
 28 | 
 29 |             for i in range(numrepeat):
 30 |                 if i == 0:
 31 |                     inp, outp, stride = input_channel, output_channel, 2
 32 |                 else:
 33 |                     inp, outp, stride = input_channel // 2, output_channel, 1
 34 | 
 35 |                 blockIndex = architecture[archIndex]
 36 |                 base_mid_channels = outp // 2
 37 |                 mid_channels = int(base_mid_channels * channels_scales[archIndex])
 38 |                 archIndex += 1
 39 |                 if blockIndex == 0:
 40 |                     print('Shuffle3x3')
 41 |                     self.features.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3, stride=stride))
 42 |                 elif blockIndex == 1:
 43 |                     print('Shuffle5x5')
 44 |                     self.features.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5, stride=stride))
 45 |                 elif blockIndex == 2:
 46 |                     print('Shuffle7x7')
 47 |                     self.features.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7, stride=stride))
 48 |                 elif blockIndex == 3:
 49 |                     print('Xception')
 50 |                     self.features.append(Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride))
 51 |                 else:
 52 |                     raise NotImplementedError
 53 |                 input_channel = output_channel
 54 | 
 55 |         assert archIndex == len(architecture)
 56 |         self.features = nn.Sequential(*self.features)
 57 | 
 58 |         self.conv_last = nn.Sequential(
 59 |             nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
 60 |             nn.BatchNorm2d(self.stage_out_channels[-1]),
 61 |             nn.ReLU(inplace=True),
 62 |         )
 63 |         self.globalpool = nn.AvgPool2d(7)
 64 |         self.dropout = nn.Dropout(0.1)
 65 |         self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
 66 |         self._initialize_weights()
 67 | 
 68 |     def forward(self, x):
 69 |         x = self.first_conv(x)
 70 |         x = self.features(x)
 71 |         x = self.conv_last(x)
 72 | 
 73 |         x = self.globalpool(x)
 74 |         
 75 |         x = self.dropout(x)
 76 |         x = x.contiguous().view(-1, self.stage_out_channels[-1])
 77 |         x = self.classifier(x)
 78 |         return x
 79 | 
 80 |     def _initialize_weights(self):
 81 |         for name, m in self.named_modules():
 82 |             if isinstance(m, nn.Conv2d):
 83 |                 if 'first' in name:
 84 |                     nn.init.normal_(m.weight, 0, 0.01)
 85 |                 else:
 86 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
 87 |                 if m.bias is not None:
 88 |                     nn.init.constant_(m.bias, 0)
 89 |             elif isinstance(m, nn.BatchNorm2d):
 90 |                 nn.init.constant_(m.weight, 1)
 91 |                 if m.bias is not None:
 92 |                     nn.init.constant_(m.bias, 0.0001)
 93 |                 nn.init.constant_(m.running_mean, 0)
 94 |             elif isinstance(m, nn.BatchNorm1d):
 95 |                 nn.init.constant_(m.weight, 1)
 96 |                 if m.bias is not None:
 97 |                     nn.init.constant_(m.bias, 0.0001)
 98 |                 nn.init.constant_(m.running_mean, 0)
 99 |             elif isinstance(m, nn.Linear):
100 |                 nn.init.normal_(m.weight, 0, 0.01)
101 |                 if m.bias is not None:
102 |                     nn.init.constant_(m.bias, 0)
103 | 
if __name__ == "__main__":
    # Smoke test: build one sampled architecture and push a random batch through it.
    architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
    scale_list = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6]
    scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
    # Translate each per-block scale id into its channel multiplier.
    channels_scales = [scale_list[scale_id] for scale_id in scale_ids]
    model = ShuffleNetV2_OneShot(architecture=architecture, channels_scales=channels_scales)

    test_data = torch.rand(5, 3, 224, 224)
    print(model(test_data).size())
117 | 


--------------------------------------------------------------------------------
/ShuffleNetV2+/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from blocks import Shufflenet, Shuffle_Xception, HS, SELayer
  4 | 
  5 | class ShuffleNetV2_Plus(nn.Module):
  6 |     def __init__(self, input_size=224, n_class=1000, architecture=None, model_size='Large'):
  7 |         super(ShuffleNetV2_Plus, self).__init__()
  8 | 
  9 |         print('model size is ', model_size)
 10 | 
 11 |         assert input_size % 32 == 0
 12 |         assert architecture is not None
 13 | 
 14 |         self.stage_repeats = [4, 4, 8, 4]
 15 |         if model_size == 'Large':
 16 |             self.stage_out_channels = [-1, 16, 68, 168, 336, 672, 1280]
 17 |         elif model_size == 'Medium':
 18 |             self.stage_out_channels = [-1, 16, 48, 128, 256, 512, 1280]
 19 |         elif model_size == 'Small':
 20 |             self.stage_out_channels = [-1, 16, 36, 104, 208, 416, 1280]
 21 |         else:
 22 |             raise NotImplementedError
 23 | 
 24 | 
 25 |         # building first layer
 26 |         input_channel = self.stage_out_channels[1]
 27 |         self.first_conv = nn.Sequential(
 28 |             nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
 29 |             nn.BatchNorm2d(input_channel),
 30 |             HS(),
 31 |         )
 32 | 
 33 |         self.features = []
 34 |         archIndex = 0
 35 |         for idxstage in range(len(self.stage_repeats)):
 36 |             numrepeat = self.stage_repeats[idxstage]
 37 |             output_channel = self.stage_out_channels[idxstage+2]
 38 | 
 39 |             activation = 'HS' if idxstage >= 1 else 'ReLU'
 40 |             useSE = 'True' if idxstage >= 2 else False
 41 | 
 42 |             for i in range(numrepeat):
 43 |                 if i == 0:
 44 |                     inp, outp, stride = input_channel, output_channel, 2
 45 |                 else:
 46 |                     inp, outp, stride = input_channel // 2, output_channel, 1
 47 | 
 48 |                 blockIndex = architecture[archIndex]
 49 |                 archIndex += 1
 50 |                 if blockIndex == 0:
 51 |                     print('Shuffle3x3')
 52 |                     self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=3, stride=stride,
 53 |                                     activation=activation, useSE=useSE))
 54 |                 elif blockIndex == 1:
 55 |                     print('Shuffle5x5')
 56 |                     self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=5, stride=stride,
 57 |                                     activation=activation, useSE=useSE))
 58 |                 elif blockIndex == 2:
 59 |                     print('Shuffle7x7')
 60 |                     self.features.append(Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=7, stride=stride,
 61 |                                     activation=activation, useSE=useSE))
 62 |                 elif blockIndex == 3:
 63 |                     print('Xception')
 64 |                     self.features.append(Shuffle_Xception(inp, outp, base_mid_channels=outp // 2, stride=stride,
 65 |                                     activation=activation, useSE=useSE))
 66 |                 else:
 67 |                     raise NotImplementedError
 68 |                 input_channel = output_channel
 69 |         assert archIndex == len(architecture)
 70 |         self.features = nn.Sequential(*self.features)
 71 | 
 72 |         self.conv_last = nn.Sequential(
 73 |             nn.Conv2d(input_channel, 1280, 1, 1, 0, bias=False),
 74 |             nn.BatchNorm2d(1280),
 75 |             HS()
 76 |         )
 77 |         self.globalpool = nn.AvgPool2d(7)
 78 |         self.LastSE = SELayer(1280)
 79 |         self.fc = nn.Sequential(
 80 |             nn.Linear(1280, 1280, bias=False),
 81 |             HS(),
 82 |         )
 83 |         self.dropout = nn.Dropout(0.2)
 84 |         self.classifier = nn.Sequential(nn.Linear(1280, n_class, bias=False))
 85 |         self._initialize_weights()
 86 | 
 87 |     def forward(self, x):
 88 |         x = self.first_conv(x)
 89 |         x = self.features(x)
 90 |         x = self.conv_last(x)
 91 | 
 92 |         x = self.globalpool(x)
 93 |         x = self.LastSE(x)
 94 | 
 95 |         x = x.contiguous().view(-1, 1280)
 96 | 
 97 |         x = self.fc(x)
 98 |         x = self.dropout(x)
 99 |         x = self.classifier(x)
100 |         return x
101 | 
102 |     def _initialize_weights(self):
103 |         for name, m in self.named_modules():
104 |             if isinstance(m, nn.Conv2d):
105 |                 if 'first' in name or 'SE' in name:
106 |                     nn.init.normal_(m.weight, 0, 0.01)
107 |                 else:
108 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
109 |                 if m.bias is not None:
110 |                     nn.init.constant_(m.bias, 0)
111 |             elif isinstance(m, nn.BatchNorm2d):
112 |                 nn.init.constant_(m.weight, 1)
113 |                 if m.bias is not None:
114 |                     nn.init.constant_(m.bias, 0.0001)
115 |                 nn.init.constant_(m.running_mean, 0)
116 |             elif isinstance(m, nn.BatchNorm1d):
117 |                 nn.init.constant_(m.weight, 1)
118 |                 if m.bias is not None:
119 |                     nn.init.constant_(m.bias, 0.0001)
120 |                 nn.init.constant_(m.running_mean, 0)
121 |             elif isinstance(m, nn.Linear):
122 |                 nn.init.normal_(m.weight, 0, 0.01)
123 |                 if m.bias is not None:
124 |                     nn.init.constant_(m.bias, 0)
125 | 
if __name__ == "__main__":
    # Smoke test: build the default (Large) model and run one random batch.
    architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
    model = ShuffleNetV2_Plus(architecture=architecture)

    test_data = torch.rand(5, 3, 224, 224)
    print(model(test_data).size())
134 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ShuffleNet Series
  2 | ShuffleNet Series by Megvii Research.
  3 | 
  4 | ## Introduction
  5 | This repository contains the following ShuffleNet series models:
  6 | - ShuffleNetV1:   [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)
  7 | -  ShuffleNetV2:   [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164)
-  ShuffleNetV2+:  A strengthened version of ShuffleNetV2.
  9 | -  ShuffleNetV2.Large:  A deeper version based on ShuffleNetV2 with 10G+ FLOPs.
 10 | -  ShuffleNetV2.ExLarge:  A deeper version based on ShuffleNetV2 with 40G+ FLOPs.
 11 | -  OneShot:    [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420)
 12 | -  DetNAS:     [DetNAS: Backbone Search for Object Detection](https://arxiv.org/abs/1903.10979)
 13 | 
 14 | ## Trained Models
 15 | - OneDrive download: [Link](https://1drv.ms/f/s!AgaP37NGYuEXhRfQxHRseR7eSxXo)
 16 | - BaiduYun download: [Link](https://pan.baidu.com/s/1EUQVoFPb74yZm0JWHKjFOw) (extract code: mc24)
 17 | 
 18 | ## Details
 19 | 
 20 | ### ShuffleNetV2+
 21 | The following is the comparison between ShuffleNetV2+ and [MobileNetV3](https://arxiv.org/pdf/1905.02244). Details can be seen in [ShuffleNetV2+](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2%2B).
 22 | 
 23 | |    Model                 |  FLOPs    |   #Params |   Top-1   |   Top-5   |
 24 | |:------------------------|:---------:|:---------:|:---------:|:---------:|
 25 | ShuffleNetV2+ Large        |   360M     |	6.7M    |      **22.9**    |       6.7   |
 26 | MobileNetV3 Large 224/1.25       |   356M     |	7.5M    |      23.4    |       -   |
 27 | ShuffleNetV2+ Medium       |   222M     |	5.6M    |      **24.3**    |       7.4    |
 28 | MobileNetV3 Large 224/1.0       |   217M     |	5.4M    |      24.8    |       -    |
 29 | ShuffleNetV2+ Small        |   156M     |	5.1M    |      **25.9**    |       8.3    |
 30 | MobileNetV3 Large 224/0.75        |   155M     |	4.0M    |      26.7    |       -    |
 31 | 
 32 | ### ShuffleNetV2
 33 | The following is the comparison between ShuffleNetV2 and [MobileNetV2](https://arxiv.org/abs/1801.04381). Details can be seen in [ShuffleNetV2](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2).
 34 | 
 35 | | Model                   | FLOPs | #Params  | Top-1        | Top-5     |
 36 | | :--------------------- | :---: | :------: | :----------: | :------:  |
 37 | |    ShuffleNetV2 2.0x    | 591M  |     7.4M |     **25.0** 	|     7.6   |
 38 | | MobileNetV2 (1.4) | 585M | 6.9M | 25.3 | - |
 39 | |    ShuffleNetV2 1.5x    | 299M  |     3.5M |     **27.4** 	|     9.4   | 
 40 | | MobileNetV2 | 300M | 3.4M | 28.0 | - | 
 41 | |    ShuffleNetV2 1.0x    | 146M  |     2.3M |     30.6 	|    11.1   |   
 42 | |    ShuffleNetV2 0.5x    |  41M  |     1.4M |     38.9 	|    17.4   |
 43 | 
 44 | ### ShuffleNetV2.Large
 45 | The following is the comparison between ShuffleNetV2.Large and [SENet](https://arxiv.org/abs/1709.01507). Details can be seen in [ShuffleNetV2.Large](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2.Large).
 46 | 
 47 | | Model                  | FLOPs | #Params   | Top-1     | Top-5 |
 48 | | :--------------------- | :---: | :------:  | :---:     | :---: |
 49 | | ShuffleNetV2.Large     | 12.7G | 140.7M    | **18.56** | 4.48  |
 50 | | SENet                  | 20.7G |    -      | 18.68     | 4.47  |
 51 | 
 52 | ### ShuffleNetV2.ExLarge
 53 | The following is the result of ShuffleNetV2.ExLarge. Details can be seen in [ShuffleNetV2.ExLarge](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV2.ExLarge).
 54 | 
 55 | | Model                  | FLOPs | #Params   | Top-1     | Top-5 |
 56 | | :--------------------- | :---: | :------:  | :---:     | :---: |
 57 | | ShuffleNetV2.ExLarge     | 46.2G | 254.7M    | 15.52 | 2.9  |
 58 | 
 59 | 
 60 | 
 61 | ### ShuffleNetV1
 62 | The following is the comparison between ShuffleNetV1 and [MobileNetV1](https://arxiv.org/abs/1704.04861). Details can be seen in [ShuffleNetV1](https://github.com/megvii-model/ShuffleNet-Series/tree/master/ShuffleNetV1).
 63 | 
 64 | |    Model                 |  FLOPs    |   #Params |   Top-1   |   Top-5   |
 65 | |:------------------------|:---------:|:---------:|:---------:|:---------:|
 66 | ShuffleNetV1 2.0x (group=3)|    524M    |	5.4M    |      **25.9**    |        8.6   |
 67 | ShuffleNetV1 2.0x (group=8)|    522M    |   6.5M    |      27.1    |        9.2   |
 68 | 1.0 MobileNetV1-224 |    569M    |   4.2M    |      29.4    |        -   |
 69 | ShuffleNetV1 1.5x (group=3)|    292M    |	3.4M    |      **28.4**    |        9.8   |
 70 | ShuffleNetV1 1.5x (group=8)|    290M    |   4.3M    |      29.0    |       10.4   |
 71 | 0.75 MobileNetV1-224 |    325M    |   2.6M    |      31.6    |        -   |
 72 | ShuffleNetV1 1.0x (group=3)|   138M     |	1.9M    |      32.2    |       12.3    |
 73 | ShuffleNetV1 1.0x (group=8)|    138M    |   2.4M    |      **32.0**    |       13.6   |
 74 | 0.5 MobileNetV1-224 |    149M    |   1.3M    |      36.3    |        -   |
 75 | ShuffleNetV1 0.5x (group=3)|   38M      |	0.7M    |      42.7    |       20.0    |
 76 | ShuffleNetV1 0.5x (group=8)|    40M     |   1.0M    |      **41.2**    |       19.0   |
 77 | 0.25 MobileNetV1-224 |    41M    |   0.5M    |      49.4    |        -   |
 78 | 
 79 | 
 80 | ### OneShot
 81 | The following is the comparison between Single Path One-Shot NAS and other NAS counterparts. Details can be seen in [OneShot](https://github.com/megvii-model/ShuffleNet-Series/tree/master/OneShot).
 82 | 
 83 | | Model                  | FLOPs | #Params   | Top-1 | Top-5 |
 84 | | :--------------------- | :---: | :------:  | :---: | :---: |
 85 | |    OneShot |  328M |  3.4M |  **25.1**   |   8.0   |
 86 | |    NASNET-A|  564M |  5.3M |  26.0   |   8.4   |
 87 | |    PNASNET|  588M |  5.1M |  25.8   |   8.1   |
 88 | |    MnasNet|  317M |  4.2M |  26.0   |  8.2   |
 89 | |    DARTS|  574M|  4.7M |  26.7   |   8.7  |
 90 | |    FBNet-B|  295M|  4.5M |  25.9   |   -   |
 91 | 
 92 | ### DetNAS
 93 | The following is the performance of DetNAS backbones on ImageNet, compared with ResNet. Backbone details can be seen in [DetNAS](https://github.com/megvii-model/ShuffleNet-Series/tree/master/DetNAS).
 94 | 
 95 | | Model                  | FLOPs| #Params| Top-1    | Top-5 |         mAP*       |
 96 | | :------------          | :---:| :-----:| :---:    | :---: | :--------------:   |
 97 | |300M (VOC, RetinaNet)   | 300M |  3.5M  |  25.4  |  8.1  |       80.1         |
 98 | |300M (VOC, FPN)       | 300M |  3.7M  |  25.9    |  8.3  |       81.5         |
 99 | |300M (COCO, RetinaNet)  | 300M |  3.7M  |  26.0    |  8.4  |       33.3         |
100 | |300M (COCO, FPN)        | 300M |  3.5M  |  26.2    |  8.4  |       36.4         |
101 | |1.3G (COCO, FPN)      | 1.3G |  10.4M | **22.8** |  6.5  |       40.0         |
102 | |3.8G (COCO, FPN)        | 3.8G |  29.5M | **21.6** |  6.3  |     **42.0**       |
103 | |ResNet50 (COCO, FPN)    | 3.8G |  -     |  23.9    |  7.1  |       37.3         |
104 | |ResNet101 (COCO, FPN)   | 7.6G |  -     |  22.6    |  6.4  |       40.0         |
105 | 
106 | * More about DetNAS in [Link](https://github.com/megvii-model/DetNAS).
107 | 
108 | 


--------------------------------------------------------------------------------
/ShuffleNetV2+/blocks.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
  5 | class SELayer(nn.Module):
  6 | 
  7 | 	def __init__(self, inplanes, isTensor=True):
  8 | 		super(SELayer, self).__init__()
  9 | 		if isTensor:
 10 | 			# if the input is (N, C, H, W)
 11 | 			self.SE_opr = nn.Sequential(
 12 | 				nn.AdaptiveAvgPool2d(1),
 13 | 				nn.Conv2d(inplanes, inplanes // 4, kernel_size=1, stride=1, bias=False),
 14 | 				nn.BatchNorm2d(inplanes // 4),
 15 | 				nn.ReLU(inplace=True),
 16 | 				nn.Conv2d(inplanes // 4, inplanes, kernel_size=1, stride=1, bias=False),
 17 | 			)
 18 | 		else:
 19 | 			# if the input is (N, C)
 20 | 			self.SE_opr = nn.Sequential(
 21 | 				nn.AdaptiveAvgPool2d(1),
 22 | 				nn.Linear(inplanes, inplanes // 4, bias=False),
 23 | 				nn.BatchNorm1d(inplanes // 4),
 24 | 				nn.ReLU(inplace=True),
 25 | 				nn.Linear(inplanes // 4, inplanes, bias=False),
 26 | 			)
 27 | 
 28 | 	def forward(self, x):
 29 | 		atten = self.SE_opr(x)
 30 | 		atten = torch.clamp(atten + 3, 0, 6) / 6
 31 | 		return x * atten
 32 | 
 33 | 
 34 | class HS(nn.Module):
 35 | 
 36 | 	def __init__(self):
 37 | 		super(HS, self).__init__()
 38 | 
 39 | 	def forward(self, inputs):
 40 | 		clip = torch.clamp(inputs + 3, 0, 6) / 6
 41 | 		return inputs * clip
 42 | 
 43 | 
 44 | 
 45 | class Shufflenet(nn.Module):
 46 | 
 47 |     def __init__(self, inp, oup, base_mid_channels, *, ksize, stride, activation, useSE):
 48 |         super(Shufflenet, self).__init__()
 49 |         self.stride = stride
 50 |         assert stride in [1, 2]
 51 |         assert ksize in [3, 5, 7]
 52 |         assert base_mid_channels == oup//2
 53 | 
 54 |         self.base_mid_channel = base_mid_channels
 55 |         self.ksize = ksize
 56 |         pad = ksize // 2
 57 |         self.pad = pad
 58 |         self.inp = inp
 59 | 
 60 |         outputs = oup - inp
 61 | 
 62 |         branch_main = [
 63 |             # pw
 64 |             nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False),
 65 |             nn.BatchNorm2d(base_mid_channels),
 66 |             None,
 67 |             # dw
 68 |             nn.Conv2d(base_mid_channels, base_mid_channels, ksize, stride, pad, groups=base_mid_channels, bias=False),
 69 |             nn.BatchNorm2d(base_mid_channels),
 70 |             # pw-linear
 71 |             nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False),
 72 |             nn.BatchNorm2d(outputs),
 73 |             None,
 74 |         ]
 75 |         if activation == 'ReLU':
 76 |             assert useSE == False
 77 |             '''This model should not have SE with ReLU'''
 78 |             branch_main[2] = nn.ReLU(inplace=True)
 79 |             branch_main[-1] = nn.ReLU(inplace=True)
 80 |         else:
 81 |             branch_main[2] = HS()
 82 |             branch_main[-1] = HS()
 83 |             if useSE:
 84 |                 branch_main.append(SELayer(outputs))
 85 |         self.branch_main = nn.Sequential(*branch_main)
 86 | 
 87 |         if stride == 2:
 88 |             branch_proj = [
 89 |                 # dw
 90 |                 nn.Conv2d(inp, inp, ksize, stride, pad, groups=inp, bias=False),
 91 |                 nn.BatchNorm2d(inp),
 92 |                 # pw-linear
 93 |                 nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
 94 |                 nn.BatchNorm2d(inp),
 95 |                 None,
 96 |             ]
 97 |             if activation == 'ReLU':
 98 |                 branch_proj[-1] = nn.ReLU(inplace=True)
 99 |             else:
100 |                 branch_proj[-1] = HS()
101 |             self.branch_proj = nn.Sequential(*branch_proj)
102 |         else:
103 |             self.branch_proj = None
104 | 
105 |     def forward(self, old_x):
106 |         if self.stride==1:
107 |             x_proj, x = channel_shuffle(old_x)
108 |             return torch.cat((x_proj, self.branch_main(x)), 1)
109 |         elif self.stride==2:
110 |             x_proj = old_x
111 |             x = old_x
112 |             return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
113 | 
class Shuffle_Xception(nn.Module):
    """Two-branch shuffle unit whose main branch stacks three dw/pw conv pairs.

    With ``stride == 2`` a depthwise + pointwise projection branch processes
    the same input; with ``stride == 1`` the input is split by
    ``channel_shuffle`` and one half is passed through untouched. The two
    branch outputs are concatenated on the channel axis.

    Args:
        inp: Channels entering the main branch.
        oup: Total output channels; the main branch emits ``oup - inp``.
        base_mid_channels: Hidden width of the main branch; must be ``oup // 2``.
        stride: 1 or 2.
        activation: ``'ReLU'`` selects ReLU; anything else selects ``HS``.
        useSE: Append an ``SELayer`` to the main branch (not allowed with ReLU).
    """

    def __init__(self, inp, oup, base_mid_channels, *, stride, activation, useSE):
        super(Shuffle_Xception, self).__init__()

        assert stride in [1, 2]
        assert base_mid_channels == oup//2

        self.base_mid_channel = base_mid_channels
        self.stride = stride
        self.ksize = 3
        self.pad = 1
        self.inp = inp
        outputs = oup - inp

        # Only the first depthwise conv carries the block stride. If all three
        # were strided, a stride-2 block would shrink the main branch 8x while
        # the projection branch shrinks 2x, and the concatenation in forward()
        # would fail on mismatched spatial sizes.
        branch_main = [
            # dw
            nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
            nn.BatchNorm2d(inp),
            # pw
            nn.Conv2d(inp, base_mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            None,
            # dw
            nn.Conv2d(base_mid_channels, base_mid_channels, 3, 1, 1, groups=base_mid_channels, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            # pw
            nn.Conv2d(base_mid_channels, base_mid_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            None,
            # dw
            nn.Conv2d(base_mid_channels, base_mid_channels, 3, 1, 1, groups=base_mid_channels, bias=False),
            nn.BatchNorm2d(base_mid_channels),
            # pw
            nn.Conv2d(base_mid_channels, outputs, 1, 1, 0, bias=False),
            nn.BatchNorm2d(outputs),
            None,
        ]

        # Fill the three activation placeholders (after each pointwise conv).
        for slot in (4, 9, 14):
            branch_main[slot] = nn.ReLU(inplace=True) if activation == 'ReLU' else HS()
        assert None not in branch_main

        if useSE:
            assert activation != 'ReLU'
            branch_main.append(SELayer(outputs))

        self.branch_main = nn.Sequential(*branch_main)

        if self.stride == 2:
            branch_proj = [
                # dw
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                # pw-linear
                nn.Conv2d(inp, inp, 1, 1, 0, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True) if activation == 'ReLU' else HS(),
            ]
            self.branch_proj = nn.Sequential(*branch_proj)
        else:
            # Always define the attribute so stride-1 blocks can be inspected
            # the same way as Shufflenet blocks.
            self.branch_proj = None

    def forward(self, old_x):
        """Concatenate the shortcut/projection branch with the main branch."""
        if self.stride==1:
            x_proj, x = channel_shuffle(old_x)
            return torch.cat((x_proj, self.branch_main(x)), 1)
        elif self.stride==2:
            x_proj = old_x
            x = old_x
            return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
193 | 
194 | 
def channel_shuffle(x):
    """Split a 4-D tensor into its even- and odd-indexed channels.

    Returns two tensors, each holding half of the channels of ``x``: the first
    takes channels 0, 2, 4, ... and the second channels 1, 3, 5, ....

    Raises:
        AssertionError: If the channel count is not a multiple of 4.
    """
    n, c, h, w = x.shape
    assert (c % 4 == 0)
    # Pair up neighbouring channels, then move the pair index to the front.
    paired = x.reshape(n * c // 2, 2, h * w).permute(1, 0, 2)
    halves = paired.reshape(2, -1, c // 2, h, w)
    return halves[0], halves[1]
202 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.Large/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
  5 | class Conv_BN_ReLU(nn.Module):
  6 | 
  7 |     def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1, has_bn=True, has_relu=True):
  8 |         super(Conv_BN_ReLU, self).__init__()
  9 |         self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=k_size,
 10 |                               stride=stride, padding=padding,
 11 |                               groups=groups, bias=False)
 12 |         self.bn = nn.BatchNorm2d(out_channel, eps=1e-9)
 13 |         self.has_bn = has_bn
 14 | 
 15 |         self.has_relu = has_relu
 16 |         self.relu = nn.ReLU(inplace=True)
 17 | 
 18 |     def forward(self, x):
 19 |         x = self.conv(x)
 20 |         if self.has_bn:
 21 |             x = self.bn(x)
 22 |         if self.has_relu:
 23 |             x = self.relu(x)
 24 |         return x
 25 | 
 26 | 
 27 | class ShuffleV2Block(nn.Module):
 28 |     def __init__(self, in_channels, out_channels, stride, groups, has_proj=False, has_se=False):
 29 |         super(ShuffleV2Block, self).__init__()
 30 |         self.stride = stride
 31 |         assert stride in [1, 2]
 32 |         self.has_proj = has_proj
 33 |         self.has_se = has_se
 34 |         self.relu = nn.ReLU(inplace=True)
 35 | 
 36 |         if stride == 2:
 37 |             self.down = Conv_BN_ReLU(out_channels * 2, out_channels * 2, k_size=1, stride=1, padding=0)
 38 | 
 39 |         if has_proj:
 40 |             self.proj = Conv_BN_ReLU(in_channels, out_channels, k_size=3, stride=stride, padding=1, has_bn=True, has_relu=False)
 41 | 
 42 |         self.branch_main = nn.Sequential(
 43 |             Conv_BN_ReLU(in_channels, out_channels, k_size=1, stride=1, padding=0, has_bn=True, has_relu=True),
 44 |             Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=stride, padding=1, groups=groups, has_bn=True, has_relu=True),
 45 |             Conv_BN_ReLU(out_channels, out_channels, k_size=1, stride=1, padding=0, has_bn=True, has_relu=False),
 46 |         )
 47 | 
 48 |         if has_se:
 49 |             self.se_globalpool = nn.AdaptiveAvgPool2d(output_size=1)
 50 |             self.se_fc1 = nn.Linear(out_channels, out_channels)
 51 |             self.se_fc2 = nn.Linear(out_channels, out_channels)
 52 |             se_block = [
 53 |                 self.se_fc1,
 54 |                 nn.ReLU(inplace=True),
 55 |                 self.se_fc2,
 56 |                 nn.Sigmoid(),
 57 |             ]
 58 |             self.se_block = nn.Sequential(*se_block)
 59 | 
 60 |     def forward(self, old_x):
 61 |         proj, x = self.channel_shuffle(old_x)
 62 |         x_proj = x
 63 |         if self.has_proj:
 64 |             proj = self.proj(proj)
 65 | 
 66 |         x = self.branch_main(x)
 67 | 
 68 |         if self.has_se:
 69 |             se_scale = self.se_globalpool(x).view(x.size(0), -1)
 70 |             se_scale = self.se_block(se_scale).unsqueeze(-1).unsqueeze(-1)
 71 |             x = x * se_scale
 72 | 
 73 |         if not self.has_proj:
 74 |             x = x_proj + x
 75 | 
 76 |         x = self.relu(torch.cat((proj, x), dim=1))
 77 | 
 78 |         if self.stride == 2:
 79 |             x = self.down(x)
 80 | 
 81 |         return x
 82 | 
 83 |     def channel_shuffle(self, x):
 84 |         batchsize, num_channels, height, width = x.data.size()
 85 |         assert (num_channels % 4 == 0)
 86 |         x = x.reshape(batchsize * num_channels // 2, 2, height * width)
 87 |         x = x.permute(1, 0, 2)
 88 |         x = x.reshape(2, -1, num_channels // 2, height, width)
 89 |         return x[0], x[1]
 90 | 
 91 | 
 92 | class ExtraLabelPredict(nn.Module):
 93 |     def __init__(self, in_channels, out_channels, num_classes=1000):
 94 |         super(ExtraLabelPredict, self).__init__()
 95 |         self.num_classes = num_classes
 96 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 97 |         self.conv = nn.Sequential(
 98 |             Conv_BN_ReLU(in_channels, out_channels,  1, 1, 0),
 99 |             Conv_BN_ReLU(out_channels, out_channels, 3, 1, 1)
100 |         )
101 |         self.globalpool = nn.AdaptiveAvgPool2d(output_size=1)
102 |         self.fc = nn.Linear(out_channels, num_classes)
103 | 
104 |     def forward(self, inputs):
105 |         inputs = self.maxpool(inputs)
106 |         inputs = self.conv(inputs)
107 |         inputs = self.globalpool(inputs)
108 |         inputs = inputs.view(inputs.size(0), -1)
109 |         inputs = self.fc(inputs)
110 |         return inputs
111 | 
112 | 
113 | class ShuffleNetV2(nn.Module):
    def __init__(self, n_class=1000, model_size='large'):
        """Build the stem, the four block stages, the auxiliary heads and the classifier.

        Args:
            n_class: Number of outputs of the final linear layer.
            model_size: Only ``'large'`` is supported.

        Raises:
            NotImplementedError: If ``model_size`` is not ``'large'``.
        """
        super(ShuffleNetV2, self).__init__()

        # Placeholder value; the 'large' configuration below overwrites it.
        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if model_size == 'large':
            # Per-stage settings, zipped together below:
            #   pre           - numeric prefix used in block names ("2a", "2b", ...)
            #   stage_repeats - number of blocks in the stage
            #   mid_outputs   - base width; each block is built with o * 2 channels
            #   enable_stride - whether the stage's first block uses stride 2
            self.pre = [2, 3, 4, 5]
            self.stage_repeats = [10, 10, 23, 10]
            self.mid_outputs = [64, 128, 256, 512]
            self.enable_stride = [False, True, True, True]
        else:
            raise NotImplementedError

        # Stem: three 3x3 convolutions; only the first one is strided.
        self.first_conv = nn.Sequential(
            Conv_BN_ReLU(3, 64, k_size=3, stride=2, padding=1),
            Conv_BN_ReLU(64, 64, k_size=3, stride=1, padding=1),
            Conv_BN_ReLU(64, 128, k_size=3, stride=1, padding=1),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = nn.ModuleList()
        # Channel count handed to each block's in_channels; it starts at half
        # of the stem's 128 output channels.
        input_channel = 64
        if model_size == 'large':
            for p, s, o, es in zip(self.pre, self.stage_repeats, self.mid_outputs, self.enable_stride):
                feature = nn.Sequential()
                for i in range(s):
                    # Blocks are named <stage prefix><letter>, e.g. "2a", "2b".
                    prefix = "{}{}".format(p, chr(ord("a") + i))
                    # Only the first block of a stride-enabled stage downsamples.
                    stride = 1 if not es or i > 0 else 2
                    # Only the first block of each stage gets a projection shortcut.
                    has_proj = False if i > 0 else True
                    feature.add_module(prefix, ShuffleV2Block(input_channel, o * 2, stride, groups=8, has_proj=has_proj, has_se=True))
                    input_channel = o * 2
                self.features.append(feature)
                # Auxiliary heads after stages 2, 3 and 4.
                # NOTE(review): the 56/28/14 suffixes presumably name the
                # feature-map size at that stage for 224x224 inputs - confirm.
                if p == 2:
                    self.predict_56 = ExtraLabelPredict(in_channels=256, out_channels=256)
                elif p == 3:
                    self.predict_28 = ExtraLabelPredict(in_channels=512, out_channels=512)
                elif p == 4:
                    self.predict_14 = ExtraLabelPredict(in_channels=1024, out_channels=1024)

        # conv_last takes twice the last block's in_channels value as its input width.
        self.conv_last = Conv_BN_ReLU(input_channel * 2, 1280, 3, 1, 1)
        # Fixed 7x7 average pool.
        # NOTE(review): presumably sized for 224x224 inputs - confirm.
        self.globalpool = nn.AvgPool2d(7)
        if self.model_size == 'large':
            self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1280, n_class)

        self._initialize_weights()
161 | 
162 |     def _initialize_weights(self):
163 |         for name, m in self.named_modules():
164 |             if isinstance(m, nn.Conv2d):
165 |                 if 'first' in name:
166 |                     nn.init.normal_(m.weight, 0, 0.01)
167 |                 else:
168 |                     nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
169 |                 if m.bias is not None:
170 |                     nn.init.constant_(m.bias, 0)
171 |             elif isinstance(m, nn.BatchNorm2d):
172 |                 nn.init.constant_(m.weight, 1)
173 |                 if m.bias is not None:
174 |                     nn.init.constant_(m.bias, 0.0001)
175 |                 nn.init.constant_(m.running_mean, 0)
176 |             elif isinstance(m, nn.BatchNorm1d):
177 |                 nn.init.constant_(m.weight, 1)
178 |                 if m.bias is not None:
179 |                     nn.init.constant_(m.bias, 0.0001)
180 |                 nn.init.constant_(m.running_mean, 0)
181 |             elif isinstance(m, nn.Linear):
182 |                 nn.init.normal_(m.weight, 0, 0.01)
183 |                 if m.bias is not None:
184 |                     nn.init.constant_(m.bias, 0)
185 | 
186 |     def forward(self, x):
187 |         x = self.first_conv(x)
188 |         x = self.maxpool(x)
189 |         # 5 * 128 * 56 * 56
190 | 
191 |         x = self.features[0](x)
192 |         # 5 * 256 * 56 * 56
193 |         if self.training:
194 |             predict_56 = self.predict_56(x)
195 | 
196 |         x = self.features[1](x)
197 |         # 5 * 512 * 28 * 28
198 |         if self.training:
199 |             predict_28 = self.predict_28(x)
200 | 
201 |         x = self.features[2](x)
202 |         # 5 * 1024 * 14 * 14
203 |         if self.training:
204 |             predict_14 = self.predict_14(x)
205 | 
206 |         x = self.features[3](x)
207 |         # 5 * 2048 * 7 * 7
208 | 
209 |         x = self.conv_last(x)
210 |         x = self.globalpool(x)
211 |         if self.model_size == 'large':
212 |             x = self.dropout(x)
213 |         x = x.reshape(x.size(0), -1)
214 |         x = self.fc(x)
215 |         if self.training:
216 |             # Loss is scaled by 1.0, 0.7, 0.5, 0.3
217 |             return x, predict_14, predict_28, predict_56
218 |         else:
219 |             return x
220 | 
221 | 
def create_network():
    """Return a ShuffleNetV2 built with its default constructor arguments."""
    return ShuffleNetV2()
225 | 
226 | 
# Script entry point: only constructs the network (the result is discarded),
# which serves as a quick check that the model definition can be built.
if __name__ == "__main__":
    create_network()
229 | 
230 | 
231 | 
232 | 
233 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.ExLarge/network.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
  5 | class Conv_BN_ReLU(nn.Module):
  6 | 
  7 |     def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1,
  8 |                  has_bn=True, has_relu=True, gaussian_init=False):
  9 |         super(Conv_BN_ReLU, self).__init__()
 10 |         self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=k_size,
 11 |                               stride=stride, padding=padding,
 12 |                               groups=groups, bias=False)
 13 |         if gaussian_init:
 14 |             nn.init.normal_(self.conv.weight.data, 0, 0.01)
 15 | 
 16 |         if has_bn:
 17 |             self.bn = nn.BatchNorm2d(out_channel)
 18 | 
 19 |         self.has_bn = has_bn
 20 |         self.has_relu = has_relu
 21 |         if has_relu:
 22 |             self.relu = nn.ReLU(inplace=True)
 23 | 
 24 |     def forward(self, x):
 25 |         x = self.conv(x)
 26 |         if self.has_bn:
 27 |             x = self.bn(x)
 28 |         if self.has_relu:
 29 |             x = self.relu(x)
 30 |         return x
 31 | 
 32 | 
 33 | class FC(nn.Module):
 34 |     def __init__(self, in_channels, out_channels):
 35 |         super(FC, self).__init__()
 36 |         self.fc = nn.Linear(in_channels, out_channels)
 37 |         nn.init.normal_(self.fc.weight.data, 0, 0.01)
 38 | 
 39 |     def forward(self, x):
 40 |         return self.fc(x)
 41 | 
 42 | 
 43 | class ExtraLabelPredict(nn.Module):
 44 |     def __init__(self, in_channels, out_channels, num_classes=1000):
 45 |         super(ExtraLabelPredict, self).__init__()
 46 |         self.num_classes = num_classes
 47 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 48 |         self.conv = nn.Sequential(
 49 |             Conv_BN_ReLU(in_channels, out_channels,  1, 1, 0),
 50 |             Conv_BN_ReLU(out_channels, out_channels, 3, 1, 1)
 51 |         )
 52 |         self.globalpool = nn.AdaptiveAvgPool2d(output_size=1)
 53 |         self.fc = nn.Linear(out_channels, num_classes)
 54 | 
 55 |     def forward(self, inputs):
 56 |         inputs = self.maxpool(inputs)
 57 |         inputs = self.conv(inputs)
 58 |         inputs = self.globalpool(inputs)
 59 |         inputs = inputs.view(inputs.size(0), -1)
 60 |         inputs = self.fc(inputs)
 61 |         return inputs
 62 | 
 63 | 
class ShuffleV2Block(nn.Module):
    """Two-branch ShuffleNetV2-style block with optional projection and SE gate.

    With ``has_proj`` the whole input feeds both a 3x3 projection conv and the
    main branch, and the two results are concatenated (``out_channels``
    channels in total). Without it the input channels are split in two by
    ``channel_shuffle``; one half is passed through untouched, the other half
    goes through the main branch and is added back to itself (residual)
    before the halves are concatenated.

    Args:
        in_channels: channels entering the main branch (and the projection).
            When ``has_proj`` is false this is half of the input tensor's channels.
        out_channels: width of the main branch's inner convolutions.
        mid_channels: channels produced by the main branch.
        stride: 1 or 2; applied by the projection and the grouped 3x3 conv.
        groups: groups of the first 3x3 conv in the main branch.
        has_proj: build and use the projection branch.
        has_se: build and use the squeeze-and-excitation gate.
    """

    def __init__(self, in_channels, out_channels, mid_channels, stride, groups, has_proj=False, has_se=False):
        super(ShuffleV2Block, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        self.has_proj = has_proj
        self.has_se = has_se
        self.relu = nn.ReLU(inplace=True)

        if has_proj:
            # Projection branch supplies the channels the main branch does not:
            # (out_channels - mid_channels) + mid_channels == out_channels.
            self.proj = Conv_BN_ReLU(in_channels, out_channels - mid_channels, k_size=3, stride=stride, padding=1,
                                     has_bn=True, has_relu=True)

        # Main branch: 1x1 conv -> grouped 3x3 conv (carries the stride) ->
        # depthwise 3x3 conv (no ReLU) -> 1x1 conv down to mid_channels (no ReLU).
        self.branch_main = nn.Sequential(
            Conv_BN_ReLU(in_channels, out_channels, k_size=1, stride=1, padding=0,
                         has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=stride, padding=1, groups=groups,
                         has_bn=True, has_relu=True),
            Conv_BN_ReLU(out_channels, out_channels, k_size=3, stride=1, padding=1, groups=out_channels,
                         has_bn=True, has_relu=False),
            Conv_BN_ReLU(out_channels, mid_channels, k_size=1, stride=1, padding=0,
                         has_bn=True, has_relu=False),
        )

        if has_se:
            # Squeeze-and-excitation gate: global pool -> FC (1/4 width) -> ReLU
            # -> FC -> Sigmoid.
            # NOTE: se_fc1 / se_fc2 are registered both as attributes and inside
            # se_block, so their parameters appear under both names in state_dict.
            self.se_globalpool = nn.AdaptiveAvgPool2d(output_size=1)
            self.se_fc1 = FC(mid_channels, mid_channels // 4)
            self.se_fc2 = FC(mid_channels // 4, mid_channels)
            se_block = [
                self.se_fc1,
                nn.ReLU(inplace=True),
                self.se_fc2,
                nn.Sigmoid(),
            ]
            self.se_block = nn.Sequential(*se_block)

    def forward(self, old_x):
        """Return the concatenation of the pass-through/projection and main branches."""
        if self.has_proj:
            # Both branches see the full input.
            proj, x = old_x, old_x
        else:
            # Split channels: `proj` is passed through, `x` is processed.
            proj, x = self.channel_shuffle(old_x)
        # Keep the main-branch input for the residual connection below.
        x_proj = x
        if self.has_proj:
            proj = self.proj(proj)

        x = self.branch_main(x)

        if self.has_se:
            # Per-channel gate in (0, 1), broadcast over the spatial dimensions.
            se_scale = self.se_globalpool(x).view(x.size(0), -1)
            se_scale = self.se_block(se_scale).unsqueeze(-1).unsqueeze(-1)
            x = x * se_scale

        if not self.has_proj:
            # Residual add + ReLU is only applied on the non-projection path.
            x = self.relu(x_proj + x)

        x = torch.cat((proj, x), dim=1)

        return x

    def channel_shuffle(self, x):
        """Split a 4-D tensor into its even-indexed and odd-indexed channels.

        Returns two tensors with half the channels each: the first holds
        channels 0, 2, 4, ..., the second channels 1, 3, 5, ...
        """
        batchsize, num_channels, height, width = x.data.size()
        assert (num_channels % 4 == 0)
        # Group channels into consecutive pairs, move the within-pair index to
        # the front, then restore the (batch, channel, height, width) layout.
        x = x.reshape(batchsize * num_channels // 2, 2, height * width)
        x = x.permute(1, 0, 2)
        x = x.reshape(2, -1, num_channels // 2, height, width)
        return x[0], x[1]
130 | 
131 | 
class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 'ExLarge' classifier with three auxiliary training heads.

    The network is a three-conv stem, a max-pool, four stages (each a run of
    ``ShuffleV2Block`` modules closed by a 1x1 ``Conv_BN_ReLU``), 7x7 average
    pooling, dropout and an ``FC`` classifier. While ``self.training`` is
    true, ``forward`` also returns the logits of three ``ExtraLabelPredict``
    heads attached after the first three stages.

    Args:
        n_class: number of output classes for the main classifier and for the
            auxiliary heads.
        model_size: only ``'ExLarge'`` is implemented.

    Raises:
        NotImplementedError: if ``model_size`` is not ``'ExLarge'``.
    """

    def __init__(self, n_class=1000, model_size='ExLarge'):
        super(ShuffleNetV2, self).__init__()

        self.model_size = model_size
        if model_size == 'ExLarge':
            # Per-stage settings: stage id, block count, output width, and
            # whether the first block of the stage downsamples.
            self.pre = [2, 3, 4, 5]
            self.stage_repeats = [8, 16, 36, 10]
            self.outputs = [320, 640, 1280, 2560]
            self.enable_stride = [False, True, True, True]
        else:
            raise NotImplementedError

        self.first_conv = nn.Sequential(
            Conv_BN_ReLU(3, 64, k_size=3, stride=2, padding=1),
            Conv_BN_ReLU(64, 128, k_size=3, stride=1, padding=1),
            Conv_BN_ReLU(128, 256, k_size=3, stride=1, padding=1),
        )

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = nn.ModuleList()
        input_channel = 256
        if model_size == 'ExLarge':
            for p, s, o, es in zip(self.pre, self.stage_repeats, self.outputs, self.enable_stride):
                feature = []
                for i in range(s):
                    # Only the first block of a stride-enabled stage downsamples,
                    # and only the first block of every stage has a projection.
                    stride = 2 if es and i == 0 else 1
                    has_proj = i == 0
                    feature.append(ShuffleV2Block(in_channels=input_channel, out_channels=o, mid_channels=o // 2,
                                                  stride=stride, groups=16, has_proj=has_proj, has_se=True))
                    # Later blocks split their o-channel input in half, so the
                    # main branch sees o // 2 channels.
                    input_channel = o // 2
                feature.append(Conv_BN_ReLU(o, o, k_size=1, stride=1, padding=0))
                input_channel = o
                feature = nn.Sequential(*feature)
                self.features.append(feature)
                # Auxiliary heads follow n_class so that their logits share the
                # label space of the main classifier (they used to be fixed at
                # the ExtraLabelPredict default regardless of n_class).
                if p == 2:
                    self.predict_56 = ExtraLabelPredict(in_channels=320, out_channels=256, num_classes=n_class)
                elif p == 3:
                    self.predict_28 = ExtraLabelPredict(in_channels=640, out_channels=512, num_classes=n_class)
                elif p == 4:
                    self.predict_14 = ExtraLabelPredict(in_channels=1280, out_channels=1024, num_classes=n_class)

        self.globalpool = nn.AvgPool2d(7)
        if self.model_size == 'ExLarge':
            self.dropout = nn.Dropout(0.2)
        self.fc = FC(2560, n_class)

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every conv, batch-norm and linear layer in place."""
        for name, m in self.named_modules():
            if isinstance(m, nn.Conv2d):
                if 'first' in name:
                    # Convs whose qualified name contains 'first' (the stem).
                    nn.init.normal_(m.weight, 0, 0.01)
                else:
                    # std = 1 / (input channels per group).
                    nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.0001)
                nn.init.constant_(m.running_mean, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Run the network.

        Returns:
            In training mode, the tuple
            ``(logits, predict_14, predict_28, predict_56)``; in eval mode only
            ``logits``. The shape comments below are the original author's
            notes (they correspond to a single 224x224 image).
        """
        x = self.first_conv(x)
        x = self.maxpool(x)
        # 1 * 256 * 56 * 56

        x = self.features[0](x)
        # 1 * 320 * 56 * 56
        if self.training:
            predict_56 = self.predict_56(x)

        x = self.features[1](x)
        # 1 * 640 * 28 * 28
        if self.training:
            predict_28 = self.predict_28(x)

        x = self.features[2](x)
        # 1 * 1280 * 14 * 14
        if self.training:
            predict_14 = self.predict_14(x)

        x = self.features[3](x)
        # 1 * 2560 * 7 * 7

        x = self.globalpool(x)
        if self.model_size == 'ExLarge':
            x = self.dropout(x)
        x = x.reshape(x.size(0), -1)
        x = self.fc(x)
        if self.training:
            # Loss is scaled by 1.0, 0.7, 0.5, 0.3
            return x, predict_14, predict_28, predict_56
        else:
            return x
241 | 
242 | 
def create_network():
    """Return a ShuffleNetV2 built with its default constructor arguments."""
    return ShuffleNetV2()
246 | 
247 | 
# Script entry point: only constructs the network (the result is discarded),
# which serves as a quick check that the model definition can be built.
if __name__ == "__main__":
    create_network()
250 | 


--------------------------------------------------------------------------------
/DetNAS/blocks.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | 
# Candidate operator names, in index order. ShuffleNetV2BlockSearched picks an
# entry with blocks_key[architecture[i_th]] and uses it as a key into Blocks,
# so the order of this list defines the meaning of architecture encodings.
blocks_key = [
    'shufflenet_3x3',
    'shufflenet_5x5',
    'shufflenet_7x7',
    'xception_3x3',
]
 11 | 
 12 | 
# Operator name -> factory. Every factory takes the same positional arguments
# (prefix, in_channels, output_channels, base_mid_channels, stride, bn_training)
# and returns an nn.Sequential. The shufflenet_* entries differ only in the
# depthwise kernel size (3, 5 or 7) handed to conv1x1_dwconv_conv1x1.
Blocks = {
  'shufflenet_3x3': lambda prefix, in_channels, output_channels, base_mid_channels, stride, bn_training: conv1x1_dwconv_conv1x1(prefix, in_channels, output_channels, base_mid_channels, 3, stride, bn_training),
  'shufflenet_5x5': lambda prefix, in_channels, output_channels, base_mid_channels, stride, bn_training: conv1x1_dwconv_conv1x1(prefix, in_channels, output_channels, base_mid_channels, 5, stride, bn_training),
  'shufflenet_7x7': lambda prefix, in_channels, output_channels, base_mid_channels, stride, bn_training: conv1x1_dwconv_conv1x1(prefix, in_channels, output_channels, base_mid_channels, 7, stride, bn_training),
  'xception_3x3': lambda prefix, in_channels, output_channels, base_mid_channels, stride, bn_training: xception(prefix, in_channels, output_channels, base_mid_channels, stride, bn_training),
}
 19 | 
 20 | 
 21 | def create_spatial_conv2d_group_bn_relu(prefix, in_channels, out_channels, kernel_size, stride, padding=0, dilation=1, groups=1,
 22 |                           bias=False, has_bn=True, has_relu=True, channel_shuffle=False, has_spatial_conv=True, has_spatial_conv_bn=True,
 23 |                           conv_name_fun=None, bn_name_fun=None, bn_training=True, fix_weights=False):
 24 |     conv_name = prefix
 25 |     if conv_name_fun:
 26 |         conv_name = conv_name_fun(prefix)
 27 | 
 28 |     layer = nn.Sequential()
 29 | 
 30 |     if has_spatial_conv:
 31 |         spatial_conv_name = conv_name + '_s'
 32 |         layer.add_module(spatial_conv_name, nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
 33 |                                                       kernel_size=kernel_size, stride=stride, padding=padding,
 34 |                                                       dilation=dilation, groups=in_channels, bias=bias))
 35 |         if fix_weights:
 36 |             pass
 37 | 
 38 |         if has_spatial_conv_bn:
 39 |             layer.add_module(spatial_conv_name + '_bn', nn.BatchNorm2d(in_channels))
 40 | 
 41 |     if channel_shuffle:
 42 |         pass
 43 | 
 44 |     assert in_channels % groups == 0
 45 |     assert out_channels % groups == 0
 46 | 
 47 |     layer.add_module(conv_name, nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
 48 |                                                     kernel_size=1, stride=1, padding=0,
 49 |                                                     groups=groups, bias=bias))
 50 |     if fix_weights:
 51 |         pass
 52 | 
 53 |     if has_bn:
 54 |         bn_name = 'bn_' + prefix
 55 |         if bn_name_fun:
 56 |             bn_name = bn_name_fun(prefix)
 57 |         layer.add_module(bn_name, nn.BatchNorm2d(out_channels))
 58 |         if bn_training:
 59 |             pass
 60 | 
 61 |     if has_relu:
 62 |         layer.add_module('relu' + prefix, nn.ReLU(inplace=True))
 63 | 
 64 |     return layer
 65 | 
 66 | 
 67 | def conv1x1_dwconv_conv1x1(prefix, in_channels, out_channels, mid_channels, kernel_size, stride, bn_training=True):
 68 |     mid_channels = int(mid_channels)
 69 |     layer = list()
 70 | 
 71 |     layer.append(create_spatial_conv2d_group_bn_relu(prefix=prefix + '_branch2a', in_channels=in_channels, out_channels=mid_channels,
 72 |                                                      kernel_size=-1, stride=1, padding=0, groups=1, has_bn=True, has_relu=True,
 73 |                                                      channel_shuffle=False, has_spatial_conv=False, has_spatial_conv_bn=False,
 74 |                                                      conv_name_fun=lambda p: 'interstellar' + p,
 75 |                                                      bn_name_fun=lambda p: 'bn' + p,
 76 |                                                      bn_training=bn_training))
 77 |     layer.append(create_spatial_conv2d_group_bn_relu(prefix=prefix + '_branch2b', in_channels=mid_channels, out_channels=out_channels,
 78 |                                                      kernel_size=kernel_size, stride=stride, padding=kernel_size // 2, groups=1,
 79 |                                                      has_bn=True, has_relu=False, channel_shuffle=False, has_spatial_conv=True,
 80 |                                                      has_spatial_conv_bn=True,
 81 |                                                      conv_name_fun=lambda p: 'interstellar' + p,
 82 |                                                      bn_name_fun=lambda p: 'bn' + p,
 83 |                                                      bn_training=bn_training))
 84 |     return nn.Sequential(*layer)
 85 | 
 86 | 
 87 | def xception(prefix, in_channels, out_channels, mid_channels, stride, bn_training=True):
 88 |     mid_channels = int(mid_channels)
 89 |     layer = list()
 90 | 
 91 |     layer.append(create_spatial_conv2d_group_bn_relu(prefix=prefix + '_branch2a', in_channels=in_channels, out_channels=mid_channels,
 92 |                                                      kernel_size=3, stride=stride, padding=1, groups=1, has_bn=True, has_relu=True,
 93 |                                                      channel_shuffle=False, has_spatial_conv=True, has_spatial_conv_bn=True,
 94 |                                                      conv_name_fun=lambda p: 'interstellar' + p,
 95 |                                                      bn_name_fun=lambda p: 'bn' + p,
 96 |                                                      bn_training=bn_training))
 97 | 
 98 |     layer.append(create_spatial_conv2d_group_bn_relu(prefix=prefix + '_branch2b', in_channels=mid_channels,
 99 |                                                      out_channels=mid_channels,
100 |                                                      kernel_size=3, stride=1, padding=1, groups=1, has_bn=True,
101 |                                                      has_relu=True,
102 |                                                      channel_shuffle=False, has_spatial_conv=True,
103 |                                                      has_spatial_conv_bn=True,
104 |                                                      conv_name_fun=lambda p: 'interstellar' + p,
105 |                                                      bn_name_fun=lambda p: 'bn' + p,
106 |                                                      bn_training=bn_training))
107 | 
108 |     layer.append(create_spatial_conv2d_group_bn_relu(prefix=prefix + '_branch2c', in_channels=mid_channels,
109 |                                                      out_channels=out_channels,
110 |                                                      kernel_size=3, stride=1, padding=1, groups=1, has_bn=True,
111 |                                                      has_relu=False,
112 |                                                      channel_shuffle=False, has_spatial_conv=True,
113 |                                                      has_spatial_conv_bn=True,
114 |                                                      conv_name_fun=lambda p: 'interstellar' + p,
115 |                                                      bn_name_fun=lambda p: 'bn' + p,
116 |                                                      bn_training=bn_training))
117 |     return nn.Sequential(*layer)
118 | 
119 | 
class ConvBNReLU(nn.Module):
    """Conv2d (with bias) optionally followed by BatchNorm2d and an in-place ReLU.

    ``self.relu`` is always created; ``has_relu`` only controls whether
    ``forward`` applies it. ``self.bn`` exists only when ``has_bn`` is true.
    With ``gaussian_init`` the conv weight is drawn from N(0, 0.01).
    """

    def __init__(self, in_channel, out_channel, k_size, stride=1, padding=0, groups=1,
                 has_bn=True, has_relu=True, gaussian_init=False):
        super(ConvBNReLU, self).__init__()
        self.has_bn = has_bn
        self.has_relu = has_relu

        self.conv = nn.Conv2d(in_channel, out_channel, k_size, stride=stride,
                              padding=padding, groups=groups, bias=True)
        if gaussian_init:
            nn.init.normal_(self.conv.weight.data, mean=0, std=0.01)

        if has_bn:
            self.bn = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv, then BN and ReLU if they are enabled."""
        out = self.conv(x)
        if self.has_bn:
            out = self.bn(out)
        return self.relu(out) if self.has_relu else out
145 | 
146 | 
class FC(nn.Module):
    """Linear layer whose weight is initialised from N(0, 0.01)."""

    def __init__(self, in_channels, out_channels):
        super(FC, self).__init__()
        linear = nn.Linear(in_channels, out_channels)
        nn.init.normal_(linear.weight.data, mean=0, std=0.01)
        self.fc = linear

    def forward(self, x):
        """Apply the wrapped linear layer to ``x``."""
        return self.fc(x)
155 | 
156 | 
def channel_shuffle2(x):
    """Split a 4-D tensor into its even-indexed and odd-indexed channels.

    Returns two tensors with half the channels each: the first holds channels
    0, 2, 4, ..., the second channels 1, 3, 5, ... The channel count must be a
    multiple of 4.
    """
    channels = x.size(1)
    assert channels % 4 == 0

    height, width = x.size(2), x.size(3)
    half = channels // 2

    # Group channels into consecutive pairs, bring the within-pair index to the
    # front, then restore the (batch, channel, height, width) layout per half.
    pairs = x.reshape(x.size(0) * channels // 2, 2, height * width)
    halves = pairs.permute(1, 0, 2).reshape(2, -1, half, height, width)
    return halves[0], halves[1]
168 | 
169 | 
class ShuffleNetV2BlockSearched(nn.Module):
    """ShuffleNetV2 block whose main-branch operator is chosen by an architecture code.

    ``architecture[i_th]`` indexes ``blocks_key``; the resulting name selects
    the factory in ``Blocks``. With ``stride == 1`` the input channels are
    split in two and only one half is processed; otherwise the whole input
    feeds both the main branch and a projection branch.
    """

    def __init__(self, prefix, in_channels, out_channels, stride, base_mid_channels, i_th, architecture):
        super(ShuffleNetV2BlockSearched, self).__init__()
        op_name = blocks_key[architecture[i_th]]
        # Kernel size is the first digit after the underscore, e.g. 'xception_3x3' -> 3.
        self.ksize = int(op_name.split('_')[1][0])
        self.stride = stride

        if self.stride == 2:
            # The projection contributes in_channels; the main branch the rest.
            branch_in, branch_out = in_channels, out_channels - in_channels
        else:
            # The main branch processes one half of the split input.
            branch_in, branch_out = in_channels // 2, out_channels // 2
        self.conv = Blocks[op_name](prefix + '_' + op_name, branch_in, branch_out, base_mid_channels, stride, True)

        if stride > 1:
            self.proj_conv = create_spatial_conv2d_group_bn_relu(
                prefix + '_proj', in_channels, in_channels, self.ksize,
                stride, self.ksize // 2,
                has_bn=True, has_relu=True, channel_shuffle=False,
                has_spatial_conv=True, has_spatial_conv_bn=True,
                conv_name_fun=lambda p: 'interstellar' + p,
                bn_name_fun=lambda p: 'bn' + p)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x_in):
        """Return the channel-wise concatenation of the shortcut and main branches."""
        if self.stride == 1:
            shortcut, branch_input = channel_shuffle2(x_in)
        else:
            branch_input = x_in
            shortcut = self.proj_conv(x_in)
        branch_output = self.relu(self.conv(branch_input))
        return torch.cat((shortcut, branch_output), dim=1)
199 | 
200 | 
201 | 
202 | 


--------------------------------------------------------------------------------
/OneShot/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import argparse
  5 | import torch.nn as nn
  6 | import torchvision.transforms as transforms
  7 | import torchvision.datasets as datasets
  8 | import cv2
  9 | import numpy as np
 10 | import PIL
 11 | from PIL import Image
 12 | import time
 13 | import logging
 14 | import argparse
 15 | from network import ShuffleNetV2_OneShot
 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 17 | 
 18 | class OpencvResize(object):
 19 | 
 20 |     def __init__(self, size=256):
 21 |         self.size = size
 22 | 
 23 |     def __call__(self, img):
 24 |         assert isinstance(img, PIL.Image.Image)
 25 |         img = np.asarray(img) # (H,W,3) RGB
 26 |         img = img[:,:,::-1] # 2 BGR
 27 |         img = np.ascontiguousarray(img)
 28 |         H, W, _ = img.shape
 29 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 30 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 31 |         img = img[:,:,::-1] # 2 RGB
 32 |         img = np.ascontiguousarray(img)
 33 |         img = Image.fromarray(img)
 34 |         return img
 35 | 
 36 | class ToBGRTensor(object):
 37 | 
 38 |     def __call__(self, img):
 39 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 40 |         if isinstance(img, PIL.Image.Image):
 41 |             img = np.asarray(img)
 42 |         img = img[:,:,::-1] # 2 BGR
 43 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 44 |         img = np.ascontiguousarray(img)
 45 |         img = torch.from_numpy(img).float()
 46 |         return img
 47 | 
 48 | class DataIterator(object):
 49 | 
 50 |     def __init__(self, dataloader):
 51 |         self.dataloader = dataloader
 52 |         self.iterator = enumerate(self.dataloader)
 53 | 
 54 |     def next(self):
 55 |         try:
 56 |             _, data = next(self.iterator)
 57 |         except Exception:
 58 |             self.iterator = enumerate(self.dataloader)
 59 |             _, data = next(self.iterator)
 60 |         return data[0], data[1]
 61 | 
 62 | 
 63 | def get_args():
 64 |     parser = argparse.ArgumentParser("ShuffleNetV2_OneShot")
 65 |     parser.add_argument('--eval', default=False, action='store_true')
 66 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 67 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 68 |     parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
 69 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 70 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 71 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 72 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 73 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 74 | 
 75 |     parser.add_argument('--auto-continue', type=bool, default=True, help='report frequency')
 76 |     parser.add_argument('--display-interval', type=int, default=20, help='report frequency')
 77 |     parser.add_argument('--val-interval', type=int, default=10000, help='report frequency')
 78 |     parser.add_argument('--save-interval', type=int, default=10000, help='report frequency')
 79 | 
 80 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 81 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 82 | 
 83 |     args = parser.parse_args()
 84 |     return args
 85 | 
 86 | def main():
 87 |     args = get_args()
 88 | 
 89 |     # Log
 90 |     log_format = '[%(asctime)s] %(message)s'
 91 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 92 |         format=log_format, datefmt='%d %I:%M:%S')
 93 |     t = time.time()
 94 |     local_time = time.localtime(t)
 95 |     if not os.path.exists('./log'):
 96 |         os.mkdir('./log')
 97 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
 98 |     fh.setFormatter(logging.Formatter(log_format))
 99 |     logging.getLogger().addHandler(fh)
100 | 
101 |     use_gpu = False
102 |     if torch.cuda.is_available():
103 |         use_gpu = True
104 | 
105 |     assert os.path.exists(args.train_dir)
106 |     train_dataset = datasets.ImageFolder(
107 |         args.train_dir,
108 |         transforms.Compose([
109 |             transforms.RandomResizedCrop(224),
110 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
111 |             transforms.RandomHorizontalFlip(0.5),
112 |             ToBGRTensor(),
113 |         ])
114 |     )
115 |     train_loader = torch.utils.data.DataLoader(
116 |         train_dataset, batch_size=args.batch_size, shuffle=True,
117 |         num_workers=1, pin_memory=use_gpu)
118 |     train_dataprovider = DataIterator(train_loader)
119 | 
120 |     assert os.path.exists(args.val_dir)
121 |     val_loader = torch.utils.data.DataLoader(
122 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
123 |             OpencvResize(256),
124 |             transforms.CenterCrop(224),
125 |             ToBGRTensor(),
126 |         ])),
127 |         batch_size=200, shuffle=False,
128 |         num_workers=1, pin_memory=use_gpu
129 |     )
130 |     val_dataprovider = DataIterator(val_loader)
131 |     print('load data successfully')
132 | 
133 |     architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
134 |     scale_list = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6]
135 |     scale_ids = [6, 5, 3, 5, 2, 6, 3, 4, 2, 5, 7, 5, 4, 6, 7, 4, 4, 5, 4, 3]
136 |     channels_scales = []
137 |     for i in range(len(scale_ids)):
138 |         channels_scales.append(scale_list[scale_ids[i]])
139 |     model = ShuffleNetV2_OneShot(architecture=architecture, channels_scales=channels_scales)
140 | 
141 |     optimizer = torch.optim.SGD(get_parameters(model),
142 |                                 lr=args.learning_rate,
143 |                                 momentum=args.momentum,
144 |                                 weight_decay=args.weight_decay)
145 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
146 | 
147 |     if use_gpu:
148 |         model = nn.DataParallel(model)
149 |         loss_function = criterion_smooth.cuda()
150 |         device = torch.device("cuda")
151 |     else:
152 |         loss_function = criterion_smooth
153 |         device = torch.device("cpu")
154 | 
155 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
156 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
157 | 
158 |     model = model.to(device)
159 | 
160 |     all_iters = 0
161 |     if args.auto_continue:
162 |         lastest_model, iters = get_lastest_model()
163 |         if lastest_model is not None:
164 |             all_iters = iters
165 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
166 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
167 |             print('load from checkpoint')
168 |             for i in range(iters):
169 |                 scheduler.step()
170 | 
171 |     args.optimizer = optimizer
172 |     args.loss_function = loss_function
173 |     args.scheduler = scheduler
174 |     args.train_dataprovider = train_dataprovider
175 |     args.val_dataprovider = val_dataprovider
176 | 
177 |     if args.eval:
178 |         if args.eval_resume is not None:
179 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
180 |             model.load_state_dict(checkpoint, strict=True)
181 |             validate(model, device, args, all_iters=all_iters)
182 |         exit(0)
183 | 
184 |     while all_iters < args.total_iters:
185 |         all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
186 |         validate(model, device, args, all_iters=all_iters)
187 |     all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
188 |     validate(model, device, args, all_iters=all_iters)
189 |     save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
190 | 
def adjust_bn_momentum(model, iters):
    """Set ``momentum`` to ``1 / iters`` on every ``nn.BatchNorm2d`` in *model*.

    Other module types (including other normalisation layers) are left alone.
    """
    batch_norms = [layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d)]
    for layer in batch_norms:
        layer.momentum = 1 / iters
195 | 
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` training iterations and return the updated counter.

    Args:
        model: network to optimise; it is switched to train mode.
        device: device every batch is moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations executed by this call.
        bn_process: when true, ``adjust_bn_momentum(model, iters)`` is applied
            before every iteration, with ``iters`` counting from 1 in this call.
        all_iters: global iteration count before this call.

    Returns:
        The global iteration count after this call.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # Iterations accumulated since the last log line. The accumulators restart
    # with every call, so a window can be shorter than display_interval.
    window_iters = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # The schedule is advanced before the optimizer update (ordering kept
        # from the original code so the learning-rate sequence is unchanged).
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        # accuracy() values are treated as percentages here.
        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100
        window_iters += 1

        if all_iters % args.display_interval == 0:
            # Average over the iterations actually accumulated, not over the
            # nominal display interval.
            printInfo = (
                'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item())
                + 'Top-1 err = {:.6f},\t'.format(Top1_err / window_iters)
                + 'Top-5 err = {:.6f},\t'.format(Top5_err / window_iters)
                + 'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window_iters)
            )
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window_iters = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters
243 | 
def validate(model, device, args, *, all_iters=None, max_val_iters=250):
    """Evaluate *model* on ``max_val_iters`` validation batches and log the result.

    Args:
        model: network to evaluate; it is switched to eval mode.
        device: device every batch is moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: global training iteration, used only in the log line.
        max_val_iters: number of batches pulled from ``val_dataprovider``.
            The default of 250 is the value that used to be hard-coded here.

    The loss and the Top-1/Top-5 errors are averaged with per-batch weights
    equal to the batch size and written through ``logging.info``.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = (
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg)
        + 'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100)
        + 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100)
        + 'val_time = {:.6f}'.format(time.time() - t1)
    )
    logging.info(logInfo)
275 | 
276 | 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
279 | 
280 | 


--------------------------------------------------------------------------------
/ShuffleNetV2/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import argparse
  5 | import torch.nn as nn
  6 | import torchvision.transforms as transforms
  7 | import torchvision.datasets as datasets
  8 | import cv2
  9 | import numpy as np
 10 | import PIL
 11 | from PIL import Image
 12 | import time
 13 | import logging
 14 | import argparse
 15 | from network import ShuffleNetV2
 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 17 | 
 18 | class OpencvResize(object):
 19 | 
 20 |     def __init__(self, size=256):
 21 |         self.size = size
 22 | 
 23 |     def __call__(self, img):
 24 |         assert isinstance(img, PIL.Image.Image)
 25 |         img = np.asarray(img) # (H,W,3) RGB
 26 |         img = img[:,:,::-1] # 2 BGR
 27 |         img = np.ascontiguousarray(img)
 28 |         H, W, _ = img.shape
 29 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 30 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 31 |         img = img[:,:,::-1] # 2 RGB
 32 |         img = np.ascontiguousarray(img)
 33 |         img = Image.fromarray(img)
 34 |         return img
 35 | 
 36 | class ToBGRTensor(object):
 37 | 
 38 |     def __call__(self, img):
 39 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 40 |         if isinstance(img, PIL.Image.Image):
 41 |             img = np.asarray(img)
 42 |         img = img[:,:,::-1] # 2 BGR
 43 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 44 |         img = np.ascontiguousarray(img)
 45 |         img = torch.from_numpy(img).float()
 46 |         return img
 47 | 
 48 | class DataIterator(object):
 49 | 
 50 |     def __init__(self, dataloader):
 51 |         self.dataloader = dataloader
 52 |         self.iterator = enumerate(self.dataloader)
 53 | 
 54 |     def next(self):
 55 |         try:
 56 |             _, data = next(self.iterator)
 57 |         except Exception:
 58 |             self.iterator = enumerate(self.dataloader)
 59 |             _, data = next(self.iterator)
 60 |         return data[0], data[1]
 61 | 
 62 | def get_args():
 63 |     parser = argparse.ArgumentParser("ShuffleNetV2_Plus")
 64 |     parser.add_argument('--eval', default=False, action='store_true')
 65 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 66 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 67 |     parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
 68 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 69 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 70 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 71 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 72 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 73 | 
 74 |     parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue')
 75 |     parser.add_argument('--display-interval', type=int, default=20, help='display interval')
 76 |     parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
 77 |     parser.add_argument('--save-interval', type=int, default=10000, help='save interval')
 78 | 
 79 | 
 80 |     parser.add_argument('--model-size', type=str, default='1.5x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model')
 81 | 
 82 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 83 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 84 | 
 85 |     args = parser.parse_args()
 86 |     return args
 87 | 
 88 | def main():
 89 |     args = get_args()
 90 | 
 91 |     # Log
 92 |     log_format = '[%(asctime)s] %(message)s'
 93 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 94 |         format=log_format, datefmt='%d %I:%M:%S')
 95 |     t = time.time()
 96 |     local_time = time.localtime(t)
 97 |     if not os.path.exists('./log'):
 98 |         os.mkdir('./log')
 99 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
100 |     fh.setFormatter(logging.Formatter(log_format))
101 |     logging.getLogger().addHandler(fh)
102 | 
103 |     use_gpu = False
104 |     if torch.cuda.is_available():
105 |         use_gpu = True
106 | 
107 |     assert os.path.exists(args.train_dir)
108 |     train_dataset = datasets.ImageFolder(
109 |         args.train_dir,
110 |         transforms.Compose([
111 |             transforms.RandomResizedCrop(224),
112 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
113 |             transforms.RandomHorizontalFlip(0.5),
114 |             ToBGRTensor(),
115 |         ])
116 |     )
117 |     train_loader = torch.utils.data.DataLoader(
118 |         train_dataset, batch_size=args.batch_size, shuffle=True,
119 |         num_workers=1, pin_memory=use_gpu)
120 |     train_dataprovider = DataIterator(train_loader)
121 | 
122 |     assert os.path.exists(args.val_dir)
123 |     val_loader = torch.utils.data.DataLoader(
124 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
125 |             OpencvResize(256),
126 |             transforms.CenterCrop(224),
127 |             ToBGRTensor(),
128 |         ])),
129 |         batch_size=200, shuffle=False,
130 |         num_workers=1, pin_memory=use_gpu
131 |     )
132 |     val_dataprovider = DataIterator(val_loader)
133 |     print('load data successfully')
134 | 
135 |     architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
136 |     model = ShuffleNetV2(model_size=args.model_size)
137 | 
138 |     optimizer = torch.optim.SGD(get_parameters(model),
139 |                                 lr=args.learning_rate,
140 |                                 momentum=args.momentum,
141 |                                 weight_decay=args.weight_decay)
142 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
143 | 
144 |     if use_gpu:
145 |         model = nn.DataParallel(model)
146 |         loss_function = criterion_smooth.cuda()
147 |         device = torch.device("cuda")
148 |     else:
149 |         loss_function = criterion_smooth
150 |         device = torch.device("cpu")
151 | 
152 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
153 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
154 | 
155 |     model = model.to(device)
156 | 
157 |     all_iters = 0
158 |     if args.auto_continue:
159 |         lastest_model, iters = get_lastest_model()
160 |         if lastest_model is not None:
161 |             all_iters = iters
162 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
163 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
164 |             print('load from checkpoint')
165 |             for i in range(iters):
166 |                 scheduler.step()
167 | 
168 |     args.optimizer = optimizer
169 |     args.loss_function = loss_function
170 |     args.scheduler = scheduler
171 |     args.train_dataprovider = train_dataprovider
172 |     args.val_dataprovider = val_dataprovider
173 | 
174 |     if args.eval:
175 |         if args.eval_resume is not None:
176 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
177 |             load_checkpoint(model, checkpoint)
178 |             validate(model, device, args, all_iters=all_iters)
179 |         exit(0)
180 | 
181 |     while all_iters < args.total_iters:
182 |         all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
183 |         validate(model, device, args, all_iters=all_iters)
184 |     all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
185 |     validate(model, device, args, all_iters=all_iters)
186 |     save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
187 | 
def adjust_bn_momentum(model, iters):
    """Set ``momentum`` to ``1 / iters`` on every ``nn.BatchNorm2d`` in *model*.

    Other module types (including other normalisation layers) are left alone.
    """
    batch_norms = [layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d)]
    for layer in batch_norms:
        layer.momentum = 1 / iters
192 | 
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` training iterations and return the updated counter.

    Args:
        model: network to optimise; it is switched to train mode.
        device: device every batch is moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations executed by this call.
        bn_process: when true, ``adjust_bn_momentum(model, iters)`` is applied
            before every iteration, with ``iters`` counting from 1 in this call.
        all_iters: global iteration count before this call.

    Returns:
        The global iteration count after this call.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # Iterations accumulated since the last log line. The accumulators restart
    # with every call, so a window can be shorter than display_interval.
    window_iters = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # The schedule is advanced before the optimizer update (ordering kept
        # from the original code so the learning-rate sequence is unchanged).
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        # accuracy() values are treated as percentages here.
        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100
        window_iters += 1

        if all_iters % args.display_interval == 0:
            # Average over the iterations actually accumulated, not over the
            # nominal display interval.
            printInfo = (
                'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item())
                + 'Top-1 err = {:.6f},\t'.format(Top1_err / window_iters)
                + 'Top-5 err = {:.6f},\t'.format(Top5_err / window_iters)
                + 'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window_iters)
            )
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window_iters = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters
240 | 
def validate(model, device, args, *, all_iters=None, max_val_iters=250):
    """Evaluate *model* on ``max_val_iters`` validation batches and log the result.

    Args:
        model: network to evaluate; it is switched to eval mode.
        device: device every batch is moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: global training iteration, used only in the log line.
        max_val_iters: number of batches pulled from ``val_dataprovider``.
            The default of 250 is the value that used to be hard-coded here.

    The loss and the Top-1/Top-5 errors are averaged with per-batch weights
    equal to the batch size and written through ``logging.info``.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = (
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg)
        + 'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100)
        + 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100)
        + 'val_time = {:.6f}'.format(time.time() - t1)
    )
    logging.info(logInfo)
272 | 
def load_checkpoint(net, checkpoint):
    """Load *checkpoint* into *net*, reconciling the ``module.`` key prefix.

    Args:
        net: module to load into, either bare or wrapped (for example in
            ``nn.DataParallel``, whose parameters are exposed under
            ``module.``).
        checkpoint: a state dict, or a dict holding one under ``'state_dict'``.

    Each checkpoint key is matched against the keys ``net`` itself exposes:
    it is used as is when present, otherwise the ``module.`` prefix is added
    or removed if that produces a known key. Unmatched keys are passed
    through unchanged so that ``strict=True`` still reports them.
    """
    from collections import OrderedDict

    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    prefix = 'module.'
    expected = set(net.state_dict().keys())
    temp = OrderedDict()
    for k, v in checkpoint.items():
        if k in expected:
            k2 = k
        elif prefix + k in expected:
            k2 = prefix + k
        elif k.startswith(prefix) and k[len(prefix):] in expected:
            k2 = k[len(prefix):]
        else:
            k2 = k
        temp[k2] = v

    net.load_state_dict(temp, strict=True)
284 | 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
287 | 
288 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.ExLarge/eval.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import argparse
  5 | import torch.nn as nn
  6 | import torchvision.transforms as transforms
  7 | import torchvision.datasets as datasets
  8 | import cv2
  9 | import numpy as np
 10 | import PIL
 11 | from PIL import Image
 12 | import time
 13 | import logging
 14 | import argparse
 15 | from network import ShuffleNetV2
 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 17 | 
 18 | class OpencvResize(object):
 19 | 
 20 |     def __init__(self, size=256):
 21 |         self.size = size
 22 | 
 23 |     def __call__(self, img):
 24 |         assert isinstance(img, PIL.Image.Image)
 25 |         img = np.asarray(img) # (H,W,3) RGB
 26 |         img = img[:,:,::-1] # 2 BGR
 27 |         img = np.ascontiguousarray(img)
 28 |         H, W, _ = img.shape
 29 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 30 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 31 |         img = img[:,:,::-1] # 2 RGB
 32 |         img = np.ascontiguousarray(img)
 33 |         img = Image.fromarray(img)
 34 |         return img
 35 | 
 36 | class ToBGRTensor(object):
 37 | 
 38 |     def __call__(self, img):
 39 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 40 |         if isinstance(img, PIL.Image.Image):
 41 |             img = np.asarray(img)
 42 |         img = img[:,:,::-1] # 2 BGR
 43 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 44 |         img = np.ascontiguousarray(img)
 45 |         img = torch.from_numpy(img).float()
 46 |         return img
 47 | 
 48 | class DataIterator(object):
 49 | 
 50 |     def __init__(self, dataloader):
 51 |         self.dataloader = dataloader
 52 |         self.iterator = enumerate(self.dataloader)
 53 | 
 54 |     def next(self):
 55 |         try:
 56 |             _, data = next(self.iterator)
 57 |         except Exception:
 58 |             self.iterator = enumerate(self.dataloader)
 59 |             _, data = next(self.iterator)
 60 |         return data[0], data[1]
 61 | 
 62 | def get_args():
 63 |     parser = argparse.ArgumentParser("ShuffleNetV2_Plus")
 64 |     parser.add_argument('--eval', default=False, action='store_true')
 65 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 66 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 67 |     parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
 68 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 69 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 70 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 71 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 72 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 73 | 
 74 |     parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue')
 75 |     parser.add_argument('--display-interval', type=int, default=20, help='display interval')
 76 |     parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
 77 |     parser.add_argument('--save-interval', type=int, default=10000, help='save interval')
 78 | 
 79 | 
 80 |     parser.add_argument('--model-size', type=str, default='1.5x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model')
 81 | 
 82 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 83 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 84 | 
 85 |     args = parser.parse_args()
 86 |     return args
 87 | 
 88 | def main():
 89 |     args = get_args()
 90 | 
 91 |     # Log
 92 |     log_format = '[%(asctime)s] %(message)s'
 93 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 94 |         format=log_format, datefmt='%d %I:%M:%S')
 95 |     t = time.time()
 96 |     local_time = time.localtime(t)
 97 |     if not os.path.exists('./log'):
 98 |         os.mkdir('./log')
 99 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
100 |     fh.setFormatter(logging.Formatter(log_format))
101 |     logging.getLogger().addHandler(fh)
102 | 
103 |     use_gpu = False
104 |     if torch.cuda.is_available():
105 |         use_gpu = True
106 | 
107 |     assert os.path.exists(args.train_dir)
108 |     train_dataset = datasets.ImageFolder(
109 |         args.train_dir,
110 |         transforms.Compose([
111 |             transforms.RandomResizedCrop(224),
112 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
113 |             transforms.RandomHorizontalFlip(0.5),
114 |             ToBGRTensor(),
115 |         ])
116 |     )
117 |     train_loader = torch.utils.data.DataLoader(
118 |         train_dataset, batch_size=args.batch_size, shuffle=True,
119 |         num_workers=1, pin_memory=use_gpu)
120 |     train_dataprovider = DataIterator(train_loader)
121 | 
122 |     assert os.path.exists(args.val_dir)
123 |     val_loader = torch.utils.data.DataLoader(
124 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
125 |             OpencvResize(256),
126 |             transforms.CenterCrop(224),
127 |             ToBGRTensor(),
128 |         ])),
129 |         batch_size=200, shuffle=False,
130 |         num_workers=1, pin_memory=use_gpu
131 |     )
132 |     val_dataprovider = DataIterator(val_loader)
133 |     print('load data successfully')
134 | 
135 |     architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
136 |     model = ShuffleNetV2(model_size=args.model_size)
137 | 
138 |     optimizer = torch.optim.SGD(get_parameters(model),
139 |                                 lr=args.learning_rate,
140 |                                 momentum=args.momentum,
141 |                                 weight_decay=args.weight_decay)
142 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
143 | 
144 |     if use_gpu:
145 |         model = nn.DataParallel(model)
146 |         loss_function = criterion_smooth.cuda()
147 |         device = torch.device("cuda")
148 |     else:
149 |         loss_function = criterion_smooth
150 |         device = torch.device("cpu")
151 | 
152 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
153 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
154 | 
155 |     model = model.to(device)
156 | 
157 |     all_iters = 0
158 |     if args.auto_continue:
159 |         lastest_model, iters = get_lastest_model()
160 |         if lastest_model is not None:
161 |             all_iters = iters
162 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
163 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
164 |             print('load from checkpoint')
165 |             for i in range(iters):
166 |                 scheduler.step()
167 | 
168 |     args.optimizer = optimizer
169 |     args.loss_function = loss_function
170 |     args.scheduler = scheduler
171 |     args.train_dataprovider = train_dataprovider
172 |     args.val_dataprovider = val_dataprovider
173 | 
174 |     if args.eval:
175 |         if args.eval_resume is not None:
176 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
177 |             load_checkpoint(model, checkpoint)
178 |             validate(model, device, args, all_iters=all_iters)
179 |         exit(0)
180 | 
181 |     while all_iters < args.total_iters:
182 |         all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
183 |         validate(model, device, args, all_iters=all_iters)
184 |     all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
185 |     validate(model, device, args, all_iters=all_iters)
186 |     save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
187 | 
def adjust_bn_momentum(model, iters):
    """Set ``momentum`` to ``1 / iters`` on every ``nn.BatchNorm2d`` in *model*.

    Other module types (including other normalisation layers) are left alone.
    """
    batch_norms = [layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d)]
    for layer in batch_norms:
        layer.momentum = 1 / iters
192 | 
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` training iterations and return the updated counter.

    Args:
        model: network to optimise; it is switched to train mode.
        device: device every batch is moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations executed by this call.
        bn_process: when true, ``adjust_bn_momentum(model, iters)`` is applied
            before every iteration, with ``iters`` counting from 1 in this call.
        all_iters: global iteration count before this call.

    Returns:
        The global iteration count after this call.
    """
    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # Iterations accumulated since the last log line. The accumulators restart
    # with every call, so a window can be shorter than display_interval.
    window_iters = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # The schedule is advanced before the optimizer update (ordering kept
        # from the original code so the learning-rate sequence is unchanged).
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        # accuracy() values are treated as percentages here.
        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100
        window_iters += 1

        if all_iters % args.display_interval == 0:
            # Average over the iterations actually accumulated, not over the
            # nominal display interval.
            printInfo = (
                'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item())
                + 'Top-1 err = {:.6f},\t'.format(Top1_err / window_iters)
                + 'Top-5 err = {:.6f},\t'.format(Top5_err / window_iters)
                + 'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window_iters)
            )
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window_iters = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters
240 | 
def validate(model, device, args, *, all_iters=None, max_val_iters=250):
    """Evaluate *model* on ``max_val_iters`` validation batches and log the result.

    Args:
        model: network to evaluate; it is switched to eval mode.
        device: device every batch is moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: global training iteration, used only in the log line.
        max_val_iters: number of batches pulled from ``val_dataprovider``.
            The default of 250 is the value that used to be hard-coded here.

    The loss and the Top-1/Top-5 errors are averaged with per-batch weights
    equal to the batch size and written through ``logging.info``.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = (
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg)
        + 'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100)
        + 'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100)
        + 'val_time = {:.6f}'.format(time.time() - t1)
    )
    logging.info(logInfo)
272 | 
def load_checkpoint(net, checkpoint):
    """Load *checkpoint* into *net*, reconciling the ``module.`` key prefix.

    Args:
        net: module to load into, either bare or wrapped (for example in
            ``nn.DataParallel``, whose parameters are exposed under
            ``module.``).
        checkpoint: a state dict, or a dict holding one under ``'state_dict'``.

    Each checkpoint key is matched against the keys ``net`` itself exposes:
    it is used as is when present, otherwise the ``module.`` prefix is added
    or removed if that produces a known key. Unmatched keys are passed
    through unchanged so that ``strict=True`` still reports them.
    """
    from collections import OrderedDict

    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    prefix = 'module.'
    expected = set(net.state_dict().keys())
    temp = OrderedDict()
    for k, v in checkpoint.items():
        if k in expected:
            k2 = k
        elif prefix + k in expected:
            k2 = prefix + k
        elif k.startswith(prefix) and k[len(prefix):] in expected:
            k2 = k[len(prefix):]
        else:
            k2 = k
        temp[k2] = v

    net.load_state_dict(temp, strict=True)
284 | 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
287 | 
288 | 


--------------------------------------------------------------------------------
/ShuffleNetV2+/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import argparse
  5 | import torch.nn as nn
  6 | import torchvision.transforms as transforms
  7 | import torchvision.datasets as datasets
  8 | import cv2
  9 | import numpy as np
 10 | import PIL
 11 | from PIL import Image
 12 | import time
 13 | import logging
 14 | import argparse
 15 | from network import ShuffleNetV2_Plus
 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 17 | 
 18 | class OpencvResize(object):
 19 | 
 20 |     def __init__(self, size=256):
 21 |         self.size = size
 22 | 
 23 |     def __call__(self, img):
 24 |         assert isinstance(img, PIL.Image.Image)
 25 |         img = np.asarray(img) # (H,W,3) RGB
 26 |         img = img[:,:,::-1] # 2 BGR
 27 |         img = np.ascontiguousarray(img)
 28 |         H, W, _ = img.shape
 29 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 30 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 31 |         img = img[:,:,::-1] # 2 RGB
 32 |         img = np.ascontiguousarray(img)
 33 |         img = Image.fromarray(img)
 34 |         return img
 35 | 
 36 | class ToBGRTensor(object):
 37 | 
 38 |     def __call__(self, img):
 39 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 40 |         if isinstance(img, PIL.Image.Image):
 41 |             img = np.asarray(img)
 42 |         img = img[:,:,::-1] # 2 BGR
 43 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 44 |         img = np.ascontiguousarray(img)
 45 |         img = torch.from_numpy(img).float()
 46 |         return img
 47 | 
 48 | class DataIterator(object):
 49 | 
 50 |     def __init__(self, dataloader):
 51 |         self.dataloader = dataloader
 52 |         self.iterator = enumerate(self.dataloader)
 53 | 
 54 |     def next(self):
 55 |         try:
 56 |             _, data = next(self.iterator)
 57 |         except Exception:
 58 |             self.iterator = enumerate(self.dataloader)
 59 |             _, data = next(self.iterator)
 60 |         return data[0], data[1]
 61 | 
 62 | 
 63 | def get_args():
 64 |     parser = argparse.ArgumentParser("ShuffleNetV2_Plus")
 65 |     parser.add_argument('--eval', default=False, action='store_true')
 66 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 67 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 68 |     parser.add_argument('--total-iters', type=int, default=450000, help='total iters')
 69 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 70 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 71 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 72 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 73 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 74 | 
 75 |     parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue')
 76 |     parser.add_argument('--display-interval', type=int, default=20, help='display interval')
 77 |     parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
 78 |     parser.add_argument('--save-interval', type=int, default=10000, help='save interval')
 79 | 
 80 |     parser.add_argument('--model-size', type=str, default='Large', choices=['Small', 'Medium', 'Large'], help='size of the model')
 81 | 
 82 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 83 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 84 | 
 85 |     args = parser.parse_args()
 86 |     return args
 87 | 
 88 | def main():
 89 |     args = get_args()
 90 | 
 91 |     # Log
 92 |     log_format = '[%(asctime)s] %(message)s'
 93 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 94 |         format=log_format, datefmt='%d %I:%M:%S')
 95 |     t = time.time()
 96 |     local_time = time.localtime(t)
 97 |     if not os.path.exists('./log'):
 98 |         os.mkdir('./log')
 99 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
100 |     fh.setFormatter(logging.Formatter(log_format))
101 |     logging.getLogger().addHandler(fh)
102 | 
103 |     use_gpu = False
104 |     if torch.cuda.is_available():
105 |         use_gpu = True
106 | 
107 |     assert os.path.exists(args.train_dir)
108 |     train_dataset = datasets.ImageFolder(
109 |         args.train_dir,
110 |         transforms.Compose([
111 |             transforms.RandomResizedCrop(224),
112 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
113 |             transforms.RandomHorizontalFlip(0.5),
114 |             ToBGRTensor(),
115 |         ])
116 |     )
117 |     train_loader = torch.utils.data.DataLoader(
118 |         train_dataset, batch_size=args.batch_size, shuffle=True,
119 |         num_workers=1, pin_memory=use_gpu)
120 |     train_dataprovider = DataIterator(train_loader)
121 | 
122 |     assert os.path.exists(args.val_dir)
123 |     val_loader = torch.utils.data.DataLoader(
124 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
125 |             OpencvResize(256),
126 |             transforms.CenterCrop(224),
127 |             ToBGRTensor(),
128 |         ])),
129 |         batch_size=200, shuffle=False,
130 |         num_workers=1, pin_memory=use_gpu
131 |     )
132 |     val_dataprovider = DataIterator(val_loader)
133 |     print('load data successfully')
134 | 
135 |     architecture = [0, 0, 3, 1, 1, 1, 0, 0, 2, 0, 2, 1, 1, 0, 2, 0, 2, 1, 3, 2]
136 |     model = ShuffleNetV2_Plus(architecture=architecture, model_size=args.model_size)
137 | 
138 |     optimizer = torch.optim.SGD(get_parameters(model),
139 |                                 lr=args.learning_rate,
140 |                                 momentum=args.momentum,
141 |                                 weight_decay=args.weight_decay)
142 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
143 | 
144 |     if use_gpu:
145 |         model = nn.DataParallel(model)
146 |         loss_function = criterion_smooth.cuda()
147 |         device = torch.device("cuda")
148 |     else:
149 |         loss_function = criterion_smooth
150 |         device = torch.device("cpu")
151 | 
152 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
153 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
154 | 
155 |     model = model.to(device)
156 | 
157 |     all_iters = 0
158 |     if args.auto_continue:
159 |         lastest_model, iters = get_lastest_model()
160 |         if lastest_model is not None:
161 |             all_iters = iters
162 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
163 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
164 |             print('load from checkpoint')
165 |             for i in range(iters):
166 |                 scheduler.step()
167 | 
168 |     args.optimizer = optimizer
169 |     args.loss_function = loss_function
170 |     args.scheduler = scheduler
171 |     args.train_dataprovider = train_dataprovider
172 |     args.val_dataprovider = val_dataprovider
173 | 
174 |     if args.eval:
175 |         if args.eval_resume is not None:
176 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
177 |             load_checkpoint(model, checkpoint)
178 |             validate(model, device, args, all_iters=all_iters)
179 |         exit(0)
180 | 
181 |     while all_iters < args.total_iters:
182 |         all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
183 |         validate(model, device, args, all_iters=all_iters)
184 |     all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
185 |     validate(model, device, args, all_iters=all_iters)
186 |     save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
187 | 
188 | 
def adjust_bn_momentum(model, iters):
    """Set the momentum of every ``nn.BatchNorm2d`` in ``model`` to ``1 / iters``.

    Called once per step with ``iters`` counting 1, 2, 3, ... this makes the
    running statistics a plain average over the batches seen so far.
    """
    bn_layers = (layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d))
    for layer in bn_layers:
        layer.momentum = 1 / iters
193 | 
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` training iterations and return the new global count.

    Args:
        model: network to optimise; switched to train mode here.
        device: torch device every batch is moved to.
        args: namespace carrying ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        bn_process: when True, BatchNorm2d momentum is set to ``1 / iters``
            before each step (``iters`` restarts at 1 in every call).
        all_iters: global iteration count so far; ``None`` is treated as 0.

    Returns:
        The global iteration count after this call.
    """
    if all_iters is None:
        all_iters = 0

    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # iterations accumulated since the last report; equals display_interval
    # only when this call started on a multiple of it
    window = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # NOTE(review): the scheduler is stepped before optimizer.step();
        # kept as is because the resume logic in main() replays the schedule
        # with the same convention.
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100
        window += 1

        if all_iters % args.display_interval == 0:
            # average over the iterations actually accumulated, so a
            # misaligned start does not understate the errors
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / window) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / window) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window)
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters
241 | 
def validate(model, device, args, *, all_iters=None, max_val_iters=250):
    """Evaluate ``model`` on validation batches and log loss and top-1/top-5 error.

    Args:
        model: network to evaluate; switched to eval mode here.
        device: torch device every batch is moved to.
        args: namespace carrying ``loss_function`` and ``val_dataprovider``.
        all_iters: global training iteration, used only in the log line.
        max_val_iters: number of batches to evaluate. The default of 250
            matches the previously hard-coded value (with the loader's batch
            size of 200 set in ``main`` that is 50,000 images). The data
            provider restarts when exhausted, so a larger value re-visits
            samples.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            # each meter is updated with the batch size as its count
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
273 | 
def load_checkpoint(net, checkpoint):
    """Load ``checkpoint`` into ``net`` regardless of ``module.`` key prefixes.

    Args:
        net: the network to fill; may or may not be wrapped in
            ``nn.DataParallel``.
        checkpoint: either a raw state dict or a dict that stores the state
            dict under the ``'state_dict'`` key.

    The keys are rewritten to match ``net``: a ``module.`` prefix is added for
    a ``DataParallel`` wrapper and removed for a bare module (the CPU path in
    ``main``), so the strict load succeeds in both cases.
    """
    from collections import OrderedDict

    prefix = 'module.'
    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    # DataParallel registers the real network as the ``module`` submodule,
    # which is why its state-dict keys carry the prefix.
    wrapped = isinstance(net, nn.DataParallel)

    temp = OrderedDict()
    for key, value in checkpoint.items():
        bare = key[len(prefix):] if key.startswith(prefix) else key
        temp[prefix + bare if wrapped else bare] = value

    net.load_state_dict(temp, strict=True)
285 | 
# Script entry point: only start training/evaluation when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
288 | 
289 | 


--------------------------------------------------------------------------------
/ShuffleNetV1/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import argparse
  5 | import torch.nn as nn
  6 | import torchvision.transforms as transforms
  7 | import torchvision.datasets as datasets
  8 | import cv2
  9 | import numpy as np
 10 | import PIL
 11 | from PIL import Image
 12 | import time
 13 | import logging
 14 | import argparse
 15 | from network import ShuffleNetV1
 16 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 17 | 
 18 | class OpencvResize(object):
 19 | 
 20 |     def __init__(self, size=256):
 21 |         self.size = size
 22 | 
 23 |     def __call__(self, img):
 24 |         assert isinstance(img, PIL.Image.Image)
 25 |         img = np.asarray(img) # (H,W,3) RGB
 26 |         img = img[:,:,::-1] # 2 BGR
 27 |         img = np.ascontiguousarray(img)
 28 |         H, W, _ = img.shape
 29 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 30 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 31 |         img = img[:,:,::-1] # 2 RGB
 32 |         img = np.ascontiguousarray(img)
 33 |         img = Image.fromarray(img)
 34 |         return img
 35 | 
 36 | class ToBGRTensor(object):
 37 | 
 38 |     def __call__(self, img):
 39 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 40 |         if isinstance(img, PIL.Image.Image):
 41 |             img = np.asarray(img)
 42 |         img = img[:,:,::-1] # 2 BGR
 43 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 44 |         img = np.ascontiguousarray(img)
 45 |         img = torch.from_numpy(img).float()
 46 |         return img
 47 | 
 48 | class DataIterator(object):
 49 | 
 50 |     def __init__(self, dataloader):
 51 |         self.dataloader = dataloader
 52 |         self.iterator = enumerate(self.dataloader)
 53 | 
 54 |     def next(self):
 55 |         try:
 56 |             _, data = next(self.iterator)
 57 |         except Exception:
 58 |             self.iterator = enumerate(self.dataloader)
 59 |             _, data = next(self.iterator)
 60 |         return data[0], data[1]
 61 | 
 62 | def get_args():
 63 |     parser = argparse.ArgumentParser("ShuffleNetV1")
 64 |     parser.add_argument('--eval', default=False, action='store_true')
 65 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 66 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 67 |     parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
 68 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 69 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 70 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 71 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 72 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 73 | 
 74 | 
 75 |     parser.add_argument('--auto-continue', type=bool, default=True, help='auto continue')
 76 |     parser.add_argument('--display-interval', type=int, default=20, help='display interval')
 77 |     parser.add_argument('--val-interval', type=int, default=10000, help='val interval')
 78 |     parser.add_argument('--save-interval', type=int, default=10000, help='save interval')
 79 | 
 80 | 
 81 |     parser.add_argument('--group', type=int, default=3, help='group number')
 82 |     parser.add_argument('--model-size', type=str, default='2.0x', choices=['0.5x', '1.0x', '1.5x', '2.0x'], help='size of the model')
 83 | 
 84 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 85 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 86 | 
 87 |     args = parser.parse_args()
 88 |     return args
 89 | 
 90 | def main():
 91 |     args = get_args()
 92 | 
 93 |     # Log
 94 |     log_format = '[%(asctime)s] %(message)s'
 95 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 96 |         format=log_format, datefmt='%d %I:%M:%S')
 97 |     t = time.time()
 98 |     local_time = time.localtime(t)
 99 |     if not os.path.exists('./log'):
100 |         os.mkdir('./log')
101 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
102 |     fh.setFormatter(logging.Formatter(log_format))
103 |     logging.getLogger().addHandler(fh)
104 | 
105 |     use_gpu = False
106 |     if torch.cuda.is_available():
107 |         use_gpu = True
108 | 
109 |     assert os.path.exists(args.train_dir)
110 |     train_dataset = datasets.ImageFolder(
111 |         args.train_dir,
112 |         transforms.Compose([
113 |             transforms.RandomResizedCrop(224),
114 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
115 |             transforms.RandomHorizontalFlip(0.5),
116 |             ToBGRTensor(),
117 |         ])
118 |     )
119 |     train_loader = torch.utils.data.DataLoader(
120 |         train_dataset, batch_size=args.batch_size, shuffle=True,
121 |         num_workers=1, pin_memory=use_gpu)
122 |     train_dataprovider = DataIterator(train_loader)
123 | 
124 |     assert os.path.exists(args.val_dir)
125 |     val_loader = torch.utils.data.DataLoader(
126 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
127 |             OpencvResize(256),
128 |             transforms.CenterCrop(224),
129 |             ToBGRTensor(),
130 |         ])),
131 |         batch_size=200, shuffle=False,
132 |         num_workers=1, pin_memory=use_gpu
133 |     )
134 |     val_dataprovider = DataIterator(val_loader)
135 |     print('load data successfully')
136 | 
137 |     model = ShuffleNetV1(group=args.group, model_size=args.model_size)
138 | 
139 |     optimizer = torch.optim.SGD(get_parameters(model),
140 |                                 lr=args.learning_rate,
141 |                                 momentum=args.momentum,
142 |                                 weight_decay=args.weight_decay)
143 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
144 | 
145 |     if use_gpu:
146 |         model = nn.DataParallel(model)
147 |         loss_function = criterion_smooth.cuda()
148 |         device = torch.device("cuda")
149 |     else:
150 |         loss_function = criterion_smooth
151 |         device = torch.device("cpu")
152 | 
153 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
154 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
155 | 
156 |     model = model.to(device)
157 | 
158 |     all_iters = 0
159 |     if args.auto_continue:
160 |         lastest_model, iters = get_lastest_model()
161 |         if lastest_model is not None:
162 |             all_iters = iters
163 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
164 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
165 |             print('load from checkpoint')
166 |             for i in range(iters):
167 |                 scheduler.step()
168 | 
169 |     args.optimizer = optimizer
170 |     args.loss_function = loss_function
171 |     args.scheduler = scheduler
172 |     args.train_dataprovider = train_dataprovider
173 |     args.val_dataprovider = val_dataprovider
174 | 
175 |     if args.eval:
176 |         if args.eval_resume is not None:
177 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
178 |             load_checkpoint(model, checkpoint)
179 |             validate(model, device, args, all_iters=all_iters)
180 |         exit(0)
181 | 
182 |     while all_iters < args.total_iters:
183 |         all_iters = train(model, device, args, val_interval=args.val_interval, bn_process=False, all_iters=all_iters)
184 |         validate(model, device, args, all_iters=all_iters)
185 |     all_iters = train(model, device, args, val_interval=int(1280000/args.batch_size), bn_process=True, all_iters=all_iters)
186 |     validate(model, device, args, all_iters=all_iters)
187 |     save_checkpoint({'state_dict': model.state_dict(),}, args.total_iters, tag='bnps-')
188 |     torch.save(model.state_dict(), 'model.mdl')
189 | 
def adjust_bn_momentum(model, iters):
    """Set the momentum of every ``nn.BatchNorm2d`` in ``model`` to ``1 / iters``.

    Called once per step with ``iters`` counting 1, 2, 3, ... this makes the
    running statistics a plain average over the batches seen so far.
    """
    bn_layers = (layer for layer in model.modules() if isinstance(layer, nn.BatchNorm2d))
    for layer in bn_layers:
        layer.momentum = 1 / iters
194 | 
def train(model, device, args, *, val_interval, bn_process=False, all_iters=None):
    """Run ``val_interval`` training iterations and return the new global count.

    Args:
        model: network to optimise; switched to train mode here.
        device: torch device every batch is moved to.
        args: namespace carrying ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        bn_process: when True, BatchNorm2d momentum is set to ``1 / iters``
            before each step (``iters`` restarts at 1 in every call).
        all_iters: global iteration count so far; ``None`` is treated as 0.

    Returns:
        The global iteration count after this call.
    """
    if all_iters is None:
        all_iters = 0

    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    # iterations accumulated since the last report; equals display_interval
    # only when this call started on a multiple of it
    window = 0
    model.train()
    for iters in range(1, val_interval + 1):
        # NOTE(review): the scheduler is stepped before optimizer.step();
        # kept as is because the resume logic in main() replays the schedule
        # with the same convention.
        scheduler.step()
        if bn_process:
            adjust_bn_momentum(model, iters)

        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100
        window += 1

        if all_iters % args.display_interval == 0:
            # average over the iterations actually accumulated, so a
            # misaligned start does not understate the errors
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, scheduler.get_lr()[0], loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / window) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / window) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / window)
            logging.info(printInfo)
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0
            window = 0

        if all_iters % args.save_interval == 0:
            save_checkpoint({
                'state_dict': model.state_dict(),
                }, all_iters)

    return all_iters
242 | 
def validate(model, device, args, *, all_iters=None, max_val_iters=250):
    """Evaluate ``model`` on validation batches and log loss and top-1/top-5 error.

    Args:
        model: network to evaluate; switched to eval mode here.
        device: torch device every batch is moved to.
        args: namespace carrying ``loss_function`` and ``val_dataprovider``.
        all_iters: global training iteration, used only in the log line.
        max_val_iters: number of batches to evaluate. The default of 250
            matches the previously hard-coded value (with the loader's batch
            size of 200 set in ``main`` that is 50,000 images). The data
            provider restarts when exhausted, so a larger value re-visits
            samples.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(max_val_iters):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            # each meter is updated with the batch size as its count
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
274 | 
def load_checkpoint(net, checkpoint):
    """Load ``checkpoint`` into ``net`` regardless of ``module.`` key prefixes.

    Args:
        net: the network to fill; may or may not be wrapped in
            ``nn.DataParallel``.
        checkpoint: either a raw state dict or a dict that stores the state
            dict under the ``'state_dict'`` key.

    The keys are rewritten to match ``net``: a ``module.`` prefix is added for
    a ``DataParallel`` wrapper and removed for a bare module (the CPU path in
    ``main``), so the strict load succeeds in both cases.
    """
    from collections import OrderedDict

    prefix = 'module.'
    if 'state_dict' in checkpoint:
        checkpoint = dict(checkpoint['state_dict'])

    # DataParallel registers the real network as the ``module`` submodule,
    # which is why its state-dict keys carry the prefix.
    wrapped = isinstance(net, nn.DataParallel)

    temp = OrderedDict()
    for key, value in checkpoint.items():
        bare = key[len(prefix):] if key.startswith(prefix) else key
        temp[prefix + bare if wrapped else bare] = value

    net.load_state_dict(temp, strict=True)
286 | 
# Script entry point: only start training/evaluation when run directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
289 | 
290 | 


--------------------------------------------------------------------------------
/DetNAS/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import torch.nn as nn
  5 | import time
  6 | import logging
  7 | import argparse
  8 | import torchvision.transforms as transforms
  9 | import torchvision.datasets as datasets
 10 | import cv2
 11 | import numpy as np
 12 | import PIL
 13 | from PIL import Image
 14 | from network import ShuffleNetV2DetNAS
 15 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 16 | 
 17 | class OpencvResize(object):
 18 | 
 19 |     def __init__(self, size=256):
 20 |         self.size = size
 21 | 
 22 |     def __call__(self, img):
 23 |         assert isinstance(img, PIL.Image.Image)
 24 |         img = np.asarray(img) # (H,W,3) RGB
 25 |         img = img[:,:,::-1] # 2 BGR
 26 |         img = np.ascontiguousarray(img)
 27 |         H, W, _ = img.shape
 28 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 29 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 30 |         img = img[:,:,::-1] # 2 RGB
 31 |         img = np.ascontiguousarray(img)
 32 |         img = Image.fromarray(img)
 33 |         return img
 34 | 
 35 | class ToBGRTensor(object):
 36 | 
 37 |     def __call__(self, img):
 38 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 39 |         if isinstance(img, PIL.Image.Image):
 40 |             img = np.asarray(img)
 41 |         img = img[:,:,::-1] # 2 BGR
 42 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 43 |         img = np.ascontiguousarray(img)
 44 |         img = torch.from_numpy(img).float()
 45 |         return img
 46 | 
 47 | class DataIterator(object):
 48 | 
 49 |     def __init__(self, dataloader):
 50 |         self.dataloader = dataloader
 51 |         self.iterator = enumerate(self.dataloader)
 52 | 
 53 |     def next(self):
 54 |         try:
 55 |             _, data = next(self.iterator)
 56 |         except Exception:
 57 |             self.iterator = enumerate(self.dataloader)
 58 |             _, data = next(self.iterator)
 59 |         return data[0], data[1]
 60 | 
 61 | 
 62 | def get_args():
 63 |     parser = argparse.ArgumentParser()
 64 |     parser.add_argument('--eval', default=False, action='store_true')
 65 |     parser.add_argument('--eval-resume', type=str, default='./snet_detnas.pkl', help='path for eval model')
 66 |     parser.add_argument('--batch-size', type=int, default=1024, help='batch size')
 67 |     parser.add_argument('--total-iters', type=int, default=300000, help='total iters')
 68 |     parser.add_argument('--learning-rate', type=float, default=0.5, help='init learning rate')
 69 |     parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 70 |     parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
 71 |     parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
 72 |     parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')
 73 | 
 74 |     parser.add_argument('--auto-continue', default=False, action='store_true', help='report frequency')
 75 |     parser.add_argument('--display-interval', type=int, default=20, help='report frequency')
 76 |     parser.add_argument('--val-interval', type=int, default=10000, help='report frequency')
 77 |     parser.add_argument('--save-interval', type=int, default=10000, help='report frequency')
 78 | 
 79 |     parser.add_argument('--model-size', type=str, default='VOC_FPN_300M',
 80 |                         choices=['COCO_FPN_300M',
 81 |                                  'COCO_FPN_1.3G',
 82 |                                  'COCO_FPN_3.8G',
 83 |                                  'COCO_RetinaNet_300M',
 84 |                                  'VOC_FPN_300M',
 85 |                                  'VOC_RetinaNet_300M'],
 86 |                         help='size of the model')
 87 | 
 88 |     parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
 89 |     parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')
 90 | 
 91 |     args = parser.parse_args()
 92 |     return args
 93 | 
 94 | 
 95 | def main():
 96 |     args = get_args()
 97 | 
 98 |     # Log
 99 |     log_format = '[%(asctime)s] %(message)s'
100 |     logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%d %I:%M:%S')
101 |     t = time.time()
102 |     local_time = time.localtime(t)
103 |     if not os.path.exists('./log'):
104 |         os.mkdir('./log')
105 |     fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
106 |     fh.setFormatter(logging.Formatter(log_format))
107 |     logging.getLogger().addHandler(fh)
108 | 
109 |     use_gpu = False
110 |     if torch.cuda.is_available():
111 |         use_gpu = True
112 | 
113 |     assert os.path.exists(args.train_dir)
114 |     train_dataset = datasets.ImageFolder(
115 |         args.train_dir,
116 |         transforms.Compose([
117 |             transforms.RandomResizedCrop(224),
118 |             transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
119 |             transforms.RandomHorizontalFlip(0.5),
120 |             ToBGRTensor(),
121 |         ])
122 |     )
123 |     train_loader = torch.utils.data.DataLoader(
124 |         train_dataset, batch_size=args.batch_size, shuffle=True,
125 |         num_workers=1, pin_memory=use_gpu)
126 |     train_dataprovider = DataIterator(train_loader)
127 | 
128 |     assert os.path.exists(args.val_dir)
129 |     val_loader = torch.utils.data.DataLoader(
130 |         datasets.ImageFolder(args.val_dir, transforms.Compose([
131 |             OpencvResize(256),
132 |             transforms.CenterCrop(224),
133 |             ToBGRTensor(),
134 |         ])),
135 |         batch_size=200, shuffle=False,
136 |         num_workers=1, pin_memory=use_gpu
137 |     )
138 |     val_dataprovider = DataIterator(val_loader)
139 |     print('load data successfully')
140 | 
141 |     model = ShuffleNetV2DetNAS(model_size=args.model_size)
142 |     if args.eval:
143 |         if args.eval_resume is not None:
144 |             checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
145 |             print('==> Resuming from checkpoint..')
146 |             load_checkpoint(model, checkpoint)
147 | 
148 |     optimizer = torch.optim.SGD(get_parameters(model), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
149 |     criterion_smooth = CrossEntropyLabelSmooth(1000, 0.1)
150 | 
151 |     if use_gpu:
152 |         model = nn.DataParallel(model)
153 |         loss_function = criterion_smooth.cuda()
154 |         device = torch.device("cuda")
155 |     else:
156 |         loss_function = criterion_smooth
157 |         device = torch.device("cpu")
158 | 
159 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
160 |                     lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)
161 | 
162 |     model = model.to(device)
163 | 
164 |     all_iters = 0
165 |     if args.auto_continue:
166 |         lastest_model, iters = get_lastest_model()
167 |         if lastest_model is not None:
168 |             all_iters = iters
169 |             checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
170 |             model.load_state_dict(checkpoint['state_dict'], strict=True)
171 |             optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
172 |             scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
173 |             print('load from checkpoint')
174 | 
175 |     args.optimizer = optimizer
176 |     args.loss_function = loss_function
177 |     args.scheduler = scheduler
178 |     args.train_dataprovider = train_dataprovider
179 |     args.val_dataprovider = val_dataprovider
180 | 
181 |     if args.eval:
182 |         if args.eval_resume is not None:
183 |             validate(model, device, args, all_iters=all_iters)
184 |     else:
185 |         while all_iters < args.total_iters:
186 |             all_iters = train(model, device, args, val_interval=args.val_interval, all_iters=all_iters)
187 |             validate(model, device, args, all_iters=all_iters)
188 |         save_checkpoint({'state_dict': model.state_dict(),
189 |                          'optimizer_state_dict': args.optimizer.state_dict(),
190 |                          'lr_scheduler_state_dict': args.scheduler.state_dict()},
191 |                         args.total_iters, tag='bnps-')
192 | 
193 | 
def train(model, device, args, *, val_interval, all_iters=None):
    """Run ``val_interval`` training iterations and return the new iteration count.

    Args:
        model: network to optimise; called as ``model(data)``.
        device: torch device the batches are moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        all_iters: global iteration counter on entry; ``None`` is treated as 0.

    Returns:
        The global iteration counter after this call.
    """
    if all_iters is None:
        all_iters = 0

    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    model.train()
    for _ in range(val_interval):
        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device)
        data_time = time.time() - d_st

        output = model(data)
        loss = loss_function(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Learning rate that was actually applied by the update above.
        current_lr = optimizer.param_groups[0]['lr']
        # Advance the schedule only after the optimizer update; stepping first
        # skips the initial learning rate and makes the last iteration run
        # with a learning rate of zero.
        scheduler.step()
        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, current_lr, loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval)
            logging.info(printInfo)
            # Restart the running error sums and the timer for the next window.
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint({'state_dict': model.state_dict(),
                             'optimizer_state_dict': args.optimizer.state_dict(),
                             'lr_scheduler_state_dict': args.scheduler.state_dict()},
                            all_iters)

    return all_iters
239 | 
240 | 
def validate(model, device, args, *, all_iters=None):
    """Evaluate ``model`` on 250 validation batches and log the averaged metrics.

    Args:
        model: network to evaluate; switched to eval mode here.
        device: torch device the batches are moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: iteration number used only in the log line.
    """
    loss_meter = AvgrageMeter()
    top1_meter = AvgrageMeter()
    top5_meter = AvgrageMeter()

    criterion = args.loss_function
    provider = args.val_dataprovider

    model.eval()
    num_batches = 250
    started = time.time()
    with torch.no_grad():
        for _ in range(num_batches):
            images, labels = provider.next()
            labels = labels.type(torch.LongTensor)
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            batch_loss = criterion(logits, labels)
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))

            # Weight every meter by the batch size so the averages are per sample.
            batch_size = images.size(0)
            loss_meter.update(batch_loss.item(), batch_size)
            top1_meter.update(acc1.item(), batch_size)
            top5_meter.update(acc5.item(), batch_size)

    pieces = [
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, loss_meter.avg),
        'Top-1 err = {:.6f},\t'.format(1 - top1_meter.avg / 100),
        'Top-5 err = {:.6f},\t'.format(1 - top5_meter.avg / 100),
        'val_time = {:.6f}'.format(time.time() - started),
    ]
    logging.info(''.join(pieces))
272 | 
273 | 
def load_checkpoint(net, checkpoint):
    """Copy matching parameters and buffers from ``checkpoint`` into ``net``.

    Args:
        net: module whose ``named_parameters()`` and ``named_buffers()`` are
            filled in place.
        checkpoint: either a mapping of tensor names to tensors, or a mapping
            holding such a mapping under the ``'state_dict'`` key. A leading
            ``'module.'`` on a name is dropped before matching. The mapping
            passed in is not modified.

    Names present in ``net`` but missing from the checkpoint are printed,
    except for names containing ``'predict'``.
    """
    if 'state_dict' in checkpoint:
        checkpoint = checkpoint['state_dict']

    # Build a fresh mapping instead of popping/inserting while iterating:
    # changing a dict's keys during iteration raises RuntimeError on recent
    # Python versions and would also alter the caller's dict.
    prefix = 'module.'
    state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint.items()
    }

    for name, param in net.named_parameters():
        if name in state:
            param.data = state[name].data
        elif 'predict' not in name:
            print(name)
    for name, buffer in net.named_buffers():
        if name in state:
            buffer.data = state[name].data
        elif 'predict' not in name:
            print(name)
292 | 
293 | 
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()
296 | 
297 | 


--------------------------------------------------------------------------------
/ShuffleNetV2.Large/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import torch
  4 | import torch.nn as nn
  5 | import time
  6 | import logging
  7 | import argparse
  8 | import torchvision.transforms as transforms
  9 | import torchvision.datasets as datasets
 10 | import cv2
 11 | import numpy as np
 12 | import PIL
 13 | from PIL import Image
 14 | from network import ShuffleNetV2
 15 | from utils import accuracy, AvgrageMeter, CrossEntropyLabelSmooth, save_checkpoint, get_lastest_model, get_parameters
 16 | 
 17 | class OpencvResize(object):
 18 | 
 19 |     def __init__(self, size=256):
 20 |         self.size = size
 21 | 
 22 |     def __call__(self, img):
 23 |         assert isinstance(img, PIL.Image.Image)
 24 |         img = np.asarray(img) # (H,W,3) RGB
 25 |         img = img[:,:,::-1] # 2 BGR
 26 |         img = np.ascontiguousarray(img)
 27 |         H, W, _ = img.shape
 28 |         target_size = (int(self.size/H * W + 0.5), self.size) if H < W else (self.size, int(self.size/W * H + 0.5))
 29 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
 30 |         img = img[:,:,::-1] # 2 RGB
 31 |         img = np.ascontiguousarray(img)
 32 |         img = Image.fromarray(img)
 33 |         return img
 34 | 
 35 | class ToBGRTensor(object):
 36 | 
 37 |     def __call__(self, img):
 38 |         assert isinstance(img, (np.ndarray, PIL.Image.Image))
 39 |         if isinstance(img, PIL.Image.Image):
 40 |             img = np.asarray(img)
 41 |         img = img[:,:,::-1] # 2 BGR
 42 |         img = np.transpose(img, [2, 0, 1]) # 2 (3, H, W)
 43 |         img = np.ascontiguousarray(img)
 44 |         img = torch.from_numpy(img).float()
 45 |         return img
 46 | 
 47 | class DataIterator(object):
 48 | 
 49 |     def __init__(self, dataloader):
 50 |         self.dataloader = dataloader
 51 |         self.iterator = enumerate(self.dataloader)
 52 | 
 53 |     def next(self):
 54 |         try:
 55 |             _, data = next(self.iterator)
 56 |         except Exception:
 57 |             self.iterator = enumerate(self.dataloader)
 58 |             _, data = next(self.iterator)
 59 |         return data[0], data[1]
 60 | 
 61 | class Lighting(object):
 62 | 
 63 |     def __init__(self, alphastd, eigval=None, eigvec=None):
 64 |         self.alphastd = alphastd
 65 |         if eigval is None:
 66 |             eigval = torch.Tensor([0.2175, 0.0188, 0.0045])
 67 |         if eigvec is None:
 68 |             eigvec = torch.Tensor([
 69 |                 [-0.5675, 0.7192, 0.4009],
 70 |                 [-0.5808, -0.0045, -0.8140],
 71 |                 [-0.5836, -0.6948, 0.4203],
 72 |             ])
 73 |         self.eigval = eigval
 74 |         self.eigvec = eigvec
 75 | 
 76 |     def __call__(self, img):
 77 |         """
 78 |         :param img : (N,3,H,W) RGB
 79 |         """
 80 |         if self.alphastd == 0:
 81 |             return img
 82 | 
 83 |         device = img.device
 84 |         alpha = torch.normal(torch.zeros_like(self.eigval), self.alphastd)
 85 |         alpha = alpha.to(device)
 86 |         eigval = self.eigval.to(device)
 87 |         eigvec = self.eigvec.to(device)
 88 |         rgb = torch.mm(eigvec, eigval.mul(alpha).reshape(3,1)).squeeze() # (3)
 89 |         img = img.add(rgb.view(1, 3, 1, 1))
 90 |         return img
 91 | 
 92 | class ColorNormalize(object):
 93 | 
 94 |     def __init__(self, mean=None, std=None):
 95 |         if mean is None:
 96 |             mean = torch.Tensor([0.485, 0.456, 0.406])
 97 |         if std is None:
 98 |             std = torch.Tensor([0.229, 0.224, 0.225])
 99 |         self.mean = mean
100 |         self.std = std
101 | 
102 |     def __call__(self, img):
103 |         """
104 |         :param img : (N,3,H,W) RGB
105 |         """
106 |         device = img.device
107 |         mean = self.mean.to(device)
108 |         std = self.std.to(device)
109 |         img.sub_(mean.reshape(1, -1, 1, 1)).div_(std.reshape(1, -1, 1, 1))
110 |         return img
111 | 
def get_mean(path='./ImageNet_1000_scale224_mean.xml'):
    """Load the mean image stored in an XML file.

    The file must contain a ``MeanImg`` element with a ``data`` child whose
    text holds 224*224*3 whitespace-separated numbers, laid out as (H, W, C).

    Args:
        path: location of the XML file; defaults to the file name this
            function previously hard-coded.

    Returns:
        A float numpy array of shape (1, 3, 224, 224).
    """
    from xml.dom.minidom import parse

    root = parse(path).documentElement
    mean_node = root.getElementsByTagName('MeanImg')[0]
    data_node = mean_node.getElementsByTagName('data')[0]
    text = data_node.childNodes[0].data

    # split() with no argument drops blanks and newlines between numbers.
    values = [float(token) for token in text.split()]
    for value in values:
        assert value <= 255
    mean = np.array(values).reshape((224, 224, 3))  # BGR
    mean = np.transpose(mean, [2, 0, 1])
    return mean[np.newaxis, ...]
134 | 
def get_args():
    """Parse the command-line options of the training/evaluation script.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval', default=False, action='store_true')
    parser.add_argument('--eval-resume', type=str, default='./snetv2_residual_se.pkl', help='path for eval model')
    parser.add_argument('--batch-size', type=int, default=256, help='batch size')
    parser.add_argument('--total-iters', type=int, default=600000, help='total iters')
    parser.add_argument('--learning-rate', type=float, default=0.25, help='init learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
    parser.add_argument('--weight-decay', type=float, default=4e-5, help='weight decay')
    parser.add_argument('--save', type=str, default='./models', help='path for saving trained models')
    parser.add_argument('--label-smooth', type=float, default=0.1, help='label smoothing')

    # The three options below used to share the copy-pasted help text of
    # --display-interval; each now describes what it controls.
    parser.add_argument('--auto-continue', default=False, action='store_true', help='resume from the latest saved checkpoint')
    parser.add_argument('--display-interval', type=int, default=20, help='report frequency')
    parser.add_argument('--val-interval', type=int, default=10000, help='validation frequency (iterations)')
    parser.add_argument('--save-interval', type=int, default=10000, help='checkpoint saving frequency (iterations)')

    parser.add_argument('--train-dir', type=str, default='data/train', help='path to training dataset')
    parser.add_argument('--val-dir', type=str, default='data/val', help='path to validation dataset')

    args = parser.parse_args()
    return args
157 | 
158 | 
def main():
    """Train or evaluate ShuffleNetV2 according to the command-line arguments.

    Sets up logging to stdout and to a file under ``./log``, builds the
    training and validation loaders, the model, optimizer, loss and learning
    rate schedule, then either validates once (``--eval``) or alternates
    training and validation until ``--total-iters`` is reached and saves a
    final checkpoint.
    """
    args = get_args()

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000, local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = torch.cuda.is_available()

    assert os.path.exists(args.train_dir)
    train_dataset = datasets.ImageFolder(
        args.train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
        ])
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=1, pin_memory=use_gpu)
    train_dataprovider = DataIterator(train_loader)

    assert os.path.exists(args.val_dir)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(args.val_dir, transforms.Compose([
            OpencvResize(256),
            transforms.CenterCrop(224),
            ToBGRTensor(),
        ])),
        batch_size=200, shuffle=False,
        num_workers=1, pin_memory=use_gpu
    )
    val_dataprovider = DataIterator(val_loader)
    print('load data successfully')

    model = ShuffleNetV2()
    if args.eval:
        if args.eval_resume is not None:
            checkpoint = torch.load(args.eval_resume, map_location=None if use_gpu else 'cpu')
            print('==> Resuming from checkpoint..')
            load_checkpoint(model, checkpoint)

    optimizer = torch.optim.SGD(get_parameters(model), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    # Honour --label-smooth; its default (0.1) equals the value that used to
    # be hard-coded here, so default runs are unchanged.
    criterion_smooth = CrossEntropyLabelSmooth(1000, args.label_smooth)

    if use_gpu:
        model = nn.DataParallel(model)
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    # Linear decay of the learning rate to zero over total_iters steps.
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                    lambda step : (1.0-step/args.total_iters) if step <= args.total_iters else 0, last_epoch=-1)

    model = model.to(device)

    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model, map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            scheduler.load_state_dict(checkpoint['lr_scheduler_state_dict'])
            print('load from checkpoint')

    # train() and validate() read these objects back from args.
    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider

    if args.eval:
        if args.eval_resume is not None:
            validate(model, device, args, all_iters=all_iters)
    else:
        while all_iters < args.total_iters:
            all_iters = train(model, device, args, val_interval=args.val_interval, all_iters=all_iters)
            validate(model, device, args, all_iters=all_iters)
        save_checkpoint({'state_dict': model.state_dict(),
                         'optimizer_state_dict': args.optimizer.state_dict(),
                         'lr_scheduler_state_dict': args.scheduler.state_dict()},
                        args.total_iters, tag='bnps-')
256 | 
257 | 
def train(model, device, args, *, val_interval, all_iters=None):
    """Run ``val_interval`` training iterations and return the new iteration count.

    The model is expected to return four outputs per batch; the loss is the
    weighted sum of the loss on each, and accuracy is measured on the first.

    Args:
        model: network to optimise; called as ``model(data)``.
        device: torch device the batches are moved to.
        args: namespace providing ``optimizer``, ``loss_function``,
            ``scheduler``, ``train_dataprovider``, ``display_interval`` and
            ``save_interval``.
        val_interval: number of iterations to run in this call.
        all_iters: global iteration counter on entry; ``None`` is treated as 0.

    Returns:
        The global iteration counter after this call.
    """
    if all_iters is None:
        all_iters = 0

    optimizer = args.optimizer
    loss_function = args.loss_function
    scheduler = args.scheduler
    train_dataprovider = args.train_dataprovider

    # The augmenters only hold constants, so build them once instead of on
    # every iteration.
    lighting = Lighting(alphastd=0.1)
    normalize = ColorNormalize()

    t1 = time.time()
    Top1_err, Top5_err = 0.0, 0.0
    model.train()
    for _ in range(val_interval):
        all_iters += 1
        d_st = time.time()
        data, target = train_dataprovider.next()
        target = target.type(torch.LongTensor)
        data, target = data.to(device), target.to(device) # (N,3,H,W) RGB 0~1
        data = normalize(lighting(data))
        # Reverse the channel axis (RGB -> BGR) on the device itself rather
        # than copying the batch to the host and back.
        data = torch.flip(data, dims=[1])
        data_time = time.time() - d_st

        output_7, output_14, output_28, output_56 = model(data)
        loss = 1.0 * loss_function(output_7, target) + 0.7 * loss_function(output_14, target) + \
            0.5 * loss_function(output_28, target) + 0.3 * loss_function(output_56, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Learning rate that was actually applied by the update above.
        current_lr = optimizer.param_groups[0]['lr']
        # Advance the schedule only after the optimizer update; stepping first
        # skips the initial learning rate and makes the last iteration run
        # with a learning rate of zero.
        scheduler.step()
        prec1, prec5 = accuracy(output_7, target, topk=(1, 5))

        Top1_err += 1 - prec1.item() / 100
        Top5_err += 1 - prec5.item() / 100

        if all_iters % args.display_interval == 0:
            printInfo = 'TRAIN Iter {}: lr = {:.6f},\tloss = {:.6f},\t'.format(all_iters, current_lr, loss.item()) + \
                        'Top-1 err = {:.6f},\t'.format(Top1_err / args.display_interval) + \
                        'Top-5 err = {:.6f},\t'.format(Top5_err / args.display_interval) + \
                        'data_time = {:.6f},\ttrain_time = {:.6f}'.format(data_time, (time.time() - t1) / args.display_interval)
            logging.info(printInfo)
            # Restart the running error sums and the timer for the next window.
            t1 = time.time()
            Top1_err, Top5_err = 0.0, 0.0

        if all_iters % args.save_interval == 0:
            save_checkpoint({'state_dict': model.state_dict(),
                             'optimizer_state_dict': args.optimizer.state_dict(),
                             'lr_scheduler_state_dict': args.scheduler.state_dict()},
                            all_iters)

    return all_iters
308 | 
309 | 
def validate(model, device, args, *, all_iters=None):
    """Evaluate ``model`` on 250 validation batches and log the averaged metrics.

    The mean image returned by ``get_mean()`` is subtracted from every batch
    before it is fed to the model.

    Args:
        model: network to evaluate; switched to eval mode here.
        device: torch device the batches are moved to.
        args: namespace providing ``loss_function`` and ``val_dataprovider``.
        all_iters: iteration number used only in the log line.
    """
    loss_meter = AvgrageMeter()
    top1_meter = AvgrageMeter()
    top5_meter = AvgrageMeter()

    criterion = args.loss_function
    provider = args.val_dataprovider

    mean_image = torch.from_numpy(get_mean()).to(device).float() # (1, 3, 224, 224) BGR

    model.eval()
    num_batches = 250
    started = time.time()
    with torch.no_grad():
        for _ in range(num_batches):
            images, labels = provider.next()
            labels = labels.type(torch.LongTensor)
            images = images.to(device) # BGR [0,255]
            labels = labels.to(device)
            images -= mean_image

            logits = model(images)
            batch_loss = criterion(logits, labels)
            acc1, acc5 = accuracy(logits, labels, topk=(1, 5))

            # Weight every meter by the batch size so the averages are per sample.
            batch_size = images.size(0)
            loss_meter.update(batch_loss.item(), batch_size)
            top1_meter.update(acc1.item(), batch_size)
            top5_meter.update(acc5.item(), batch_size)

    pieces = [
        'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, loss_meter.avg),
        'Top-1 err = {:.6f},\t'.format(1 - top1_meter.avg / 100),
        'Top-5 err = {:.6f},\t'.format(1 - top5_meter.avg / 100),
        'val_time = {:.6f}'.format(time.time() - started),
    ]
    logging.info(''.join(pieces))
345 | 
346 | 
347 | 
def load_checkpoint(net, checkpoint):
    """Copy matching parameters and buffers from ``checkpoint`` into ``net``.

    Args:
        net: module whose ``named_parameters()`` and ``named_buffers()`` are
            filled in place.
        checkpoint: either a mapping of tensor names to tensors, or a mapping
            holding such a mapping under the ``'state_dict'`` key. A leading
            ``'module.'`` on a name is dropped before matching. The mapping
            passed in is not modified.

    Names present in ``net`` but missing from the checkpoint are printed,
    except for names containing ``'predict'``.
    """
    if 'state_dict' in checkpoint:
        checkpoint = checkpoint['state_dict']

    # Build a fresh mapping instead of popping/inserting while iterating:
    # changing a dict's keys during iteration raises RuntimeError on recent
    # Python versions and would also alter the caller's dict.
    prefix = 'module.'
    state = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint.items()
    }

    for name, param in net.named_parameters():
        if name in state:
            param.data = state[name].data
        elif 'predict' not in name:
            print(name)
    for name, buffer in net.named_buffers():
        if name in state:
            buffer.data = state[name].data
        elif 'predict' not in name:
            print(name)
366 | 
367 | 
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()
370 | 
371 | 


--------------------------------------------------------------------------------