├── .gitignore
├── LICENSE
├── README.md
├── logs
├── nin_1875527964
│ └── log.txt
├── resnet-pre-act_17934627
│ └── log.txt
├── vgg_24208029
│ └── log.txt
└── wide-resnet_1121914561
│ └── log.txt
├── models
├── nin.lua
├── resnet-pre-act.lua
├── utils.lua
├── vgg.lua
└── wide-resnet.lua
├── notebooks
├── sweeper.py
└── visualize.ipynb
├── pretrained
├── README.md
└── wide-resnet.lua
├── pytorch
├── README.md
├── main.py
├── requirements.txt
├── resnet.py
└── utils.py
├── scripts
├── train_cifar.sh
└── train_svhn.sh
└── train.lua
/.gitignore:
--------------------------------------------------------------------------------
1 | logs
2 | notebooks/.ipynb_checkpoints
3 | *.pyc
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2016, Sergey Zagoruyko
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Wide Residual Networks
2 | =============
3 |
4 | This code was used for experiments with Wide Residual Networks (BMVC 2016) http://arxiv.org/abs/1605.07146 by Sergey Zagoruyko and Nikos Komodakis.
5 |
6 | Deep residual networks were shown to be able to scale up to thousands of
7 | layers and still have improving performance. However, each fraction of a
8 | percent of improved accuracy costs nearly doubling the number of layers, and so
9 | training very deep residual networks has a problem of diminishing feature
10 | reuse, which makes these networks very slow to train.
11 |
12 | To tackle these problems,
13 | in this work we conduct a detailed experimental study on the architecture of
14 | ResNet blocks, based on which we propose a novel architecture where we *decrease
15 | depth* and *increase width* of residual networks. We call the resulting network
16 | structures **wide residual networks (WRNs)** and show that these are far superior
17 | to their commonly used thin and very deep counterparts.
18 |
19 | For example, we
20 | demonstrate that even a simple 16-layer-deep wide residual network outperforms
21 | in accuracy and efficiency all previous deep residual networks, including
22 | thousand-layer-deep networks. We further show that WRNs achieve **incredibly**
23 | good results (e.g., achieving new state-of-the-art results on
24 | CIFAR-10, CIFAR-100, SVHN, COCO and substantial improvements on ImageNet) and train **several times faster** than pre-activation ResNets.
25 |
26 | **Update (August 2019):** Pretrained ImageNet WRN models are available in
27 | torchvision 0.4 and [PyTorch Hub](https://pytorch.org/hub/pytorch_vision_wide_resnet/), e.g. loading WRN-50-2:
28 | ```python
29 | model = torch.hub.load('pytorch/vision', 'wide_resnet50_2', pretrained=True)
30 | ```
31 |
32 | **Update (November 2016):** We updated the paper with ImageNet, COCO and meanstd preprocessing CIFAR results.
33 | If you're comparing your method against WRN, please report numbers obtained with the correct preprocessing, because different preprocessing gives substantially different results.
34 |
35 | tl;dr: ImageNet WRN-50-2-bottleneck (ResNet-50 with a wider inner bottleneck 3x3 convolution) is significantly faster than ResNet-152 and has better accuracy; on CIFAR, meanstd preprocessing (as in fb.resnet.torch) gives better results than ZCA whitening; on COCO, a wide ResNet with 34 layers outperforms even the Inception-v4-based Fast-RCNN model in single-model performance.
36 |
37 | Test error (%, flip/translation augmentation, **meanstd** normalization, median of 5 runs) on CIFAR:
38 |
39 | Network | CIFAR-10 | CIFAR-100 |
40 | -----------------|:--------:|:--------:
41 | pre-ResNet-164 | 5.46 | 24.33
42 | pre-ResNet-1001 | 4.92 | 22.71
43 | WRN-28-10 | 4.00 | 19.25
44 | WRN-28-10-dropout| **3.89** | **18.85**
45 |
46 | Single-time runs (meanstd normalization):
47 |
48 | Dataset | network | test perf. |
49 | --------|:-------:|:---------:|
50 | CIFAR-10 | WRN-40-10-dropout | 3.8%
51 | CIFAR-100 | WRN-40-10-dropout | 18.3%
52 | SVHN | WRN-16-8-dropout | 1.54%
53 | ImageNet (single crop) | WRN-50-2-bottleneck | 21.9% top-1, 5.79% top-5
54 | COCO-val5k (single model) | WRN-34-2 | 36 mAP
55 |
56 | See http://arxiv.org/abs/1605.07146 for details.
57 |
58 | 
59 |
60 | bibtex:
61 |
62 | ```
63 | @INPROCEEDINGS{Zagoruyko2016WRN,
64 | author = {Sergey Zagoruyko and Nikos Komodakis},
65 | title = {Wide Residual Networks},
66 | booktitle = {BMVC},
67 | year = {2016}}
68 | ```
69 |
70 | # Pretrained models
71 |
72 | ## ImageNet
73 |
74 | WRN-50-2-bottleneck (wider bottleneck), see [pretrained](pretrained/README.md) for details
75 | Download (263MB): https://yadi.sk/d/-8AWymOPyVZns
76 |
77 | There are also PyTorch and Tensorflow model definitions with pretrained weights at
78 |
79 |
80 | ## COCO
81 |
82 | Coming
83 |
84 | # Installation
85 |
86 | The code depends on Torch http://torch.ch. Follow the instructions [here](http://torch.ch/docs/getting-started.html) and run:
87 |
88 | ```
89 | luarocks install torchnet
90 | luarocks install optnet
91 | luarocks install iterm
92 | ```
93 |
94 | For visualizing training curves we used ipython notebook with pandas and bokeh.
95 |
96 | # Usage
97 |
98 | ## Dataset support
99 |
100 | The code supports loading simple datasets in torch format. We provide the following:
101 |
102 | * MNIST
103 | [data preparation script](https://gist.github.com/szagoruyko/8467ee15d020ab2a7ce80a215af71b74)
104 | * CIFAR-10
105 | [**recommended**]
106 | [data preparation script](https://gist.github.com/szagoruyko/e5cf5e9b54661a817695c8c7b5c3dfa6),
107 | [preprocessed data (176MB)](https://yadi.sk/d/eFmOduZyxaBrT)
108 | * CIFAR-10 whitened (using pylearn2)
109 | [preprocessed dataset](https://yadi.sk/d/em4b0FMgrnqxy)
110 | * CIFAR-100
111 | [**recommended**]
112 | [data preparation script](https://gist.github.com/szagoruyko/01bfa936396f913a899ee49b98e7304b),
113 | [preprocessed data (176MB)](https://yadi.sk/d/ZbiXAegjxaBcM)
114 | * CIFAR-100 whitened (using pylearn2)
115 | [preprocessed dataset](https://yadi.sk/d/em4b0FMgrnqxy)
116 | * SVHN [data preparation script](https://gist.github.com/szagoruyko/27712564a3f3765c5bfd933b56a21757)
117 |
118 | To whiten CIFAR-10 and CIFAR-100 we used the following scripts https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/datasets/make_cifar10_gcn_whitened.py and then converted to torch using https://gist.github.com/szagoruyko/ad2977e4b8dceb64c68ea07f6abf397b and npy to torch converter https://github.com/htwaijry/npy4th.
119 |
120 | We are running ImageNet experiments and will update the paper and this repo soon.
121 |
122 | ## Training
123 |
124 | We provide several scripts for reproducing results in the paper. Below are several examples.
125 |
126 | ```bash
127 | model=wide-resnet widen_factor=4 depth=40 ./scripts/train_cifar.sh
128 | ```
129 |
130 | This will train WRN-40-4 on whitened CIFAR-10 (the dataset is expected to be in the `datasets` folder). This network achieves about the same accuracy as ResNet-1001 and trains in 6 hours on a single Titan X.
131 | The log is saved to the `logs/wide-resnet_$RANDOM$RANDOM` folder, with a json entry for each epoch, and can be visualized with itorch/ipython later.
132 |
133 | For reference we provide logs for this experiment and [ipython notebook](notebooks/visualize.ipynb) to visualize the results. After running it you should see these training curves:
134 |
135 | 
136 |
137 | Another example:
138 |
139 | ```bash
140 | model=wide-resnet widen_factor=10 depth=28 dropout=0.3 dataset=./datasets/cifar100_whitened.t7 ./scripts/train_cifar.sh
141 | ```
142 |
143 | This network achieves 20.0% error on CIFAR-100 in about a day on a single Titan X.
144 |
145 | Multi-GPU is supported with `nGPU=n` parameter.
146 |
147 | ## Other models
148 |
149 | Additional models in this repo:
150 |
151 | * NIN (7.4% on CIFAR-10 whitened)
152 | * VGG (modified from [cifar.torch](https://github.com/szagoruyko/cifar.torch), 6.3% on CIFAR-10 whitened)
153 | * pre-activation ResNet (from https://github.com/KaimingHe/resnet-1k-layers)
154 |
155 | ## Implementation details
156 |
157 | The code evolved from https://github.com/szagoruyko/cifar.torch. To reduce memory usage we use @fmassa's [optimize-net](https://github.com/fmassa/optimize-net), which automatically shares output and gradient tensors between modules. This keeps memory usage below 4 GB even for our best networks. It can also generate network graph plots, such as the one for WRN-16-2 at the end of this page.
158 |
159 | # Acknowledgements
160 |
161 | We thank the startup company [VisionLabs](http://www.visionlabs.ru/en/) and Eugenio Culurciello for giving us access to their clusters; without them, the ImageNet experiments wouldn't have been possible. We also thank Adam Lerer and Sam Gross for helpful discussions. This work was supported by EC project FP7-ICT-611145 ROBOSPECT.
162 |
163 |
164 |
--------------------------------------------------------------------------------
/logs/nin_1875527964/log.txt:
--------------------------------------------------------------------------------
1 | {
2 | optnet_optimize : true
3 | generate_graph : false
4 | init_value : 10
5 | randomcrop : 4
6 | batchSize : 128
7 | epoch_step :
8 | {
9 | 1 : 60
10 | 2 : 120
11 | 3 : 160
12 | }
13 | randomcrop_type : "reflection"
14 | model : "nin"
15 | save : "logs/nin_1875527964"
16 | dampening : 0
17 | learningRate : 0.1
18 | shortcutType : "A"
19 | nesterov : true
20 | cudnn_deterministic : false
21 | depth : 50
22 | learningRateDecayRatio : 0.2
23 | multiply_input_factor : 1
24 | dataset : "./datasets/cifar10_whitened.t7"
25 | weightDecay : 0.0005
26 | momentum : 0.9
27 | optimMethod : "sgd"
28 | hflip : true
29 | max_epoch : 200
30 | imageSize : 32
31 | dropout : 0
32 | learningRateDecay : 0
33 | cudnn_fastest : true
34 | widen_factor : 1
35 | }
36 | ==> loading data
37 | ==> configuring model
38 | forward output {
39 | 1 : FloatTensor - size: 1x10
40 | }
41 | backward output {
42 | 1 : FloatTensor - size: 1x3x32x32
43 | }
44 | nn.Sequential {
45 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> (11) -> (12) -> (13) -> (14) -> (15) -> (16) -> (17) -> (18) -> (19) -> (20) -> (21) -> (22) -> (23) -> (24) -> (25) -> (26) -> (27) -> (28) -> (29) -> (30) -> (31) -> (32) -> output]
46 | (1): cudnn.SpatialConvolution(3 -> 192, 5x5, 1,1, 2,2)
47 | (2): cudnn.SpatialBatchNormalization
48 | (3): cudnn.ReLU
49 | (4): cudnn.SpatialConvolution(192 -> 160, 1x1)
50 | (5): cudnn.SpatialBatchNormalization
51 | (6): cudnn.ReLU
52 | (7): cudnn.SpatialConvolution(160 -> 96, 1x1)
53 | (8): cudnn.SpatialBatchNormalization
54 | (9): cudnn.ReLU
55 | (10): cudnn.SpatialMaxPooling(3x3, 2,2)
56 | (11): cudnn.SpatialConvolution(96 -> 192, 5x5, 1,1, 2,2)
57 | (12): cudnn.SpatialBatchNormalization
58 | (13): cudnn.ReLU
59 | (14): cudnn.SpatialConvolution(192 -> 192, 1x1)
60 | (15): cudnn.SpatialBatchNormalization
61 | (16): cudnn.ReLU
62 | (17): cudnn.SpatialConvolution(192 -> 192, 1x1)
63 | (18): cudnn.SpatialBatchNormalization
64 | (19): cudnn.ReLU
65 | (20): cudnn.SpatialAveragePooling(3x3, 2,2)
66 | (21): cudnn.SpatialConvolution(192 -> 192, 3x3, 1,1, 1,1)
67 | (22): cudnn.SpatialBatchNormalization
68 | (23): cudnn.ReLU
69 | (24): cudnn.SpatialConvolution(192 -> 192, 1x1)
70 | (25): cudnn.SpatialBatchNormalization
71 | (26): cudnn.ReLU
72 | (27): cudnn.SpatialConvolution(192 -> 192, 1x1)
73 | (28): cudnn.SpatialBatchNormalization
74 | (29): cudnn.ReLU
75 | (30): cudnn.SpatialAveragePooling(8x8, 1,1)
76 | (31): nn.View(-1)
77 | (32): nn.Linear(192 -> 10)
78 | }
79 | Network has 9 convolutions
80 | Will save at logs/nin_1875527964
81 | Network has 1007242 parameters
82 | ==> setting criterion
83 | ==> configuring optimizer
84 | ==> online epoch # 1 [batchSize = 128]
85 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.593863964081,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":1,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":54.78,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.4729490280151,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":1.1969858704469}
86 | ==> online epoch # 2 [batchSize = 128]
87 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.532868862152,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":2,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":70.87,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2549231052399,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.82106501123844}
88 | ==> online epoch # 3 [batchSize = 128]
89 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.588787078857,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":3,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":67.43,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2515769004822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.69562138464206}
90 | ==> online epoch # 4 [batchSize = 128]
91 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.437535047531,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":4,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.97,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2535479068756,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.64121536719493}
92 | ==> online epoch # 5 [batchSize = 128]
93 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.63557600975,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":5,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.93,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2503349781036,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.60574328242204}
94 | ==> online epoch # 6 [batchSize = 128]
95 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.368470191956,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":6,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":69.68,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2589159011841,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.56973296541434}
96 | ==> online epoch # 7 [batchSize = 128]
97 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.608664035797,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":7,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2541699409485,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.55406727538659}
98 | ==> online epoch # 8 [batchSize = 128]
99 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.443886041641,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":8,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":71.06,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2503561973572,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.53045730017699}
100 | ==> online epoch # 9 [batchSize = 128]
101 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.654844045639,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":9,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.36,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.254166841507,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.5159140002269}
102 | ==> online epoch # 10 [batchSize = 128]
103 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":56.00287604332,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":10,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.11,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.258073091507,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.50183857289644}
104 | ==> online epoch # 11 [batchSize = 128]
105 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.120012998581,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":11,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.3,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2523429393768,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.49553745870407}
106 | ==> online epoch # 12 [batchSize = 128]
107 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.61984705925,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":12,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.18,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2490620613098,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.48498020233252}
108 | ==> online epoch # 13 [batchSize = 128]
109 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.887814044952,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":13,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":73.53,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2499670982361,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.47510553697745}
110 | ==> online epoch # 14 [batchSize = 128]
111 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.945474147797,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":14,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.36,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481338977814,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.46514304058674}
112 | ==> online epoch # 15 [batchSize = 128]
113 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.944491863251,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":15,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.23,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2524788379669,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.4617732351407}
114 | ==> online epoch # 16 [batchSize = 128]
115 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.834233045578,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":16,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.13,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2483010292053,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.45532201711948}
116 | ==> online epoch # 17 [batchSize = 128]
117 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.916064977646,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":17,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.12,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2509729862213,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.44790997589246}
118 | ==> online epoch # 18 [batchSize = 128]
119 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.023838996887,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":18,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.44,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2504661083221,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.44162713854741}
120 | ==> online epoch # 19 [batchSize = 128]
121 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.770215988159,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":19,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.9,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2507870197296,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.43800124143943}
122 | ==> online epoch # 20 [batchSize = 128]
123 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.90252494812,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":20,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.03,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2497539520264,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.43589583990666}
124 | ==> online epoch # 21 [batchSize = 128]
125 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.002173900604,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":21,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":66.82,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496049404144,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42981643737891}
126 | ==> online epoch # 22 [batchSize = 128]
127 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.752336978912,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":22,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":82.59,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492098808289,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42459514660713}
128 | ==> online epoch # 23 [batchSize = 128]
129 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.789345026016,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":23,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.19,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2487859725952,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42145103521836}
130 | ==> online epoch # 24 [batchSize = 128]
131 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.813308000565,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":24,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.51,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2489840984344,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41741959108756}
132 | ==> online epoch # 25 [batchSize = 128]
133 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.880307912827,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":25,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.82,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2450361251831,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41995204568673}
134 | ==> online epoch # 26 [batchSize = 128]
135 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.844065904617,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":26,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.03,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530159950256,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.40897574275732}
136 | ==> online epoch # 27 [batchSize = 128]
137 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.667095184326,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":27,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.62,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2375519275665,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41404951084883}
138 | ==> online epoch # 28 [batchSize = 128]
139 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.418707847595,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":28,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.25,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2441000938416,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.4057252982488}
140 | ==> online epoch # 29 [batchSize = 128]
141 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.580642938614,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":29,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.16,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2447679042816,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39754433024388}
142 | ==> online epoch # 30 [batchSize = 128]
143 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.571613073349,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":30,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.21,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2439520359039,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39858484837489}
144 | ==> online epoch # 31 [batchSize = 128]
145 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.805215835571,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":31,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.34,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530789375305,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.40288508194379}
146 | ==> online epoch # 32 [batchSize = 128]
147 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.553646802902,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":32,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2410068511963,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39816183722936}
148 | ==> online epoch # 33 [batchSize = 128]
149 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486525058746,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":33,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":70.26,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.245041847229,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39601638683906}
150 | ==> online epoch # 34 [batchSize = 128]
151 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.5560131073,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":34,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443888187408,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39213467993033}
152 | ==> online epoch # 35 [batchSize = 128]
153 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.545255899429,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":35,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.58,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449429035187,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.3885368809104}
154 | ==> online epoch # 36 [batchSize = 128]
155 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.537830114365,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":36,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.274099111557,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39021477790979}
156 | ==> online epoch # 37 [batchSize = 128]
157 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.767641067505,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":37,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":82.12,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437970638275,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38730258360887}
158 | ==> online epoch # 38 [batchSize = 128]
159 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486897945404,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":38,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.67,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2433269023895,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38219698361861}
160 | ==> online epoch # 39 [batchSize = 128]
161 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.628032922745,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":39,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.04,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2432010173798,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38763711039837}
162 | ==> online epoch # 40 [batchSize = 128]
163 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.536153078079,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":40,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.11,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2452688217163,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38575872286008}
164 | ==> online epoch # 41 [batchSize = 128]
165 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520205974579,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":41,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.83,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241760969162,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38098671753437}
166 | ==> online epoch # 42 [batchSize = 128]
167 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.468014001846,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":42,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.42,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496681213379,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38319841267971}
168 | ==> online epoch # 43 [batchSize = 128]
169 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.556786060333,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":43,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482600212097,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37964692081396}
170 | ==> online epoch # 44 [batchSize = 128]
171 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.497092008591,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":44,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.29,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438809871674,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37826023327234}
172 | ==> online epoch # 45 [batchSize = 128]
173 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.417484045029,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":45,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.65,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2540519237518,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38129784755218}
174 | ==> online epoch # 46 [batchSize = 128]
175 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.612913131714,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":46,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438228130341,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37656217454336}
176 | ==> online epoch # 47 [batchSize = 128]
177 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.595071077347,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":47,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":68.27,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2512409687042,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37258849484034}
178 | ==> online epoch # 48 [batchSize = 128]
179 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.653529167175,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":48,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.61,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2416579723358,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37765663812558}
180 | ==> online epoch # 49 [batchSize = 128]
181 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.506092071533,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":49,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.53,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2470300197601,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37032264463412}
182 | ==> online epoch # 50 [batchSize = 128]
183 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.531888008118,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":50,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.96,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2538690567017,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37068568055446}
184 | ==> online epoch # 51 [batchSize = 128]
185 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501205205917,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":51,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":83.19,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.252256155014,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37402288734149}
186 | ==> online epoch # 52 [batchSize = 128]
187 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543003797531,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":52,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":73.99,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2470951080322,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36888554314008}
188 | ==> online epoch # 53 [batchSize = 128]
189 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486098051071,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":53,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.71,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2463281154633,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36890217501384}
190 | ==> online epoch # 54 [batchSize = 128]
191 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.55241394043,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":54,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.02,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2498989105225,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36993713669288}
192 | ==> online epoch # 55 [batchSize = 128]
193 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450776815414,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":55,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.6,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2452819347382,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36913666110008}
194 | ==> online epoch # 56 [batchSize = 128]
195 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.542490005493,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":56,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":83.64,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427079677582,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.3689535304904}
196 | ==> online epoch # 57 [batchSize = 128]
197 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.437011003494,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":57,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.4,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2431769371033,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36746362615854}
198 | ==> online epoch # 58 [batchSize = 128]
199 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.555178880692,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":58,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.93,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454750537872,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36633080626145}
200 | ==> online epoch # 59 [batchSize = 128]
201 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.464647054672,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":59,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.09,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453479766846,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36666739491316}
202 | ==> online epoch # 60 [batchSize = 128]
203 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.503804922104,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":60,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440459728241,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.20818299402793}
204 | ==> online epoch # 61 [batchSize = 128]
205 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.495636940002,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":61,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.11,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481241226196,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.16236374943684}
206 | ==> online epoch # 62 [batchSize = 128]
207 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.565950155258,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":62,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.06,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2420511245728,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.14666115751442}
208 | ==> online epoch # 63 [batchSize = 128]
209 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.502928972244,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":63,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.13,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2500689029694,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.13470643959366}
210 | ==> online epoch # 64 [batchSize = 128]
211 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.547512054443,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":64,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.97,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2418549060822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12917851983355}
212 | ==> online epoch # 65 [batchSize = 128]
213 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.530552864075,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":65,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.66,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2429277896881,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1217747542243}
214 | ==> online epoch # 66 [batchSize = 128]
215 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.522407054901,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":66,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2444458007812,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11924994465632}
216 | ==> online epoch # 67 [batchSize = 128]
217 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.45677614212,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":67,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.58,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2485420703888,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11929867765269}
218 | ==> online epoch # 68 [batchSize = 128]
219 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.461474180222,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":68,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442350387573,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11808920183625}
220 | ==> online epoch # 69 [batchSize = 128]
221 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.601045131683,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":69,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.49,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437310218811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11948176400784}
222 | ==> online epoch # 70 [batchSize = 128]
223 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.556071996689,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":70,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.37,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2431380748749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11758094648711}
224 | ==> online epoch # 71 [batchSize = 128]
225 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.50079703331,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":71,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.19,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443370819092,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12548536382233}
226 | ==> online epoch # 72 [batchSize = 128]
227 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.50164103508,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":72,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.99,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2577300071716,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12277139732853}
228 | ==> online epoch # 73 [batchSize = 128]
229 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.557413101196,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":73,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.71,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449638843536,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12368412876549}
230 | ==> online epoch # 74 [batchSize = 128]
231 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.590755939484,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":74,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.78,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2457990646362,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12241119453922}
232 | ==> online epoch # 75 [batchSize = 128]
233 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.539636135101,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":75,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.77,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2458119392395,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1267889219981}
234 | ==> online epoch # 76 [batchSize = 128]
235 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.581067085266,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":76,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2409319877625,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12495670402661}
236 | ==> online epoch # 77 [batchSize = 128]
237 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.457403182983,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":77,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2478671073914,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12759307682132}
238 | ==> online epoch # 78 [batchSize = 128]
239 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.560487985611,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":78,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.25,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2419581413269,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1243928169354}
240 | ==> online epoch # 79 [batchSize = 128]
241 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.416492938995,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":79,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":85.91,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465958595276,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12609680067652}
242 | ==> online epoch # 80 [batchSize = 128]
243 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.632702827454,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":80,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.38,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2502009868622,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12994859545277}
244 | ==> online epoch # 81 [batchSize = 128]
245 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524478912354,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":81,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440950870514,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12787336121576}
246 | ==> online epoch # 82 [batchSize = 128]
247 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.612407922745,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":82,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.69,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492918968201,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12567707441556}
248 | ==> online epoch # 83 [batchSize = 128]
249 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.411056995392,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":83,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2420258522034,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1243204951955}
250 | ==> online epoch # 84 [batchSize = 128]
251 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510627985001,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":84,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.03,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453551292419,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12435456805695}
252 | ==> online epoch # 85 [batchSize = 128]
253 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.480562925339,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":85,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.38,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2415220737457,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12520780682755}
254 | ==> online epoch # 86 [batchSize = 128]
255 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.475887060165,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":86,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":84.83,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442979812622,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12019503449973}
256 | ==> online epoch # 87 [batchSize = 128]
257 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.428845882416,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":87,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2574808597565,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11920055043048}
258 | ==> online epoch # 88 [batchSize = 128]
259 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.554698944092,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":88,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.24,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481360435486,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12393878577993}
260 | ==> online epoch # 89 [batchSize = 128]
261 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.506734132767,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":89,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.34,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449119091034,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12871642884058}
262 | ==> online epoch # 90 [batchSize = 128]
263 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.545350074768,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":90,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.89,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2464830875397,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12214053118458}
264 | ==> online epoch # 91 [batchSize = 128]
265 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.449155092239,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":91,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428939342499,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11818734575541}
266 | ==> online epoch # 92 [batchSize = 128]
267 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.518404006958,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":92,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.44,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446639537811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12180570296657}
268 | ==> online epoch # 93 [batchSize = 128]
269 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510896921158,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":93,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.46,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2653188705444,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11845285176562}
270 | ==> online epoch # 94 [batchSize = 128]
271 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543385982513,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":94,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.1,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2435641288757,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11678733453155}
272 | ==> online epoch # 95 [batchSize = 128]
273 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.477043151855,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":95,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.07,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428460121155,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11897576571657}
274 | ==> online epoch # 96 [batchSize = 128]
275 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.624214887619,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":96,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":84.28,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445249557495,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11904100564141}
276 | ==> online epoch # 97 [batchSize = 128]
277 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501276016235,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":97,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.67,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2391991615295,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11647340873113}
278 | ==> online epoch # 98 [batchSize = 128]
279 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.449658155441,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":98,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.65,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2489409446716,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11487070041207}
280 | ==> online epoch # 99 [batchSize = 128]
281 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.39834189415,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":99,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.94,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.243931055069,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12033484642131}
282 | ==> online epoch # 100 [batchSize = 128]
283 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520040988922,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":100,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.25,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445020675659,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11764579631197}
284 | ==> online epoch # 101 [batchSize = 128]
285 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.521477937698,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":101,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.61,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443239688873,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11709465831996}
286 | ==> online epoch # 102 [batchSize = 128]
287 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.547205924988,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":102,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.98,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2500238418579,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12114164804419}
288 | ==> online epoch # 103 [batchSize = 128]
289 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543912887573,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":103,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.45,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2557911872864,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10805225480252}
290 | ==> online epoch # 104 [batchSize = 128]
291 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.601721048355,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":104,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2498631477356,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11217697963405}
292 | ==> online epoch # 105 [batchSize = 128]
293 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.516308069229,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":105,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.21,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453751564026,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11557923503793}
294 | ==> online epoch # 106 [batchSize = 128]
295 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.495959997177,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":106,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":85.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2460300922394,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11261214604362}
296 | ==> online epoch # 107 [batchSize = 128]
297 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.65331697464,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":107,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2417590618134,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11322574942158}
298 | ==> online epoch # 108 [batchSize = 128]
299 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.478212833405,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":108,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.57,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2612581253052,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11255719629236}
300 | ==> online epoch # 109 [batchSize = 128]
301 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.476624965668,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":109,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.82,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438879013062,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11577705828807}
302 | ==> online epoch # 110 [batchSize = 128]
303 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.607400894165,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":110,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.64,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424070835114,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10901697488645}
304 | ==> online epoch # 111 [batchSize = 128]
305 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.571146965027,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":111,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.52,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469940185547,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10439068487821}
306 | ==> online epoch # 112 [batchSize = 128]
307 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501214981079,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":112,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427980899811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1087359870187}
308 | ==> online epoch # 113 [batchSize = 128]
309 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543877124786,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":113,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2601130008698,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11583366151421}
310 | ==> online epoch # 114 [batchSize = 128]
311 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.608434200287,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":114,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.61,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427129745483,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10374431608197}
312 | ==> online epoch # 115 [batchSize = 128]
313 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.492336034775,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":115,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.14,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446849346161,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10607366448221}
314 | ==> online epoch # 116 [batchSize = 128]
315 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.384386062622,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":116,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.18,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2474370002747,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11034816891337}
316 | ==> online epoch # 117 [batchSize = 128]
317 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.530400037766,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":117,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.69,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2414441108704,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11196253771583}
318 | ==> online epoch # 118 [batchSize = 128]
319 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.534486055374,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":118,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.14,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451329231262,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11396147064292}
320 | ==> online epoch # 119 [batchSize = 128]
321 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450636863708,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":119,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.42,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2497298717499,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10939630023562}
322 | ==> online epoch # 120 [batchSize = 128]
323 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.463079929352,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":120,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.11,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465319633484,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.048100662412934}
324 | ==> online epoch # 121 [batchSize = 128]
325 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.569537878036,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":121,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.42,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2562310695648,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.02959025050394}
326 | ==> online epoch # 122 [batchSize = 128]
327 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.637385129929,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":122,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.52,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2412090301514,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.024007035295169}
328 | ==> online epoch # 123 [batchSize = 128]
329 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.600746154785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":123,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.62,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2499098777771,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.021907049111831}
330 | ==> online epoch # 124 [batchSize = 128]
331 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.454932928085,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":124,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.67,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2476890087128,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.019307417838046}
332 | ==> online epoch # 125 [batchSize = 128]
333 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.441093206406,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":125,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2414410114288,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.017742788266295}
334 | ==> online epoch # 126 [batchSize = 128]
335 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.455188989639,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":126,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.64,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454380989075,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.016401587278606}
336 | ==> online epoch # 127 [batchSize = 128]
337 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.467864990234,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":127,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241947889328,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.014926031800226}
338 | ==> online epoch # 128 [batchSize = 128]
339 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.533565044403,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":128,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.69,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2432818412781,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.01391430978114}
340 | ==> online epoch # 129 [batchSize = 128]
341 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.569344997406,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":129,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436130046844,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.013448232364578}
342 | ==> online epoch # 130 [batchSize = 128]
343 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.485431909561,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":130,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.5,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.247474193573,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.013354300869963}
344 | ==> online epoch # 131 [batchSize = 128]
345 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450932025909,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":131,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.69,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469120025635,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.012197144162387}
346 | ==> online epoch # 132 [batchSize = 128]
347 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.558818101883,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":132,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.79,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424931526184,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.011325099218923}
348 | ==> online epoch # 133 [batchSize = 128]
349 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.48894906044,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":133,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.68,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434628009796,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010793494394956}
350 | ==> online epoch # 134 [batchSize = 128]
351 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.505841970444,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":134,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.77,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437591552734,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010930944802478}
352 | ==> online epoch # 135 [batchSize = 128]
353 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.58614897728,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":135,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.67,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496569156647,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010146045068709}
354 | ==> online epoch # 136 [batchSize = 128]
355 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.484592914581,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":136,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.76,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2537951469421,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010516702675093}
356 | ==> online epoch # 137 [batchSize = 128]
357 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.539739847183,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":137,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.84,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451858520508,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010800205873182}
358 | ==> online epoch # 138 [batchSize = 128]
359 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510965108871,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":138,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.54,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438318729401,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0097131812610687}
360 | ==> online epoch # 139 [batchSize = 128]
361 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.508059024811,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":139,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426409721375,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0094331520299117}
362 | ==> online epoch # 140 [batchSize = 128]
363 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.515514850616,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":140,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2462821006775,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0095086083245965}
364 | ==> online epoch # 141 [batchSize = 128]
365 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.426674842834,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":141,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.51,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2578938007355,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091447023579325}
366 | ==> online epoch # 142 [batchSize = 128]
367 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.441915988922,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":142,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2683110237122,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091797006770204}
368 | ==> online epoch # 143 [batchSize = 128]
369 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.656672000885,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":143,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2413201332092,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091757497344262}
370 | ==> online epoch # 144 [batchSize = 128]
371 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.532930135727,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":144,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.48,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492570877075,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0090615240331644}
372 | ==> online epoch # 145 [batchSize = 128]
373 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.444036006927,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":145,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.45,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438859939575,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0092722899304369}
374 | ==> online epoch # 146 [batchSize = 128]
375 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.517123937607,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":146,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2475869655609,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0092046390598019}
376 | ==> online epoch # 147 [batchSize = 128]
377 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520902872086,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":147,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.55,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2421779632568,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.008476875254359}
378 | ==> online epoch # 148 [batchSize = 128]
379 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.654017925262,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":148,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.44,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2514848709106,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0085756237403705}
380 | ==> online epoch # 149 [batchSize = 128]
381 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.552144765854,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":149,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.244772195816,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0086590464394062}
382 | ==> online epoch # 150 [batchSize = 128]
383 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.528169155121,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":150,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.57,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2425131797791,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.008542746410538}
384 | ==> online epoch # 151 [batchSize = 128]
385 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.526846885681,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":151,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.7,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2472369670868,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0079688250684203}
386 | ==> online epoch # 152 [batchSize = 128]
387 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.597927093506,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":152,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.75,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2419338226318,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0079787191051321}
388 | ==> online epoch # 153 [batchSize = 128]
389 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520629167557,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":153,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.44,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469570636749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081460221551168}
390 | ==> online epoch # 154 [batchSize = 128]
391 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.669497013092,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":154,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.68,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2429950237274,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081049349159002}
392 | ==> online epoch # 155 [batchSize = 128]
393 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.598120927811,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":155,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2529668807983,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0076991175564054}
394 | ==> online epoch # 156 [batchSize = 128]
395 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.563598155975,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":156,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.5,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.242280960083,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081053374191889}
396 | ==> online epoch # 157 [batchSize = 128]
397 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.488450050354,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":157,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2525689601898,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0078322904279981}
398 | ==> online epoch # 158 [batchSize = 128]
399 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524356126785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":158,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.41,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454540729523,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0078825698162501}
400 | ==> online epoch # 159 [batchSize = 128]
401 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.502348184586,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":159,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.242996931076,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0082889589934777}
402 | ==> online epoch # 160 [batchSize = 128]
403 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.499071121216,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":160,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465269565582,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0073816621246246}
404 | ==> online epoch # 161 [batchSize = 128]
405 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.437483072281,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":161,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.85,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424199581146,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0068039200149285}
406 | ==> online epoch # 162 [batchSize = 128]
407 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.464745998383,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":162,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.76,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2421388626099,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0068422729483782}
408 | ==> online epoch # 163 [batchSize = 128]
409 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.484056949615,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":163,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.56,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2484450340271,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062748136763007}
410 | ==> online epoch # 164 [batchSize = 128]
411 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.546632051468,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":164,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.244167804718,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0065681714086961}
412 | ==> online epoch # 165 [batchSize = 128]
413 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.596024990082,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":165,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.67,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2483789920807,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0067759589459269}
414 | ==> online epoch # 166 [batchSize = 128]
415 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.526091098785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":166,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2405340671539,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061572475406604}
416 | ==> online epoch # 167 [batchSize = 128]
417 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.582150936127,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":167,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2663550376892,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0065574873955204}
418 | ==> online epoch # 168 [batchSize = 128]
419 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.68184709549,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":168,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436389923096,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062123294442128}
420 | ==> online epoch # 169 [batchSize = 128]
421 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.522497177124,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":169,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.56,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438719272614,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063478323081747}
422 | ==> online epoch # 170 [batchSize = 128]
423 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.64563703537,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":170,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.6,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442688941956,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.006331152917865}
424 | ==> online epoch # 171 [batchSize = 128]
425 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.453807115555,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":171,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.71,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440838813782,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061411829139942}
426 | ==> online epoch # 172 [batchSize = 128]
427 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.474457979202,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":172,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.72,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2488949298859,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060788853046222}
428 | ==> online epoch # 173 [batchSize = 128]
429 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.548737049103,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":173,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.63,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2540090084076,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062972170897783}
430 | ==> online epoch # 174 [batchSize = 128]
431 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.424373865128,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":174,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.85,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482089996338,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062021393185625}
432 | ==> online epoch # 175 [batchSize = 128]
433 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.541701078415,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":175,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.55,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426030635834,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062863830190438}
434 | ==> online epoch # 176 [batchSize = 128]
435 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.496028900146,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":176,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.62,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426710128784,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0059864581013337}
436 | ==> online epoch # 177 [batchSize = 128]
437 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.576555967331,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":177,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451550960541,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060988159086078}
438 | ==> online epoch # 178 [batchSize = 128]
439 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524843215942,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":178,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428200244904,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061548010661052}
440 | ==> online epoch # 179 [batchSize = 128]
441 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.521260023117,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":179,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.61,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453989982605,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063890275521538}
442 | ==> online epoch # 180 [batchSize = 128]
443 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.529162168503,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":180,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445778846741,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060930794200454}
444 | ==> online epoch # 181 [batchSize = 128]
445 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.402792930603,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":181,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.4,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2468218803406,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061355742936333}
446 | ==> online epoch # 182 [batchSize = 128]
447 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.46883893013,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":182,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.45,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.243901014328,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058311326954609}
448 | ==> online epoch # 183 [batchSize = 128]
449 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.164771795273,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":183,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449779510498,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062234142604165}
450 | ==> online epoch # 184 [batchSize = 128]
451 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.035275936127,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":184,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.7,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2461631298065,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063772084669043}
452 | ==> online epoch # 185 [batchSize = 128]
453 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.064441919327,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":185,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.51,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241240978241,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057631540470398}
454 | ==> online epoch # 186 [batchSize = 128]
455 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.002777099609,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":186,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482149600983,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.006140244532472}
456 | ==> online epoch # 187 [batchSize = 128]
457 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.205068826675,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":187,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.65,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436771392822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057536732978546}
458 | ==> online epoch # 188 [batchSize = 128]
459 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.153147935867,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":188,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434120178223,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061327080600537}
460 | ==> online epoch # 189 [batchSize = 128]
461 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.114384174347,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":189,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446839809418,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060385716649202}
462 | ==> online epoch # 190 [batchSize = 128]
463 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.283517837524,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":190,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530851364136,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058259687744654}
464 | ==> online epoch # 191 [batchSize = 128]
465 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.125494003296,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":191,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.62,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428460121155,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0059547477807754}
466 | ==> online epoch # 192 [batchSize = 128]
467 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.045717954636,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":192,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428419589996,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058646194923383}
468 | ==> online epoch # 193 [batchSize = 128]
469 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.088896989822,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":193,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2665319442749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058154677781157}
470 | ==> online epoch # 194 [batchSize = 128]
471 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.138751983643,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":194,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.54,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.24520611763,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058429941487236}
472 | ==> online epoch # 195 [batchSize = 128]
473 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.113023996353,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":195,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2416369915009,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060955316449205}
474 | ==> online epoch # 196 [batchSize = 128]
475 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.114979982376,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":196,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2458860874176,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057551918503566}
476 | ==> online epoch # 197 [batchSize = 128]
477 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.031116008759,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":197,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434060573578,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060307394044522}
478 | ==> online epoch # 198 [batchSize = 128]
479 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.105694055557,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":198,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.49,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2506849765778,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057907086773179}
480 | ==> online epoch # 199 [batchSize = 128]
481 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.04305100441,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":199,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.65,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2430560588837,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058586274393094}
482 | ==> online epoch # 200 [batchSize = 128]
483 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.059851884842,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":200,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437040805817,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060513800439926}
484 |
--------------------------------------------------------------------------------
/models/nin.lua:
--------------------------------------------------------------------------------
1 | -- This is a modified version of NIN network in
2 | -- https://github.com/szagoruyko/cifar.torch
3 | -- Network-In-Network: http://arxiv.org/abs/1312.4400
4 | -- Modifications:
5 | -- * removed dropout
6 | -- * added BatchNorm
7 | -- * the last layer changed from avg-pooling to linear (works better)
8 | require 'nn'
9 | local utils = paths.dofile'utils.lua'
10 |
-- Builds the Network-In-Network classifier.
--
-- Layout: three groups of conv blocks (5x5/1x1/1x1, 5x5/1x1/1x1, 3x3/1x1/1x1)
-- separated by a max-pooling and an average-pooling layer, followed by an
-- 8x8 average pooling, a flattening View and a Linear classifier.
--
-- opt: optional table; opt.num_classes sets the number of outputs
--      (10 when opt or opt.num_classes is missing).
-- Returns the nn.Sequential model after it has been passed through
-- utils.testModel and the weight initializers.
local function createModel(opt)
   local model = nn.Sequential()

   -- Appends conv (bias removed) -> BatchNorm -> ReLU to the model.
   -- All arguments are forwarded verbatim to nn.SpatialConvolution; the
   -- second one (nOutputPlane) also sizes the batch-norm layer.
   local function Block(...)
      local arg = {...}
      model:add(nn.SpatialConvolution(...):noBias())
      model:add(nn.SpatialBatchNormalization(arg[2],1e-5))
      model:add(nn.ReLU(true))
      return model
   end

   Block(3,192,5,5,1,1,2,2)
   Block(192,160,1,1)
   Block(160,96,1,1)
   model:add(nn.SpatialMaxPooling(3,3,2,2):ceil())
   Block(96,192,5,5,1,1,2,2)
   Block(192,192,1,1)
   Block(192,192,1,1)
   model:add(nn.SpatialAveragePooling(3,3,2,2):ceil())
   Block(192,192,3,3,1,1,1,1)
   Block(192,192,1,1)
   Block(192,192,1,1)
   model:add(nn.SpatialAveragePooling(8,8,1,1))
   model:add(nn.View(-1):setNumInputDims(3))
   model:add(nn.Linear(192,opt and opt.num_classes or 10))

   -- utils.testModel finishes with model:reset(), which re-initializes the
   -- layers (including the Linear bias). The initializers therefore have to
   -- run after it, in the same order the ResNet model files use.
   utils.testModel(model)
   utils.MSRinit(model)
   utils.FCinit(model)
   return model
end
42 |
43 | return createModel
44 |
--------------------------------------------------------------------------------
/models/resnet-pre-act.lua:
--------------------------------------------------------------------------------
1 | -- ResNet-1001
2 | -- This is a re-implementation of the 1001-layer residual networks described in:
3 | -- [a] "Identity Mappings in Deep Residual Networks", arXiv:1603.05027, 2016,
4 | -- authored by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun.
5 |
6 | -- Acknowledgement: This code is contributed by Xiang Ming from Xi'an Jiaotong University.
7 |
8 | -- ************************************************************************
9 | -- This code incorporates material from:
10 |
11 | -- fb.resnet.torch (https://github.com/facebook/fb.resnet.torch)
12 | -- Copyright (c) 2016, Facebook, Inc.
13 | -- All rights reserved.
14 | --
15 | -- This source code is licensed under the BSD-style license found in the
16 | -- LICENSE file in the root directory of this source tree. An additional grant
17 | -- of patent rights can be found in the PATENTS file in the same directory.
18 | --
19 | -- ************************************************************************
20 |
21 | local nn = require 'nn'
22 | local utils = paths.dofile'utils.lua'
23 |
24 | local Convolution = nn.SpatialConvolution
25 | local Avg = nn.SpatialAveragePooling
26 | local ReLU = nn.ReLU
27 | local Max = nn.SpatialMaxPooling
28 | local SBatchNorm = nn.SpatialBatchNormalization
29 |
-- Builds the pre-activation bottleneck ResNet of [a].
--
-- opt.depth              network depth, must be of the form 9n+2
-- opt.num_classes        number of outputs of the final Linear layer
-- opt.resnet_nobottleneck  when set, the inner width of every unit equals
--                        its output width instead of a quarter of it
-- Returns the assembled nn.Sequential model.
local function createModel(opt)
   local depth = opt.depth

   -- Appends BatchNorm -> ReLU for `nPlane` channels to `seq`.
   local function addBNReLU(seq, nPlane)
      seq:add(SBatchNorm(nPlane))
      seq:add(ReLU(true))
   end

   -- The new Residual Unit in [a]: 1x1 -> 3x3 -> 1x1 convolutions, each
   -- preceded by BN + ReLU, summed with a shortcut branch.
   local function bottleneck(nInputPlane, nOutputPlane, stride)
      local nBottleneckPlane = opt.resnet_nobottleneck and nOutputPlane
                               or nOutputPlane / 4
      -- most Residual Units keep the number of planes unchanged
      local sameShape = nInputPlane == nOutputPlane

      local unit = nn.Sequential()
      local convs = nn.Sequential()

      -- When the shape is kept, the first BN/ReLU lives inside the residual
      -- branch and the shortcut is an identity. When dimensions increase,
      -- the BN/ReLU is common to both branches.
      addBNReLU(sameShape and convs or unit, nInputPlane)

      -- conv1x1
      convs:add(Convolution(nInputPlane,nBottleneckPlane,1,1,stride,stride,0,0))

      -- conv3x3
      addBNReLU(convs, nBottleneckPlane)
      convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,3,3,1,1,1,1))

      -- conv1x1
      addBNReLU(convs, nBottleneckPlane)
      convs:add(Convolution(nBottleneckPlane,nOutputPlane,1,1,1,1,0,0))

      local shortcut
      if sameShape then
         shortcut = nn.Identity()
      else
         -- projection shortcut for increasing dimensions
         shortcut = nn.Sequential()
         shortcut:add(Convolution(nInputPlane,nOutputPlane,1,1,stride,stride,0,0))
      end

      local branches = nn.ConcatTable()
      branches:add(convs)
      branches:add(shortcut)

      unit:add(branches)
      unit:add(nn.CAddTable(true))
      return unit
   end

   -- Stacking Residual Units on the same stage: the first unit changes the
   -- width/stride, the remaining count-1 units keep the shape.
   local function layer(block, nInputPlane, nOutputPlane, count, stride)
      local stage = nn.Sequential()
      stage:add(block(nInputPlane, nOutputPlane, stride))
      for _ = 2, count do
         stage:add(block(nOutputPlane, nOutputPlane, 1))
      end
      return stage
   end

   local model = nn.Sequential()

   assert((depth - 2) % 9 == 0, 'depth should be 9n+2 (e.g., 164 or 1001 in the paper)')
   local n = (depth - 2) / 9

   -- The new ResNet-164 and ResNet-1001 in [a]
   local nStages = {16, 64, 128, 256}
   -- stride of stages 1..3 (spatial sizes 32x32, 16x16, 8x8)
   local strides = {1, 2, 2}

   -- one conv at the beginning (spatial size: 32x32)
   model:add(Convolution(3,nStages[1],3,3,1,1,1,1))
   for i, stride in ipairs(strides) do
      model:add(layer(bottleneck, nStages[i], nStages[i + 1], n, stride))
   end
   local nFeatures = nStages[4]
   model:add(SBatchNorm(nFeatures))
   model:add(ReLU(true))
   model:add(Avg(8, 8, 1, 1))
   model:add(nn.View(nFeatures):setNumInputDims(3))
   model:add(nn.Linear(nFeatures, opt.num_classes))

   utils.DisableBias(model)
   utils.testModel(model)
   utils.MSRinit(model)
   utils.FCinit(model)

   -- model:get(1).gradInput = nil

   return model
end
135 |
136 | return createModel
137 |
--------------------------------------------------------------------------------
/models/utils.lua:
--------------------------------------------------------------------------------
1 | local utils = {}
2 |
-- Re-initializes every nn.SpatialConvolution found in `model`:
-- weights are drawn from a normal distribution with zero mean and
-- std = sqrt(2 / (kW * kH * nInputPlane)); an existing bias is zeroed.
function utils.MSRinit(model)
   local convs = model:findModules('nn.SpatialConvolution')
   for _, conv in pairs(convs) do
      local fanIn = conv.kW * conv.kH * conv.nInputPlane
      local std = math.sqrt(2 / fanIn)
      conv.weight:normal(0, std)
      if conv.bias then
         conv.bias:zero()
      end
   end
end
10 |
-- Zeroes the bias of every nn.Linear found in `model`.
-- Layers without a bias tensor are skipped, mirroring the guard that
-- utils.MSRinit applies to convolutions.
function utils.FCinit(model)
   for k,v in pairs(model:findModules'nn.Linear') do
      if v.bias then v.bias:zero() end
   end
end
16 |
-- Removes the bias term (and its gradient buffer) from every
-- nn.SpatialConvolution found in `model` by setting both fields to nil.
function utils.DisableBias(model)
   local convs = model:findModules'nn.SpatialConvolution'
   for _, conv in ipairs(convs) do
      conv.bias, conv.gradBias = nil, nil
   end
end
23 |
-- Smoke test for a freshly built model.
-- Converts the model to float, runs one forward and one backward pass on a
-- random 1x3xSxS input and prints both results, then calls model:reset().
-- S is the global opt.imageSize when available, otherwise 32.
-- Side effects: the model stays converted to float and has been reset.
function utils.testModel(model)
   model:float()

   local side = 32
   if opt and opt.imageSize then
      side = opt.imageSize
   end

   local input = torch.randn(1, 3, side, side)
   input = input:type(model._type)

   print('forward output',{model:forward(input)})
   -- the model's own output doubles as the gradient fed to backward
   print('backward output',{model:backward(input,model.output)})

   model:reset()
end
32 |
-- Wraps `model` in an nn.DataParallelTable spanning GPUs 1..nGPU.
-- With nGPU <= 1 the model is returned untouched. Otherwise the returned
-- value is the CUDA-converted DataParallelTable, whose worker threads load
-- cudnn and copy the caller's cudnn.fastest / cudnn.benchmark settings.
function utils.makeDataParallelTable(model, nGPU)
   if not (nGPU > 1) then
      return model
   end

   local deviceIds = torch.range(1, nGPU):totable()
   -- captured here so the worker threads can replicate the settings
   local fastest, benchmark = cudnn.fastest, cudnn.benchmark

   -- executed in every worker thread of the DataParallelTable
   local function initWorker()
      local cudnn = require 'cudnn'
      cudnn.fastest, cudnn.benchmark = fastest, benchmark
   end

   local dpt = nn.DataParallelTable(1, true, true)
      :add(model, deviceIds)
      :threads(initWorker)
   dpt.gradInput = nil

   return dpt:cuda()
end
50 |
51 | return utils
52 |
--------------------------------------------------------------------------------
/models/vgg.lua:
--------------------------------------------------------------------------------
1 | -- This is a modified version of VGG network in
2 | -- https://github.com/szagoruyko/cifar.torch
3 | -- Modifications:
4 | -- * removed dropout
5 | -- * last nn.Linear layers substituted with convolutional layers
6 | -- and avg-pooling
7 | require 'nn'
8 | local utils = paths.dofile'utils.lua'
9 |
-- Builds the VGG-style classifier.
--
-- Layout: five groups of 3x3 conv blocks (2, 2, 4, 4 and 4 blocks with
-- 64, 128, 256, 512 and 512 planes); the first four groups end with a 2x2
-- max-pooling. A 2x2 average pooling, a flattening View and a Linear
-- classifier follow.
--
-- opt: optional table; opt.num_classes sets the number of outputs
--      (10 when opt or opt.num_classes is missing).
-- Returns the nn.Sequential model after it has been passed through
-- utils.testModel and the weight initializers.
local function createModel(opt)
   local model = nn.Sequential()

   -- building block: 3x3 conv (bias removed) -> BatchNorm -> ReLU
   local function Block(nInputPlane, nOutputPlane)
      model:add(nn.SpatialConvolution(nInputPlane, nOutputPlane, 3,3, 1,1, 1,1):noBias())
      model:add(nn.SpatialBatchNormalization(nOutputPlane,1e-3))
      model:add(nn.ReLU(true))
      return model
   end

   -- 2x2 max-pooling with stride 2 (ceil mode)
   local function MP()
      model:add(nn.SpatialMaxPooling(2,2,2,2):ceil())
      return model
   end

   -- N blocks mapping ni -> no planes (only the first block changes the
   -- width), optionally followed by the layer-adding callback f.
   local function Group(ni, no, N, f)
      for i=1,N do
         Block(i == 1 and ni or no, no)
      end
      if f then f() end
   end

   Group(3,64,2,MP)
   Group(64,128,2,MP)
   Group(128,256,4,MP)
   Group(256,512,4,MP)
   Group(512,512,4)
   model:add(nn.SpatialAveragePooling(2,2,2,2):ceil())
   model:add(nn.View(-1):setNumInputDims(3))
   model:add(nn.Linear(512,opt and opt.num_classes or 10))

   -- utils.testModel finishes with model:reset(), which re-initializes the
   -- layers (including the Linear bias). The initializers therefore have to
   -- run after it, in the same order the ResNet model files use.
   utils.testModel(model)
   utils.MSRinit(model)
   utils.FCinit(model)

   return model
end
48 |
49 | return createModel
50 |
--------------------------------------------------------------------------------
/models/wide-resnet.lua:
--------------------------------------------------------------------------------
1 | -- Wide Residual Network
2 | -- This is an implementation of the wide residual networks described in:
3 | -- "Wide Residual Networks", http://arxiv.org/abs/1605.07146
4 | -- authored by Sergey Zagoruyko and Nikos Komodakis
5 |
6 | -- ************************************************************************
7 | -- This code incorporates material from:
8 |
9 | -- fb.resnet.torch (https://github.com/facebook/fb.resnet.torch)
10 | -- Copyright (c) 2016, Facebook, Inc.
11 | -- All rights reserved.
12 | --
13 | -- This source code is licensed under the BSD-style license found in the
14 | -- LICENSE file in the root directory of this source tree. An additional grant
15 | -- of patent rights can be found in the PATENTS file in the same directory.
16 | --
17 | -- ************************************************************************
18 |
19 | local nn = require 'nn'
20 | local utils = paths.dofile'utils.lua'
21 |
22 | local Convolution = nn.SpatialConvolution
23 | local Avg = nn.SpatialAveragePooling
24 | local ReLU = nn.ReLU
25 | local Max = nn.SpatialMaxPooling
26 | local SBatchNorm = nn.SpatialBatchNormalization
27 |
-- Builds a Wide Residual Network.
--
-- opt.depth         network depth, must be of the form 6n+4
-- opt.num_classes   number of outputs of the final Linear layer
-- opt.widen_factor  multiplier k applied to the stage widths 16/32/64
-- opt.dropout       optional dropout probability used between the two
--                   convolutions of a block; treated as 0 when missing
-- Returns the assembled nn.Sequential model.
local function createModel(opt)
   assert(opt and opt.depth)
   assert(opt and opt.num_classes)
   assert(opt and opt.widen_factor)

   local depth = opt.depth
   -- opt.dropout is not asserted above, so a missing value must not break
   -- the numeric comparison further down
   local dropout = opt.dropout or 0
   -- Lua 5.1 / LuaJIT without 5.2 compatibility only provide the global unpack
   local unpack = table.unpack or unpack

   local function Dropout()
      return nn.Dropout(dropout,nil,true)
   end

   -- Basic wide block: two 3x3 convolutions, each preceded by BN + ReLU,
   -- summed with an identity or 1x1-projection shortcut.
   local function wide_basic(nInputPlane, nOutputPlane, stride)
      local conv_params = {
         {3,3,stride,stride,1,1},
         {3,3,1,1,1,1},
      }
      local nBottleneckPlane = nOutputPlane

      local block = nn.Sequential()
      local convs = nn.Sequential()

      for i,v in ipairs(conv_params) do
         if i == 1 then
            -- With an unchanged width the first BN/ReLU belongs to the
            -- residual branch only; otherwise it is added to `block`, ahead
            -- of the ConcatTable holding both branches.
            local module = nInputPlane == nOutputPlane and convs or block
            module:add(SBatchNorm(nInputPlane)):add(ReLU(true))
            convs:add(Convolution(nInputPlane,nBottleneckPlane,unpack(v)))
         else
            convs:add(SBatchNorm(nBottleneckPlane)):add(ReLU(true))
            if dropout > 0 then
               convs:add(Dropout())
            end
            convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,unpack(v)))
         end
      end

      local shortcut = nInputPlane == nOutputPlane and
         nn.Identity() or
         Convolution(nInputPlane,nOutputPlane,1,1,stride,stride,0,0)

      return block
         :add(nn.ConcatTable()
            :add(convs)
            :add(shortcut))
         :add(nn.CAddTable(true))
   end

   -- Stacking Residual Units on the same stage
   local function layer(block, nInputPlane, nOutputPlane, count, stride)
      local s = nn.Sequential()

      s:add(block(nInputPlane, nOutputPlane, stride))
      for i=2,count do
         s:add(block(nOutputPlane, nOutputPlane, 1))
      end
      return s
   end

   local model = nn.Sequential()
   do
      assert((depth - 4) % 6 == 0, 'depth should be 6n+4')
      local n = (depth - 4) / 6

      local k = opt.widen_factor
      local nStages = torch.Tensor{16, 16*k, 32*k, 64*k}

      model:add(Convolution(3,nStages[1],3,3,1,1,1,1)) -- one conv at the beginning (spatial size: 32x32)
      model:add(layer(wide_basic, nStages[1], nStages[2], n, 1)) -- Stage 1 (spatial size: 32x32)
      model:add(layer(wide_basic, nStages[2], nStages[3], n, 2)) -- Stage 2 (spatial size: 16x16)
      model:add(layer(wide_basic, nStages[3], nStages[4], n, 2)) -- Stage 3 (spatial size: 8x8)
      model:add(SBatchNorm(nStages[4]))
      model:add(ReLU(true))
      model:add(Avg(8, 8, 1, 1))
      model:add(nn.View(nStages[4]):setNumInputDims(3))
      model:add(nn.Linear(nStages[4], opt.num_classes))
   end

   utils.DisableBias(model)
   utils.testModel(model)
   utils.MSRinit(model)
   utils.FCinit(model)

   -- model:get(1).gradInput = nil

   return model
end
115 |
116 | return createModel
117 |
--------------------------------------------------------------------------------
/notebooks/sweeper.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 |
4 |
def loadLog(filename):
    """Extract the per-epoch statistics from a training log.

    Every line that contains the marker ``json_stats`` is expected to hold a
    JSON object right after ``json_stats: ``; all other lines are ignored.

    Args:
        filename: path of the log file to read.

    Returns:
        A list with one decoded JSON value per matching line, in file order.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the text after the marker is not valid JSON.
    """
    stats = []
    # the context manager closes the file even if JSON decoding fails
    with open(filename) as log:
        for line in log:
            pos = line.find('json_stats')
            if pos > -1:
                # skip the 10-character marker plus the ': ' separator
                stats.append(json.loads(line[pos + 12:]))
    return stats
12 |
13 |
def findSweepParams(frames):
    """Find the parameters whose constant value differs between runs.

    A key counts as constant inside one frame when all of its values equal
    the first one. Across all frames, the keys that were constant and took
    more than one distinct value are reported.

    Args:
        frames: iterable of mappings (key -> sequence of values) supporting
            ``keys()`` and ``frame[key]``.

    Returns:
        Sorted list of the keys that vary between frames.
    """
    def findConstants(frame):
        # maps every key that is constant within `frame` to its value
        constants = {}
        for name in frame.keys():
            values = np.asarray(frame[name])
            first = values[0]
            reference = np.copy(values)
            reference.fill(first)
            if np.array_equal(values, reference):
                constants[name] = first
        return constants

    seen = {}
    for frame in frames:
        for name, value in findConstants(frame).items():
            if isinstance(value, list):
                # lists are unhashable; store their JSON text in the set
                value = json.dumps(value)
            seen.setdefault(name, set()).add(value)

    return sorted(name for name, values in seen.items() if len(values) > 1)
38 |
39 |
def generateLegend(frame, sweeps):
    """Build the legend text of one run.

    For every key in ``sweeps`` the legend contains ``key=<first value>, ``
    or ``key=not present, `` when the frame has no such key.

    Args:
        frame: mapping (key -> sequence of values) of a single run.
        sweeps: iterable of key names to include, in output order.

    Returns:
        The concatenated legend string (empty when ``sweeps`` is empty).
    """
    parts = []
    for key in sweeps:
        if key in frame:
            parts.append(key + '=' + str(frame[key][0]) + ', ')
        else:
            parts.append(key + '=not present, ')
    return ''.join(parts)
48 |
def generateLegends(frames):
    """Return one legend string per frame.

    The legends list the parameters that findSweepParams reports as varying
    across ``frames``, formatted by generateLegend.
    """
    sweeps = findSweepParams(frames)
    legends = []
    for frame in frames:
        legends.append(generateLegend(frame, sweeps))
    return legends
52 |
--------------------------------------------------------------------------------
/notebooks/visualize.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import json\n",
13 | "from bokeh.plotting import figure, output_notebook, show\n",
14 | "import numpy as np\n",
15 | "from bokeh.charts.utils import cycle_colors\n",
16 | "import sweeper\n",
17 | "\n",
18 | "output_notebook()"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "from IPython.display import display\n",
30 | "\n",
31 | "def plotLogs(log_names):\n",
32 | " # parse log files, extracting json entry with stats per epoch and creating pandas DataFrame\n",
33 | " frames = [pd.DataFrame(sweeper.loadLog('../logs/'+log+'/log.txt')) for log in log_names]\n",
34 | " colors = ['red','blue','green','black','purple','orange','yellow']\n",
35 | " \n",
36 | " # this searches constant parameters across different runs to generate legends\n",
37 | " legends = sweeper.generateLegends(frames)\n",
38 | "\n",
39 | " # TODO: improve this, add hovers etc.\n",
40 | " p = figure(title='test error', x_axis_label='epoch')\n",
41 | " for i,frame in enumerate(frames):\n",
42 | " p.line(frame['epoch'], 100-frame['test_acc'], color=colors[i], legend=legends[i])\n",
43 | " show(p)\n",
44 | " \n",
45 | " p = figure(title='log loss', x_axis_label='epoch', y_axis_label='loss')\n",
46 | " for i,frame in enumerate(frames):\n",
47 | " p.line(frame['epoch'], np.log(frame['loss']), color=colors[i], legend=legends[i])\n",
48 | " show(p)"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": false,
56 | "scrolled": false
57 | },
58 | "outputs": [],
59 | "source": [
60 | "plotLogs([\n",
61 | " 'nin_1875527964',\n",
62 | " 'vgg_24208029',\n",
63 | " 'resnet-pre-act_17934627',\n",
64 | " 'wide-resnet_1121914561'\n",
65 | "])"
66 | ]
67 | }
68 | ],
69 | "metadata": {
70 | "kernelspec": {
71 | "display_name": "Python 2",
72 | "language": "python",
73 | "name": "python2"
74 | },
75 | "language_info": {
76 | "codemirror_mode": {
77 | "name": "ipython",
78 | "version": 2
79 | },
80 | "file_extension": ".py",
81 | "mimetype": "text/x-python",
82 | "name": "python",
83 | "nbconvert_exporter": "python",
84 | "pygments_lexer": "ipython2",
85 | "version": "2.7.10"
86 | }
87 | },
88 | "nbformat": 4,
89 | "nbformat_minor": 0
90 | }
91 |
--------------------------------------------------------------------------------
/pretrained/README.md:
--------------------------------------------------------------------------------
1 | WRN-50-2
2 | ==========
3 |
4 | Best-performing ImageNet model from the Wide Residual Networks paper (BMVC 2016): https://arxiv.org/abs/1605.07146
5 | The model is slower than ResNet-101 and faster than ResNet-152, with better accuracy:
6 |
7 | | Model | top-1 err, % | top-5 err, % | #params | time/batch 16 |
8 | |---|---|---|---|---|
9 | | ResNet-50 | 24.01 | 7.02 | 25.6M | 49 |
10 | | ResNet-101 | 22.44 | 6.21 | 44.5M | 82 |
11 | | ResNet-152 | 22.16 | 6.16 | 60.2M | 115 |
12 | | __WRN-50-2-bottleneck__ | 21.9 | 6.03 | 68.9M | 93 |
13 | | pre-ResNet-200 | 21.66 | 5.79 | 64.7M | 154 |
14 |
15 | Download (263MB): https://yadi.sk/d/-8AWymOPyVZns
16 |
17 | PyTorch and TensorFlow pretrained weights and model definitions:
18 |
19 |
20 | Convergence plot:
21 |
22 | 
23 |
24 | If you find this model useful please cite this paper:
25 |
26 | ```bib
27 | @INPROCEEDINGS{Zagoruyko2016WRN,
28 | author = {Sergey Zagoruyko and Nikos Komodakis},
29 | title = {Wide Residual Networks},
30 | booktitle = {BMVC},
31 | year = {2016},
32 | }
33 | ```
34 |
35 |
36 | # Model printout
37 |
38 | ```
39 | nn.Sequential {
40 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> (11) -> output]
41 | (1): cudnn.SpatialConvolution(3 -> 64, 7x7, 2,2, 3,3) without bias
42 | (2): nn.SpatialBatchNormalization (4D) (64)
43 | (3): cudnn.ReLU
44 | (4): nn.SpatialMaxPooling(3x3, 2,2, 1,1)
45 | (5): nn.Sequential {
46 | [input -> (1) -> (2) -> (3) -> output]
47 | (1): nn.Sequential {
48 | [input -> (1) -> (2) -> (3) -> output]
49 | (1): nn.ConcatTable {
50 | input
51 | |`-> (1): nn.Sequential {
52 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
53 | | (1): cudnn.SpatialConvolution(64 -> 128, 1x1) without bias
54 | | (2): nn.SpatialBatchNormalization (4D) (128)
55 | | (3): cudnn.ReLU
56 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias
57 | | (5): nn.SpatialBatchNormalization (4D) (128)
58 | | (6): cudnn.ReLU
59 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias
60 | | (8): nn.SpatialBatchNormalization (4D) (256)
61 | | }
62 | `-> (2): nn.Sequential {
63 | [input -> (1) -> (2) -> output]
64 | (1): cudnn.SpatialConvolution(64 -> 256, 1x1) without bias
65 | (2): nn.SpatialBatchNormalization (4D) (256)
66 | }
67 | ... -> output
68 | }
69 | (2): nn.CAddTable
70 | (3): cudnn.ReLU
71 | }
72 | (2): nn.Sequential {
73 | [input -> (1) -> (2) -> (3) -> output]
74 | (1): nn.ConcatTable {
75 | input
76 | |`-> (1): nn.Sequential {
77 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
78 | | (1): cudnn.SpatialConvolution(256 -> 128, 1x1) without bias
79 | | (2): nn.SpatialBatchNormalization (4D) (128)
80 | | (3): cudnn.ReLU
81 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias
82 | | (5): nn.SpatialBatchNormalization (4D) (128)
83 | | (6): cudnn.ReLU
84 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias
85 | | (8): nn.SpatialBatchNormalization (4D) (256)
86 | | }
87 | `-> (2): nn.Identity
88 | ... -> output
89 | }
90 | (2): nn.CAddTable
91 | (3): cudnn.ReLU
92 | }
93 | (3): nn.Sequential {
94 | [input -> (1) -> (2) -> (3) -> output]
95 | (1): nn.ConcatTable {
96 | input
97 | |`-> (1): nn.Sequential {
98 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
99 | | (1): cudnn.SpatialConvolution(256 -> 128, 1x1) without bias
100 | | (2): nn.SpatialBatchNormalization (4D) (128)
101 | | (3): cudnn.ReLU
102 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias
103 | | (5): nn.SpatialBatchNormalization (4D) (128)
104 | | (6): cudnn.ReLU
105 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias
106 | | (8): nn.SpatialBatchNormalization (4D) (256)
107 | | }
108 | `-> (2): nn.Identity
109 | ... -> output
110 | }
111 | (2): nn.CAddTable
112 | (3): cudnn.ReLU
113 | }
114 | }
115 | (6): nn.Sequential {
116 | [input -> (1) -> (2) -> (3) -> (4) -> output]
117 | (1): nn.Sequential {
118 | [input -> (1) -> (2) -> (3) -> output]
119 | (1): nn.ConcatTable {
120 | input
121 | |`-> (1): nn.Sequential {
122 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
123 | | (1): cudnn.SpatialConvolution(256 -> 256, 1x1) without bias
124 | | (2): nn.SpatialBatchNormalization (4D) (256)
125 | | (3): cudnn.ReLU
126 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 2,2, 1,1) without bias
127 | | (5): nn.SpatialBatchNormalization (4D) (256)
128 | | (6): cudnn.ReLU
129 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias
130 | | (8): nn.SpatialBatchNormalization (4D) (512)
131 | | }
132 | `-> (2): nn.Sequential {
133 | [input -> (1) -> (2) -> output]
134 | (1): cudnn.SpatialConvolution(256 -> 512, 1x1, 2,2) without bias
135 | (2): nn.SpatialBatchNormalization (4D) (512)
136 | }
137 | ... -> output
138 | }
139 | (2): nn.CAddTable
140 | (3): cudnn.ReLU
141 | }
142 | (2): nn.Sequential {
143 | [input -> (1) -> (2) -> (3) -> output]
144 | (1): nn.ConcatTable {
145 | input
146 | |`-> (1): nn.Sequential {
147 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
148 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias
149 | | (2): nn.SpatialBatchNormalization (4D) (256)
150 | | (3): cudnn.ReLU
151 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias
152 | | (5): nn.SpatialBatchNormalization (4D) (256)
153 | | (6): cudnn.ReLU
154 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias
155 | | (8): nn.SpatialBatchNormalization (4D) (512)
156 | | }
157 | `-> (2): nn.Identity
158 | ... -> output
159 | }
160 | (2): nn.CAddTable
161 | (3): cudnn.ReLU
162 | }
163 | (3): nn.Sequential {
164 | [input -> (1) -> (2) -> (3) -> output]
165 | (1): nn.ConcatTable {
166 | input
167 | |`-> (1): nn.Sequential {
168 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
169 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias
170 | | (2): nn.SpatialBatchNormalization (4D) (256)
171 | | (3): cudnn.ReLU
172 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias
173 | | (5): nn.SpatialBatchNormalization (4D) (256)
174 | | (6): cudnn.ReLU
175 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias
176 | | (8): nn.SpatialBatchNormalization (4D) (512)
177 | | }
178 | `-> (2): nn.Identity
179 | ... -> output
180 | }
181 | (2): nn.CAddTable
182 | (3): cudnn.ReLU
183 | }
184 | (4): nn.Sequential {
185 | [input -> (1) -> (2) -> (3) -> output]
186 | (1): nn.ConcatTable {
187 | input
188 | |`-> (1): nn.Sequential {
189 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
190 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias
191 | | (2): nn.SpatialBatchNormalization (4D) (256)
192 | | (3): cudnn.ReLU
193 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias
194 | | (5): nn.SpatialBatchNormalization (4D) (256)
195 | | (6): cudnn.ReLU
196 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias
197 | | (8): nn.SpatialBatchNormalization (4D) (512)
198 | | }
199 | `-> (2): nn.Identity
200 | ... -> output
201 | }
202 | (2): nn.CAddTable
203 | (3): cudnn.ReLU
204 | }
205 | }
206 | (7): nn.Sequential {
207 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> output]
208 | (1): nn.Sequential {
209 | [input -> (1) -> (2) -> (3) -> output]
210 | (1): nn.ConcatTable {
211 | input
212 | |`-> (1): nn.Sequential {
213 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
214 | | (1): cudnn.SpatialConvolution(512 -> 512, 1x1) without bias
215 | | (2): nn.SpatialBatchNormalization (4D) (512)
216 | | (3): cudnn.ReLU
217 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 2,2, 1,1) without bias
218 | | (5): nn.SpatialBatchNormalization (4D) (512)
219 | | (6): cudnn.ReLU
220 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
221 | | (8): nn.SpatialBatchNormalization (4D) (1024)
222 | | }
223 | `-> (2): nn.Sequential {
224 | [input -> (1) -> (2) -> output]
225 | (1): cudnn.SpatialConvolution(512 -> 1024, 1x1, 2,2) without bias
226 | (2): nn.SpatialBatchNormalization (4D) (1024)
227 | }
228 | ... -> output
229 | }
230 | (2): nn.CAddTable
231 | (3): cudnn.ReLU
232 | }
233 | (2): nn.Sequential {
234 | [input -> (1) -> (2) -> (3) -> output]
235 | (1): nn.ConcatTable {
236 | input
237 | |`-> (1): nn.Sequential {
238 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
239 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias
240 | | (2): nn.SpatialBatchNormalization (4D) (512)
241 | | (3): cudnn.ReLU
242 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias
243 | | (5): nn.SpatialBatchNormalization (4D) (512)
244 | | (6): cudnn.ReLU
245 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
246 | | (8): nn.SpatialBatchNormalization (4D) (1024)
247 | | }
248 | `-> (2): nn.Identity
249 | ... -> output
250 | }
251 | (2): nn.CAddTable
252 | (3): cudnn.ReLU
253 | }
254 | (3): nn.Sequential {
255 | [input -> (1) -> (2) -> (3) -> output]
256 | (1): nn.ConcatTable {
257 | input
258 | |`-> (1): nn.Sequential {
259 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
260 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias
261 | | (2): nn.SpatialBatchNormalization (4D) (512)
262 | | (3): cudnn.ReLU
263 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias
264 | | (5): nn.SpatialBatchNormalization (4D) (512)
265 | | (6): cudnn.ReLU
266 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
267 | | (8): nn.SpatialBatchNormalization (4D) (1024)
268 | | }
269 | `-> (2): nn.Identity
270 | ... -> output
271 | }
272 | (2): nn.CAddTable
273 | (3): cudnn.ReLU
274 | }
275 | (4): nn.Sequential {
276 | [input -> (1) -> (2) -> (3) -> output]
277 | (1): nn.ConcatTable {
278 | input
279 | |`-> (1): nn.Sequential {
280 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
281 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias
282 | | (2): nn.SpatialBatchNormalization (4D) (512)
283 | | (3): cudnn.ReLU
284 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias
285 | | (5): nn.SpatialBatchNormalization (4D) (512)
286 | | (6): cudnn.ReLU
287 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
288 | | (8): nn.SpatialBatchNormalization (4D) (1024)
289 | | }
290 | `-> (2): nn.Identity
291 | ... -> output
292 | }
293 | (2): nn.CAddTable
294 | (3): cudnn.ReLU
295 | }
296 | (5): nn.Sequential {
297 | [input -> (1) -> (2) -> (3) -> output]
298 | (1): nn.ConcatTable {
299 | input
300 | |`-> (1): nn.Sequential {
301 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
302 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias
303 | | (2): nn.SpatialBatchNormalization (4D) (512)
304 | | (3): cudnn.ReLU
305 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias
306 | | (5): nn.SpatialBatchNormalization (4D) (512)
307 | | (6): cudnn.ReLU
308 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
309 | | (8): nn.SpatialBatchNormalization (4D) (1024)
310 | | }
311 | `-> (2): nn.Identity
312 | ... -> output
313 | }
314 | (2): nn.CAddTable
315 | (3): cudnn.ReLU
316 | }
317 | (6): nn.Sequential {
318 | [input -> (1) -> (2) -> (3) -> output]
319 | (1): nn.ConcatTable {
320 | input
321 | |`-> (1): nn.Sequential {
322 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
323 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias
324 | | (2): nn.SpatialBatchNormalization (4D) (512)
325 | | (3): cudnn.ReLU
326 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias
327 | | (5): nn.SpatialBatchNormalization (4D) (512)
328 | | (6): cudnn.ReLU
329 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias
330 | | (8): nn.SpatialBatchNormalization (4D) (1024)
331 | | }
332 | `-> (2): nn.Identity
333 | ... -> output
334 | }
335 | (2): nn.CAddTable
336 | (3): cudnn.ReLU
337 | }
338 | }
339 | (8): nn.Sequential {
340 | [input -> (1) -> (2) -> (3) -> output]
341 | (1): nn.Sequential {
342 | [input -> (1) -> (2) -> (3) -> output]
343 | (1): nn.ConcatTable {
344 | input
345 | |`-> (1): nn.Sequential {
346 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
347 | | (1): cudnn.SpatialConvolution(1024 -> 1024, 1x1) without bias
348 | | (2): nn.SpatialBatchNormalization (4D) (1024)
349 | | (3): cudnn.ReLU
350 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 2,2, 1,1) without bias
351 | | (5): nn.SpatialBatchNormalization (4D) (1024)
352 | | (6): cudnn.ReLU
353 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias
354 | | (8): nn.SpatialBatchNormalization (4D) (2048)
355 | | }
356 | `-> (2): nn.Sequential {
357 | [input -> (1) -> (2) -> output]
358 | (1): cudnn.SpatialConvolution(1024 -> 2048, 1x1, 2,2) without bias
359 | (2): nn.SpatialBatchNormalization (4D) (2048)
360 | }
361 | ... -> output
362 | }
363 | (2): nn.CAddTable
364 | (3): cudnn.ReLU
365 | }
366 | (2): nn.Sequential {
367 | [input -> (1) -> (2) -> (3) -> output]
368 | (1): nn.ConcatTable {
369 | input
370 | |`-> (1): nn.Sequential {
371 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
372 | | (1): cudnn.SpatialConvolution(2048 -> 1024, 1x1) without bias
373 | | (2): nn.SpatialBatchNormalization (4D) (1024)
374 | | (3): cudnn.ReLU
375 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 1,1, 1,1) without bias
376 | | (5): nn.SpatialBatchNormalization (4D) (1024)
377 | | (6): cudnn.ReLU
378 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias
379 | | (8): nn.SpatialBatchNormalization (4D) (2048)
380 | | }
381 | `-> (2): nn.Identity
382 | ... -> output
383 | }
384 | (2): nn.CAddTable
385 | (3): cudnn.ReLU
386 | }
387 | (3): nn.Sequential {
388 | [input -> (1) -> (2) -> (3) -> output]
389 | (1): nn.ConcatTable {
390 | input
391 | |`-> (1): nn.Sequential {
392 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output]
393 | | (1): cudnn.SpatialConvolution(2048 -> 1024, 1x1) without bias
394 | | (2): nn.SpatialBatchNormalization (4D) (1024)
395 | | (3): cudnn.ReLU
396 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 1,1, 1,1) without bias
397 | | (5): nn.SpatialBatchNormalization (4D) (1024)
398 | | (6): cudnn.ReLU
399 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias
400 | | (8): nn.SpatialBatchNormalization (4D) (2048)
401 | | }
402 | `-> (2): nn.Identity
403 | ... -> output
404 | }
405 | (2): nn.CAddTable
406 | (3): cudnn.ReLU
407 | }
408 | }
409 | (9): cudnn.SpatialAveragePooling(7x7, 1,1)
410 | (10): nn.View(2048)
411 | (11): nn.Linear(2048 -> 1000)
412 | }
413 | ```
414 |
--------------------------------------------------------------------------------
/pretrained/wide-resnet.lua:
--------------------------------------------------------------------------------
1 | --
2 | -- Copyright (c) 2016, Facebook, Inc.
3 | -- All rights reserved.
4 | --
5 | -- This source code is licensed under the BSD-style license found in the
6 | -- LICENSE file in the root directory of this source tree. An additional grant
7 | -- of patent rights can be found in the PATENTS file in the same directory.
8 | --
9 | -- Edited by Sergey Zagoruyko for Wide Residual Networks
10 | -- http://arxiv.org/abs/1605.07146
11 | --
12 | -- The Wide-ResNet model definition (not pre-activation)
13 | -- * WRN-18-WRN-34: wider basic block
14 | -- * WRN-50-WRN-152: wider bottleneck
15 |
16 | local nn = require 'nn'
17 | require 'cunn'
18 |
19 | local Convolution = cudnn.SpatialConvolution -- short aliases for the layer constructors used below
20 | local Avg = cudnn.SpatialAveragePooling
21 | local ReLU = cudnn.ReLU
22 | local Max = nn.SpatialMaxPooling
23 | local SBatchNorm = nn.SpatialBatchNormalization
24 |
25 | local function createModel(opt) -- builds the network from opt (reads depth, width, shortcutType, dataset, cudnn) and returns it
26 |
27 |    assert(opt.depth)
28 |    assert(opt.width)
29 |
30 |    local depth = opt.depth
31 |    local width = opt.width -- for WRN-18-34
32 |    local bottle = opt.width -- for WRN-50-WRN-152
33 |    local shortcutType = opt.shortcutType or 'B'
34 |    local iChannels -- channel count produced by the most recently created block; updated by basicblock/bottleneck
35 |
36 |    -- The shortcut layer is either identity or 1x1 convolution
37 |    local function shortcut(nInputPlane, nOutputPlane, stride)
38 |       local useConv = shortcutType == 'C' or
39 |          (shortcutType == 'B' and nInputPlane ~= nOutputPlane)
40 |       if useConv then
41 |          -- 1x1 convolution
42 |          return nn.Sequential()
43 |             :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
44 |             :add(SBatchNorm(nOutputPlane))
45 |       elseif nInputPlane ~= nOutputPlane then
46 |          -- Strided, zero-padded identity shortcut
47 |          return nn.Sequential()
48 |             :add(nn.SpatialAveragePooling(1, 1, stride, stride))
49 |             :add(nn.Concat(2)
50 |                :add(nn.Identity())
51 |                :add(nn.MulConstant(0)))
52 |       else
53 |          return nn.Identity()
54 |       end
55 |    end
56 |
57 |    -- The basic residual layer block for 18 and 34 layer network, and the
58 |    -- CIFAR networks
59 |    local function basicblock(n, stride)
60 |       local nInputPlane = iChannels
61 |       iChannels = n -- the next block sees n input channels
62 |
63 |       local s = nn.Sequential()
64 |       s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1))
65 |       s:add(SBatchNorm(n))
66 |       s:add(ReLU(true))
67 |       s:add(Convolution(n,n,3,3,1,1,1,1))
68 |       s:add(SBatchNorm(n))
69 |
70 |       return nn.Sequential()
71 |          :add(nn.ConcatTable()
72 |             :add(s)
73 |             :add(shortcut(nInputPlane, n, stride)))
74 |          :add(nn.CAddTable(true))
75 |          :add(ReLU(true))
76 |    end
77 |
78 |    -- The bottleneck residual layer for 50, 101, and 152 layer networks
79 |    local function bottleneck(n, stride)
80 |       local nInputPlane = iChannels
81 |       iChannels = n * bottle -- block output (and next block input) has n*bottle channels
82 |
83 |       local s = nn.Sequential()
84 |       s:add(Convolution(nInputPlane,n,1,1,1,1,0,0))
85 |       s:add(SBatchNorm(n))
86 |       s:add(ReLU(true))
87 |       s:add(Convolution(n,n,3,3,stride,stride,1,1))
88 |       s:add(SBatchNorm(n))
89 |       s:add(ReLU(true))
90 |       s:add(Convolution(n,n*bottle,1,1,1,1,0,0))
91 |       s:add(SBatchNorm(n * bottle))
92 |
93 |       return nn.Sequential()
94 |          :add(nn.ConcatTable()
95 |             :add(s)
96 |             :add(shortcut(nInputPlane, n * bottle, stride)))
97 |          :add(nn.CAddTable(true))
98 |          :add(ReLU(true))
99 |    end
100 |
101 |    -- Creates count residual blocks with specified number of features
102 |    local function layer(block, features, count, stride)
103 |       local s = nn.Sequential()
104 |       for i=1,count do
105 |          s:add(block(features, i == 1 and stride or 1)) -- only the first block is strided; a nil stride falls back to 1
106 |       end
107 |       return s
108 |    end
109 |
110 |    local model = nn.Sequential()
111 |    if opt.dataset == 'imagenet' then
112 |       -- Configurations for ResNet:
113 |       -- num. residual blocks, num features, residual block function
114 |       local cfg = {
115 |          [18] = {{2, 2, 2, 2}, 512*width, basicblock}, -- leave as is
116 |          [34] = {{3, 4, 6, 3}, 512*width, basicblock}, -- leave as is
117 |          [50] = {{3, 4, 6, 3}, 512*width*bottle, bottleneck}, -- last stage emits (width*512)*bottle channels
118 |          [101] = {{3, 4, 23, 3}, 512*width*bottle, bottleneck},
119 |          [152] = {{3, 8, 36, 3}, 512*width*bottle, bottleneck},
120 |       }
121 |
122 |       assert(cfg[depth], 'Invalid depth: ' .. tostring(depth))
123 |       local def, nFeatures, block = table.unpack(cfg[depth])
124 |       iChannels = 64 -- channels leaving the 7x7 stem convolution
125 |       print(' | ResNet-' .. depth ..'-'..width .. ' ImageNet')
126 |
127 |       -- The ResNet ImageNet model
128 |       model:add(Convolution(3,64,7,7,2,2,3,3))
129 |       model:add(SBatchNorm(64))
130 |       model:add(ReLU(true))
131 |       model:add(Max(3,3,2,2,1,1))
132 |       model:add(layer(block, width*64, def[1]))
133 |       model:add(layer(block, width*128, def[2], 2))
134 |       model:add(layer(block, width*256, def[3], 2))
135 |       model:add(layer(block, width*512, def[4], 2))
136 |       model:add(Avg(7, 7, 1, 1))
137 |       model:add(nn.View(nFeatures):setNumInputDims(3)) -- nFeatures must equal the channel count of the last stage
138 |       model:add(nn.Linear(nFeatures, 1000))
139 |    elseif opt.dataset == 'cifar10' then
140 |       -- Model type specifies number of layers for CIFAR-10 model
141 |       assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202')
142 |       local n = (depth - 2) / 6
143 |       iChannels = 16 -- channels leaving the 3x3 stem convolution
144 |       print(' | ResNet-' .. depth .. ' CIFAR-10')
145 |
146 |       -- The ResNet CIFAR-10 model
147 |       model:add(Convolution(3,16,3,3,1,1,1,1))
148 |       model:add(SBatchNorm(16))
149 |       model:add(ReLU(true))
150 |       model:add(layer(basicblock, 16*width, n))
151 |       model:add(layer(basicblock, 32*width, n, 2))
152 |       model:add(layer(basicblock, 64*width, n, 2))
153 |       model:add(Avg(8, 8, 1, 1))
154 |       model:add(nn.View(64*width):setNumInputDims(3))
155 |       model:add(nn.Linear(64*width, 10))
156 |    else
157 |       error('invalid dataset: ' .. opt.dataset)
158 |    end
159 |
160 |    local function ConvInit(name) -- N(0, sqrt(2/fan_in)) weights for every module of type `name`, fan_in = kW*kH*nInputPlane
161 |       for k,v in pairs(model:findModules(name)) do
162 |          local n = v.kW*v.kH*v.nInputPlane
163 |          v.weight:normal(0,math.sqrt(2/n))
164 |          if cudnn.version >= 4000 then
165 |             v.bias = nil -- convolutions run without bias on cudnn >= 4000
166 |             v.gradBias = nil
167 |          else
168 |             v.bias:zero()
169 |          end
170 |       end
171 |    end
172 |    local function BNInit(name) -- batch-norm scale set to 1 and shift to 0 for every module of type `name`
173 |       for k,v in pairs(model:findModules(name)) do
174 |          v.weight:fill(1)
175 |          v.bias:zero()
176 |       end
177 |    end
178 |
179 |    ConvInit('cudnn.SpatialConvolution')
180 |    ConvInit('nn.SpatialConvolution')
181 |    BNInit('fbnn.SpatialBatchNormalization')
182 |    BNInit('cudnn.SpatialBatchNormalization')
183 |    BNInit('nn.SpatialBatchNormalization')
184 |    for k,v in pairs(model:findModules('nn.Linear')) do
185 |       v.bias:zero()
186 |    end
187 |    model:cuda()
188 |
189 |    if opt.cudnn == 'deterministic' then
190 |       model:apply(function(m)
191 |          if m.setMode then m:setMode(1,1,1) end
192 |       end)
193 |    end
194 |
195 |    model:get(1).gradInput = nil -- drops the gradInput buffer of the first layer
196 |
197 |    return model
198 | end
199 |
200 | return createModel -- the module's value is the constructor function itself
201 |
--------------------------------------------------------------------------------
/pytorch/README.md:
--------------------------------------------------------------------------------
1 | PyTorch training code for Wide Residual Networks
2 | ==========
3 |
4 | PyTorch training code for Wide Residual Networks:
5 | http://arxiv.org/abs/1605.07146
6 |
7 | The code reproduces *exactly* its Lua version:
8 | https://github.com/szagoruyko/wide-residual-networks
9 |
10 |
11 | # Requirements
12 |
13 | Install requirements:
14 |
15 | ```
16 | pip install -r requirements.txt
17 | ```
18 |
19 |
20 | # Howto
21 |
22 | Train WRN-28-10 on 4 GPUs:
23 |
24 | ```
25 | python main.py --save ./logs/resnet_$RANDOM$RANDOM --depth 28 --width 10 --ngpu 4 --gpu_id 0,1,2,3
26 | ```
27 |
--------------------------------------------------------------------------------
/pytorch/main.py:
--------------------------------------------------------------------------------
1 | """
2 | PyTorch training code for Wide Residual Networks:
3 | http://arxiv.org/abs/1605.07146
4 |
5 | The code reproduces *exactly* its Lua version:
6 | https://github.com/szagoruyko/wide-residual-networks
7 |
8 | 2016 Sergey Zagoruyko
9 | """
10 |
11 | import argparse
12 | import os
13 | import json
14 | import numpy as np
15 | from tqdm import tqdm
16 | import torch
17 | from torch.optim import SGD
18 | import torch.utils.data
19 | import torchvision.transforms as T
20 | import torchvision.datasets as datasets
21 | from torch.utils.data import DataLoader
22 | import torch.nn.functional as F
23 | import torchnet as tnt
24 | from torchnet.engine import Engine
25 | from utils import cast, data_parallel, print_tensor_dict
26 | from torch.backends import cudnn
27 | from resnet import resnet
28 |
29 | cudnn.benchmark = True  # enable cuDNN benchmark mode at import time
30 |
31 | parser = argparse.ArgumentParser(description='Wide Residual Networks')
32 | # Model options
33 | parser.add_argument('--model', default='resnet', type=str)
34 | parser.add_argument('--depth', default=16, type=int)  # passed to resnet(), which asserts depth == 6n+4
35 | parser.add_argument('--width', default=1, type=float)  # widening factor passed to resnet()
36 | parser.add_argument('--dataset', default='CIFAR10', type=str)  # name of a class looked up in torchvision.datasets
37 | parser.add_argument('--dataroot', default='.', type=str)
38 | parser.add_argument('--dtype', default='float', type=str)  # tensor method name used by utils.cast for the inputs
39 | parser.add_argument('--groups', default=1, type=int)
40 | parser.add_argument('--nthread', default=4, type=int)  # DataLoader num_workers
41 | parser.add_argument('--seed', default=1, type=int)
42 |
43 | # Training options
44 | parser.add_argument('--batch_size', default=128, type=int)
45 | parser.add_argument('--lr', default=0.1, type=float)
46 | parser.add_argument('--epochs', default=200, type=int, metavar='N',
47 | help='number of total epochs to run')
48 | parser.add_argument('--weight_decay', default=0.0005, type=float)
49 | parser.add_argument('--epoch_step', default='[60,120,160]', type=str,
50 | help='json list with epochs to drop lr on')
51 | parser.add_argument('--lr_decay_ratio', default=0.2, type=float)  # lr is multiplied by this at each epoch_step entry
52 | parser.add_argument('--resume', default='', type=str)  # path of a checkpoint written by main(); '' disables resuming
53 | parser.add_argument('--note', default='', type=str)
54 |
55 | # Device options
56 | parser.add_argument('--cuda', action='store_true')  # NOTE(review): not read by main() in this file; utils.cast checks torch.cuda.is_available() instead
57 | parser.add_argument('--save', default='', type=str,
58 | help='save parameters and logs in this folder')
59 | parser.add_argument('--ngpu', default=1, type=int,
60 | help='number of GPUs to use for training')
61 | parser.add_argument('--gpu_id', default='0', type=str,
62 | help='id(s) for CUDA_VISIBLE_DEVICES')
63 |
64 |
65 | def create_dataset(opt, train):  # build the torchvision dataset named by opt.dataset; train=True adds augmentation
66 | transform = T.Compose([
67 | T.ToTensor(),
68 | T.Normalize(np.array([125.3, 123.0, 113.9]) / 255.0,  # per-channel mean, given on a 0-255 scale (presumably CIFAR statistics - confirm)
69 | np.array([63.0, 62.1, 66.7]) / 255.0),  # per-channel std, same scale
70 | ])
71 | if train:
72 | transform = T.Compose([  # augmentation runs first, then the ToTensor/Normalize pipeline above
73 | T.Pad(4, padding_mode='reflect'),
74 | T.RandomHorizontalFlip(),
75 | T.RandomCrop(32),
76 | transform
77 | ])
78 | return getattr(datasets, opt.dataset)(opt.dataroot, train=train, download=True, transform=transform)  # download=True is always passed
79 |
80 |
81 | def main():
82 | opt = parser.parse_args()
83 | print('parsed options:', vars(opt))
84 | epoch_step = json.loads(opt.epoch_step)
85 | num_classes = 10 if opt.dataset == 'CIFAR10' else 100
86 |
87 | torch.manual_seed(opt.seed)
88 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id
89 |
90 | def create_iterator(mode):
91 | return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
92 | num_workers=opt.nthread, pin_memory=torch.cuda.is_available())
93 |
94 | train_loader = create_iterator(True)
95 | test_loader = create_iterator(False)
96 |
97 | f, params = resnet(opt.depth, opt.width, num_classes)
98 |
99 | def create_optimizer(opt, lr):
100 | print('creating optimizer with lr = ', lr)
101 | return SGD([v for v in params.values() if v.requires_grad], lr, momentum=0.9, weight_decay=opt.weight_decay)
102 |
103 | optimizer = create_optimizer(opt, opt.lr)
104 |
105 | epoch = 0
106 | if opt.resume != '':
107 | state_dict = torch.load(opt.resume)
108 | epoch = state_dict['epoch']
109 | params_tensors = state_dict['params']
110 | for k, v in params.items():
111 | v.data.copy_(params_tensors[k])
112 | optimizer.load_state_dict(state_dict['optimizer'])
113 |
114 | print('\nParameters:')
115 | print_tensor_dict(params)
116 |
117 | n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
118 | print('\nTotal number of parameters:', n_parameters)
119 |
120 | meter_loss = tnt.meter.AverageValueMeter()
121 | classacc = tnt.meter.ClassErrorMeter(accuracy=True)
122 | timer_train = tnt.meter.TimeMeter('s')
123 | timer_test = tnt.meter.TimeMeter('s')
124 |
125 | if not os.path.exists(opt.save):
126 | os.mkdir(opt.save)
127 |
128 | def h(sample):
129 | inputs = cast(sample[0], opt.dtype)
130 | targets = cast(sample[1], 'long')
131 | y = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu))).float()
132 | return F.cross_entropy(y, targets), y
133 |
134 | def log(t, state):
135 | torch.save(dict(params=params, epoch=t['epoch'], optimizer=state['optimizer'].state_dict()),
136 | os.path.join(opt.save, 'model.pt7'))
137 | z = {**vars(opt), **t}
138 | with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
139 | flog.write('json_stats: ' + json.dumps(z) + '\n')
140 | print(z)
141 |
142 | def on_sample(state):
143 | state['sample'].append(state['train'])
144 |
145 | def on_forward(state):
146 | loss = float(state['loss'])
147 | classacc.add(state['output'].data, state['sample'][1])
148 | meter_loss.add(loss)
149 | if state['train']:
150 | state['iterator'].set_postfix(loss=loss)
151 |
152 | def on_start(state):
153 | state['epoch'] = epoch
154 |
155 | def on_start_epoch(state):
156 | classacc.reset()
157 | meter_loss.reset()
158 | timer_train.reset()
159 | state['iterator'] = tqdm(train_loader, dynamic_ncols=True)
160 |
161 | epoch = state['epoch'] + 1
162 | if epoch in epoch_step:
163 | lr = state['optimizer'].param_groups[0]['lr']
164 | state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)
165 |
166 | def on_end_epoch(state):
167 | train_loss = meter_loss.value()
168 | train_acc = classacc.value()
169 | train_time = timer_train.value()
170 | meter_loss.reset()
171 | classacc.reset()
172 | timer_test.reset()
173 |
174 | with torch.no_grad():
175 | engine.test(h, test_loader)
176 |
177 | test_acc = classacc.value()[0]
178 | print(log({
179 | "train_loss": train_loss[0],
180 | "train_acc": train_acc[0],
181 | "test_loss": meter_loss.value()[0],
182 | "test_acc": test_acc,
183 | "epoch": state['epoch'],
184 | "num_classes": num_classes,
185 | "n_parameters": n_parameters,
186 | "train_time": train_time,
187 | "test_time": timer_test.value(),
188 | }, state))
189 | print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
190 | (opt.save, state['epoch'], opt.epochs, test_acc))
191 |
192 | engine = Engine()
193 | engine.hooks['on_sample'] = on_sample
194 | engine.hooks['on_forward'] = on_forward
195 | engine.hooks['on_start_epoch'] = on_start_epoch
196 | engine.hooks['on_end_epoch'] = on_end_epoch
197 | engine.hooks['on_start'] = on_start
198 | engine.train(h, train_loader, opt.epochs, optimizer)
199 |
200 |
201 | if __name__ == '__main__':  # run training only when executed as a script, not on import
202 | main()
--------------------------------------------------------------------------------
/pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | nested_dict
2 | tqdm
3 | torchvision
4 | torchnet
5 |
--------------------------------------------------------------------------------
/pytorch/resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import utils
4 |
5 |
6 | def resnet(depth, width, num_classes):  # returns (f, flat_params): a functional forward pass and its flat dict of tensors
7 | assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
8 | n = (depth - 4) // 6  # number of residual blocks in each of the three groups
9 | widths = [int(v * width) for v in (16, 32, 64)]  # channel count of each group after widening
10 |
11 | def gen_block_params(ni, no):  # tensors for one block mapping ni -> no channels
12 | return {
13 | 'conv0': utils.conv_params(ni, no, 3),
14 | 'conv1': utils.conv_params(no, no, 3),
15 | 'bn0': utils.bnparams(ni),
16 | 'bn1': utils.bnparams(no),
17 | 'convdim': utils.conv_params(ni, no, 1) if ni != no else None,  # 1x1 projection only when channel counts differ; None is dropped by utils.flatten
18 | }
19 |
20 | def gen_group_params(ni, no, count):  # only block0 sees ni input channels, the rest are no -> no
21 | return {'block%d' % i: gen_block_params(ni if i == 0 else no, no)
22 | for i in range(count)}
23 |
24 | flat_params = utils.cast(utils.flatten({  # nested dict -> flat dict with dotted keys, e.g. 'group0.block0.conv0'
25 | 'conv0': utils.conv_params(3, 16, 3),
26 | 'group0': gen_group_params(16, widths[0], n),
27 | 'group1': gen_group_params(widths[0], widths[1], n),
28 | 'group2': gen_group_params(widths[1], widths[2], n),
29 | 'bn': utils.bnparams(widths[2]),
30 | 'fc': utils.linear_params(widths[2], num_classes),
31 | }))
32 |
33 | utils.set_requires_grad_except_bn_(flat_params)  # everything except the BN running statistics becomes trainable
34 |
35 | def block(x, params, base, mode, stride):  # BN -> ReLU -> conv, twice, plus a shortcut
36 | o1 = F.relu(utils.batch_norm(x, params, base + '.bn0', mode), inplace=True)
37 | y = F.conv2d(o1, params[base + '.conv0'], stride=stride, padding=1)
38 | o2 = F.relu(utils.batch_norm(y, params, base + '.bn1', mode), inplace=True)
39 | z = F.conv2d(o2, params[base + '.conv1'], stride=1, padding=1)
40 | if base + '.convdim' in params:
41 | return z + F.conv2d(o1, params[base + '.convdim'], stride=stride)  # projection shortcut is applied to the activated input o1
42 | else:
43 | return z + x  # identity shortcut uses the raw block input
44 |
45 | def group(o, params, base, mode, stride):  # n blocks in sequence; only the first one is strided
46 | for i in range(n):
47 | o = block(o, params, '%s.block%d' % (base,i), mode, stride if i == 0 else 1)
48 | return o
49 |
50 | def f(input, params, mode):  # forward pass; mode is forwarded to batch norm as its `training` flag
51 | x = F.conv2d(input, params['conv0'], padding=1)
52 | g0 = group(x, params, 'group0', mode, 1)
53 | g1 = group(g0, params, 'group1', mode, 2)
54 | g2 = group(g1, params, 'group2', mode, 2)
55 | o = F.relu(utils.batch_norm(g2, params, 'bn', mode))
56 | o = F.avg_pool2d(o, 8, 1, 0)  # 8x8 window, stride 1, no padding; presumably the feature map is 8x8 here (32x32 input) - confirm
57 | o = o.view(o.size(0), -1)
58 | o = F.linear(o, params['fc.weight'], params['fc.bias'])
59 | return o
60 |
61 | return f, flat_params
--------------------------------------------------------------------------------
/pytorch/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.init import kaiming_normal_
3 | import torch.nn.functional as F
4 | from torch.nn.parallel._functions import Broadcast
5 | from torch.nn.parallel import scatter, parallel_apply, gather
6 | from functools import partial
7 | from nested_dict import nested_dict
8 |
9 |
10 | def cast(params, dtype='float'):  # convert a tensor, or every tensor in a (nested) dict, via the tensor method named by dtype
11 | if isinstance(params, dict):
12 | return {k: cast(v, dtype) for k,v in params.items()}
13 | else:
14 | return getattr(params.cuda() if torch.cuda.is_available() else params, dtype)()  # moved to CUDA first whenever a GPU is available
15 |
16 |
17 | def conv_params(ni, no, k=1):  # Kaiming-normal initialised convolution weight of shape (no, ni, k, k)
18 | return kaiming_normal_(torch.Tensor(no, ni, k, k))
19 |
20 |
21 | def linear_params(ni, no):  # Kaiming-normal weight of shape (no, ni) and a zero bias of length no
22 | return {'weight': kaiming_normal_(torch.Tensor(no, ni)), 'bias': torch.zeros(no)}
23 |
24 |
25 | def bnparams(n):  # tensors for one batch-norm layer over n channels
26 | return {'weight': torch.rand(n),  # scale drawn by torch.rand, not set to ones
27 | 'bias': torch.zeros(n),
28 | 'running_mean': torch.zeros(n),
29 | 'running_var': torch.ones(n)}
30 |
31 |
32 | def data_parallel(f, input, params, mode, device_ids, output_device=None):  # evaluate f(input, params, mode), split across device_ids when more than one is given
33 | assert isinstance(device_ids, list)
34 | if output_device is None:
35 | output_device = device_ids[0]  # results are gathered on the first device by default
36 |
37 | if len(device_ids) == 1:
38 | return f(input, params, mode)  # single device: plain call, no broadcast/scatter
39 |
40 | params_all = Broadcast.apply(device_ids, *params.values())  # flat sequence of copies, indexed below device by device
41 | params_replicas = [{k: params_all[i + j*len(params)] for i, k in enumerate(params.keys())}  # rebuild one params dict per device j
42 | for j in range(len(device_ids))]
43 |
44 | replicas = [partial(f, params=p, mode=mode)
45 | for p in params_replicas]
46 | inputs = scatter([input], device_ids)  # split the input across the devices
47 | outputs = parallel_apply(replicas, inputs)
48 | return gather(outputs, output_device)
49 |
50 |
def flatten(params):
    """Flatten a nested dict into a single-level dict with dotted keys.

    Each key path yielded by ``nested_dict(params).items_flat()`` is joined
    with ``'.'``; entries whose value is ``None`` are dropped.
    """
    flat = {}
    for path, value in nested_dict(params).items_flat():
        if value is None:
            continue
        flat['.'.join(path)] = value
    return flat
53 |
54 |
def batch_norm(x, params, base, mode):
    """Apply functional batch normalisation using parameters stored under ``base``.

    Args:
        x: input tensor.
        params: flat dict holding ``base + '.weight'``, ``'.bias'``,
            ``'.running_mean'`` and ``'.running_var'``.
        base: key prefix of the batch-norm layer inside ``params``.
        mode: passed to ``F.batch_norm`` as ``training``.

    Returns:
        The output of ``F.batch_norm``.
    """
    prefix = base + '.'
    weight = params[prefix + 'weight']
    bias = params[prefix + 'bias']
    running_mean = params[prefix + 'running_mean']
    running_var = params[prefix + 'running_var']
    return F.batch_norm(x, running_mean, running_var,
                        weight=weight, bias=bias, training=mode)
61 |
62 |
def print_tensor_dict(params):
    """Print one aligned row per entry of a ``name -> tensor`` dict.

    Each row shows the entry index, the key, the tensor shape, the tensor
    type name and its ``requires_grad`` flag.

    Args:
        params: dict mapping string keys to tensors. An empty dict prints
            nothing.
    """
    # default=0 keeps an empty dict from raising ValueError in max().
    kmax = max((len(key) for key in params.keys()), default=0)
    for i, (key, v) in enumerate(params.items()):
        print(str(i).ljust(5), key.ljust(kmax + 3), str(tuple(v.shape)).ljust(23), torch.typename(v), v.requires_grad)
67 |
68 |
def set_requires_grad_except_bn_(params):
    """Enable gradients in place for every tensor except running statistics.

    Entries whose key ends with ``'running_mean'`` or ``'running_var'`` are
    left untouched; all others get ``requires_grad = True``.
    """
    stat_suffixes = ('running_mean', 'running_var')
    for name, tensor in params.items():
        if name.endswith(stat_suffixes):
            continue
        tensor.requires_grad = True
73 |
--------------------------------------------------------------------------------
/scripts/train_cifar.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launch train.lua with the CIFAR settings below. Options are handed to
# train.lua through exported environment variables; `model` is expected to be
# set by the caller and is used here only to name the log directory.

# Make the pipeline's exit status reflect a failure of `th`, not only of `tee`.
set -o pipefail

export learningRate=0.1
export epoch_step="{60,120,160}"
export max_epoch=200
export learningRateDecay=0
export learningRateDecayRatio=0.2
export nesterov=true
export randomcrop_type=reflection

# The log directory is named after the model plus two random numbers and must
# exist before tee can write into it.
export save="logs/${model}_${RANDOM}${RANDOM}"
mkdir -p "$save"
# tee sends stdout both to the screen and to the log file.
th train.lua | tee "$save/log.txt"
16 |
--------------------------------------------------------------------------------
/scripts/train_svhn.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launch train.lua with the SVHN settings below. Options are handed to
# train.lua through exported environment variables; `model` is expected to be
# set by the caller and is used here only to name the log directory.

# Make the pipeline's exit status reflect a failure of `th`, not only of `tee`.
set -o pipefail

export learningRate=0.01
export epoch_step="{80,120}"
export max_epoch=160
export learningRateDecay=0
export learningRateDecayRatio=0.1
export nesterov=true

export dropout=0.4
export dataset=./datasets/svhn.t7
# No augmentation: random crop and horizontal flip are both switched off.
export randomcrop=0
export hflip=false

# The log directory is named after the model plus two random numbers and must
# exist before tee can write into it.
export save="logs/svhn_${model}_${RANDOM}${RANDOM}"
mkdir -p "$save"
# tee sends stdout both to the screen and to the log file.
th train.lua | tee "$save/log.txt"
20 |
--------------------------------------------------------------------------------
/train.lua:
--------------------------------------------------------------------------------
1 | -- Code for Wide Residual Networks http://arxiv.org/abs/1605.07146
2 | -- (c) Sergey Zagoruyko, 2016
3 | require 'xlua'
4 | require 'optim'
5 | require 'image'
6 | local tnt = require 'torchnet'
7 | local c = require 'trepl.colorize'
8 | local json = require 'cjson'
9 | local utils = paths.dofile'models/utils.lua'
10 |
11 | -- for memory optimizations and graph generation
12 | local optnet = require 'optnet'
13 | local graphgen = require 'optnet.graphgen'
14 | local iterm = require 'iterm'
15 | require 'iterm.dot'
16 |
-- Default options. The whole table is passed to the model constructor and to
-- the optimizer as its config, and is merged into every logged JSON line.
local opt = {
  dataset = './datasets/cifar10_whitened.t7',  -- file loaded with torch.load
  save = 'logs',                 -- directory created at startup; model.t7 is written here
  batchSize = 128,               -- batch size of the train and test iterators
  learningRate = 0.1,            -- multiplied by learningRateDecayRatio at each epoch_step
  learningRateDecay = 0,         -- part of the optimizer config (presumably read by the optim method)
  learningRateDecayRatio = 0.2,  -- factor applied to learningRate at each epoch_step
  weightDecay = 0.0005,          -- part of the optimizer config (presumably read by the optim method)
  dampening = 0,                 -- part of the optimizer config (presumably read by the optim method)
  momentum = 0.9,                -- part of the optimizer config (presumably read by the optim method)
  epoch_step = "80",             -- a number ("80") or a Lua table literal ("{60,120,160}")
  max_epoch = 300,               -- passed to the engine as maxepoch
  model = 'nin',                 -- selects models/<model>.lua
  optimMethod = 'sgd',           -- optimizer name; recorded in the logged options
  init_value = 10,               -- not read in this script; presumably used by the model file
  depth = 50,                    -- not read in this script; presumably used by the model file
  shortcutType = 'A',            -- not read in this script; presumably used by the model file
  nesterov = false,              -- part of the optimizer config (presumably read by the optim method)
  dropout = 0,                   -- not read in this script; presumably used by the model file
  hflip = true,                  -- enable the random horizontal flip for training samples
  randomcrop = 4,                -- padding used by the random crop; 0 disables it
  imageSize = 32,                -- side length of crops and of the sample input
  randomcrop_type = 'zero',      -- 'zero' or 'reflection' padding for the random crop
  cudnn_deterministic = false,   -- if true, setMode(1,1,1) is applied to modules that support it
  optnet_optimize = true,        -- run optnet.optimizeMemory on the network
  generate_graph = false,        -- write <save>/graph.pdf of the network
  multiply_input_factor = 1,     -- not read in this script; presumably used by the model file
  widen_factor = 1,              -- not read in this script; presumably used by the model file
  nGPU = 1,                      -- passed to utils.makeDataParallelTable
  data_type = 'torch.CudaTensor', -- tensor type for the model, criterion and batch buffers
  seed = 444,                    -- passed to torch.manualSeed
}
-- Presumably lets environment variables of the same name (as exported by
-- scripts/train_*.sh) override the defaults -- confirm against xlua.envparams.
opt = xlua.envparams(opt)
50 |
51 |
torch.manualSeed(opt.seed)

-- epoch_step arrives as a string: either a single number ("80") or a Lua
-- table literal ("{60,120,160}").
-- NOTE(review): the table form is evaluated with loadstring, which executes
-- whatever the option contains; only use it with trusted input.
do
  local raw = opt.epoch_step
  local parsed = tonumber(raw)
  if not parsed then
    local chunk, err = loadstring('return '..raw)
    if not chunk then
      -- Without this check a malformed value fails with the unhelpful
      -- "attempt to call a nil value".
      error('cannot parse epoch_step "'..tostring(raw)..'": '..tostring(err))
    end
    parsed = chunk()
  end
  opt.epoch_step = parsed
end
print(opt)
56 |
-- Per-channel mean and standard deviation applied to byte-valued image data.
local meanstd = {mean = {125.3, 123.0, 113.9}, std = {63.0, 62.1, 66.7}}
print(c.blue '==>' ..' loading data')
local provider = torch.load(opt.dataset)
-- The largest test label is taken as the number of classes.
opt.num_classes = provider.testData.labels:max()
-- Only raw byte data is normalised here; other tensor types are used as loaded.
if torch.type(provider.trainData.data) == 'torch.ByteTensor' then
  for _, split in ipairs{'trainData', 'testData'} do
    local as_float = provider[split].data:float()
    provider[split].data = as_float
    for ch = 1, 3 do
      -- select returns a view, so the in-place ops modify as_float itself
      local channel = as_float:select(2, ch)
      channel:add(-meanstd.mean[ch])
      channel:div(meanstd.std[ch])
    end
  end
end
69 |
-- Convert x (anything with a :type method, e.g. a tensor or module) to the
-- tensor type named by opt.data_type and return the result.
local function cast(x)
  local target_type = opt.data_type
  return x:type(target_type)
end
71 |
-- Build the network: models/<opt.model>.lua returns a constructor that is
-- called with the options table. `net` is the bare network (saved at the end
-- of training); `model` wraps it for (multi-)GPU execution.
print(c.blue '==>' ..' configuring model')
local model = nn.Sequential()
local net = dofile('models/'..opt.model..'.lua')(opt)
-- CUDA-only setup, entered when data_type names a Cuda tensor type.
if opt.data_type:match'torch.Cuda.*Tensor' then
  require 'cudnn'
  require 'cunn'
  -- switch modules to their cudnn counterparts and move the network to the GPU
  cudnn.convert(net, cudnn):cuda()
  if opt.cudnn_deterministic then
    -- presumably selects deterministic cudnn algorithms -- confirm against cudnn.torch
    net:apply(function(m) if m.setMode then m:setMode(1,1,1) end end)
  end

  print(net)
  print('Network has', #net:findModules'cudnn.SpatialConvolution', 'convolutions')

  -- random batch of 8 three-channel images, used only for graph generation
  -- and for the optnet memory optimisation pass
  local sample_input = torch.randn(8,3,opt.imageSize,opt.imageSize):cuda()
  if opt.generate_graph then
    -- NOTE(review): opt.save is only created further down (paths.mkdir), so
    -- the directory must already exist for this write -- confirm.
    iterm.dot(graphgen(net, sample_input), opt.save..'/graph.pdf')
  end
  if opt.optnet_optimize then
    optnet.optimizeMemory(net, sample_input, {inplace = false, mode = 'training'})
  end
  -- to avoid optnet messing cudnn FindEx
  cudnn.benchmark = true
end
-- wrap the network for opt.nGPU devices and convert it to opt.data_type
model:add(utils.makeDataParallelTable(net, opt.nGPU))
cast(model)
98 |
-- Random horizontal flip: returns x unchanged when the random draw is 1,
-- otherwise returns image.hflip(x).
local function hflip(x)
  local keep = torch.random(0,1) == 1
  -- `and x` mirrors the and/or idiom exactly: a falsy x falls through
  if keep and x then
    return x
  end
  return image.hflip(x)
end
102 |
-- Random-crop augmentation: pad x by opt.randomcrop on every side (zero or
-- reflection padding, chosen by opt.randomcrop_type) and cut a random
-- opt.imageSize x opt.imageSize window out of dimensions 2 and 3.
-- Assumes x is a float image tensor laid out channels x height x width --
-- TODO confirm against the dataset.
-- Raises an error for an unknown opt.randomcrop_type.
local function randomcrop(x)
  local pad = opt.randomcrop
  -- Kept local: assigning a global named `module` would overwrite Lua 5.1's
  -- built-in module() function and leak state between calls.
  local padder
  if opt.randomcrop_type == 'reflection' then
    padder = nn.SpatialReflectionPadding(pad,pad,pad,pad):float()
  elseif opt.randomcrop_type == 'zero' then
    padder = nn.SpatialZeroPadding(pad,pad,pad,pad):float()
  else
    error'unknown mode'
  end

  local imsize = opt.imageSize
  local padded = padder:forward(x)
  -- offsets range over 1 .. 2*pad+1 so the window always fits in the padded image
  local dx = torch.random(1,pad*2 + 1)
  local dy = torch.random(1,pad*2 + 1)
  return padded:narrow(3,dx,imsize):narrow(2,dy,imsize)
end
119 |
120 |
-- Build a torchnet parallel iterator over one split of the loaded data.
--   mode: 'train' or 'test'; selects provider[mode..'Data'].
-- Training data is shuffled, augmented and batched with incomplete batches
-- dropped ('skip-last'); any other mode is batched as is, keeping the last
-- partial batch ('include-last').
local function getIterator(mode)
  local dataset = provider[mode..'Data']

  local list_dataset = tnt.ListDataset{
    list = torch.range(1, dataset.labels:numel()):long(),
    load = function(idx)
      return {
        input = dataset.data[idx]:float(),
        target = torch.LongTensor{dataset.labels[idx]},
      }
    end,
  }

  local d
  if mode == 'train' then
    -- Collect the enabled augmentations into a hole-free list. Writing
    -- {cond and f or nil, ...} leaves a nil at index 1 when hflip is off;
    -- tnt.transform.compose expects contiguous keys starting at 1, so the
    -- random crop behind that hole would be silently skipped.
    local augmentations = {}
    if opt.hflip then
      table.insert(augmentations, hflip)
    end
    if opt.randomcrop > 0 then
      table.insert(augmentations, randomcrop)
    end

    d = list_dataset
      :shuffle()
      :transform{
        input = tnt.transform.compose(augmentations),
      }
      :batch(opt.batchSize, 'skip-last')
  else
    d = list_dataset:batch(opt.batchSize, 'include-last')
  end

  -- Invoked through iterator:exec('manualSeed', seed) from the epoch hook.
  function d:manualSeed(seed) torch.manualSeed(seed) end

  return tnt.ParallelDatasetIterator{
    nthread = 2,
    init = function()
      require 'torchnet'
      require 'image'
      require 'nn'
    end,
    closure = function()
      return d
    end,
  }
end
160 |
-- Print one 'json_stats: ' line containing the JSON encoding of the given
-- statistics table merged with the options table.
local function log(t)
  local merged = tablex.merge(t, opt, true)
  local encoded = json.encode(merged)
  print('json_stats: '..encoded)
end
162 |
-- Announce and create the output directory.
do
  local save_dir = opt.save
  print('Will save at '..save_dir)
  paths.mkdir(save_dir)
end

-- Training engine, loss, and the meters/timers shared by the hooks below.
local engine = tnt.OptimEngine()
local criterion = cast(nn.CrossEntropyCriterion())
-- running average of the criterion output
local meter = tnt.AverageValueMeter()
-- top-1 classification error
local clerr = tnt.ClassErrorMeter({topk = {1}})
local train_timer, test_timer = torch.Timer(), torch.Timer()
172 |
-- Start-of-epoch hook: reset the meters and the training timer, apply the
-- learning-rate step when one is due, then reseed and resample the iterator.
engine.hooks.onStartEpoch = function(state)
  local epoch = state.epoch + 1
  print('==>'.." online epoch # " .. epoch .. ' [batchSize = ' .. opt.batchSize .. ']')
  meter:reset()
  clerr:reset()
  train_timer:reset()

  -- A numeric epoch_step decays every that many epochs; a table decays at
  -- exactly the epochs it lists.
  local step = opt.epoch_step
  local step_type = torch.type(step)
  local decay_now
  if step_type == 'number' then
    decay_now = epoch % step == 0
  elseif step_type == 'table' then
    decay_now = tablex.find(step, epoch)
  end

  if decay_now then
    opt.learningRate = opt.learningRate * opt.learningRateDecayRatio
    -- both the optimizer config and its state table are replaced by fresh
    -- copies of the updated options
    state.config = tablex.deepcopy(opt)
    state.optim = tablex.deepcopy(opt)
  end

  state.iterator:exec('manualSeed', epoch)
  state.iterator:exec'resample'
end
189 |
-- End-of-epoch hook: capture the training statistics, evaluate on the test
-- split with the same meters, and log one JSON line for the epoch.
engine.hooks.onEndEpoch = function(state)
  -- read the training numbers before the meters are reused for testing
  local epoch_loss = meter:value()
  local epoch_err = clerr:value{k = 1}
  local elapsed_train = train_timer:time().real

  meter:reset()
  clerr:reset()
  test_timer:reset()

  engine:test{
    network = model,
    iterator = getIterator('test'),
    criterion = criterion,
  }

  -- clerr now holds the test error accumulated by onForwardCriterion
  local test_err = clerr:value{k = 1}
  local elapsed_test = test_timer:time().real

  log{
    loss = epoch_loss,
    train_loss = epoch_loss,
    train_acc = 100 - epoch_err,
    epoch = state.epoch,
    test_acc = 100 - test_err,
    lr = opt.learningRate,
    train_time = elapsed_train,
    test_time = elapsed_test,
    n_parameters = state.params:numel(),
  }
end
216 |
-- After each criterion forward pass: feed the loss into the average meter and
-- the network output plus targets into the classification-error meter.
engine.hooks.onForwardCriterion = function(state)
  local loss = state.criterion.output
  meter:add(loss)

  local predictions = state.network.output
  local labels = state.sample.target
  clerr:add(predictions, labels)
end
221 |
-- Reusable buffers of type opt.data_type; every sample is copied into them so
-- the network and criterion always receive tensors of that type.
local input_buf = cast(torch.Tensor())
local target_buf = cast(torch.Tensor())
engine.hooks.onSample = function(state)
  local sample = state.sample
  input_buf:resize(sample.input:size()):copy(sample.input)
  target_buf:resize(sample.target:size()):copy(sample.target)
  sample.input = input_buf
  sample.target = target_buf
end
230 |
-- Run training, then save the bare network (intermediate state cleared) to
-- <save>/model.t7.
do
  -- Honour the optimMethod option instead of always using SGD; the default
  -- 'sgd' resolves to optim.sgd as before.
  local optim_method = optim[opt.optimMethod]
  assert(type(optim_method) == 'function',
    'unknown optimMethod: '..tostring(opt.optimMethod))

  engine:train{
    network = model,
    iterator = getIterator('train'),
    criterion = criterion,
    optimMethod = optim_method,
    -- the options table doubles as the optimizer config
    config = tablex.deepcopy(opt),
    maxepoch = opt.max_epoch,
  }
end

torch.save(opt.save..'/model.t7', net:clearState())
241 |
--------------------------------------------------------------------------------