├── .gitignore ├── LICENSE ├── README.md ├── logs ├── nin_1875527964 │ └── log.txt ├── resnet-pre-act_17934627 │ └── log.txt ├── vgg_24208029 │ └── log.txt └── wide-resnet_1121914561 │ └── log.txt ├── models ├── nin.lua ├── resnet-pre-act.lua ├── utils.lua ├── vgg.lua └── wide-resnet.lua ├── notebooks ├── sweeper.py └── visualize.ipynb ├── pretrained ├── README.md └── wide-resnet.lua ├── pytorch ├── README.md ├── main.py ├── requirements.txt ├── resnet.py └── utils.py ├── scripts ├── train_cifar.sh └── train_svhn.sh └── train.lua /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | notebooks/.ipynb_checkpoints 3 | *.pyc 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2016, Sergey Zagoruyko 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Wide Residual Networks 2 | ============= 3 | 4 | This code was used for experiments with Wide Residual Networks (BMVC 2016) http://arxiv.org/abs/1605.07146 by Sergey Zagoruyko and Nikos Komodakis. 5 | 6 | Deep residual networks were shown to be able to scale up to thousands of 7 | layers and still have improving performance. However, each fraction of a 8 | percent of improved accuracy costs nearly doubling the number of layers, and so 9 | training very deep residual networks has a problem of diminishing feature 10 | reuse, which makes these networks very slow to train. 11 | 12 | To tackle these problems, 13 | in this work we conduct a detailed experimental study on the architecture of 14 | ResNet blocks, based on which we propose a novel architecture where we *decrease 15 | depth* and *increase width* of residual networks. We call the resulting network 16 | structures **wide residual networks (WRNs)** and show that these are far superior 17 | over their commonly used thin and very deep counterparts. 18 | 19 | For example, we 20 | demonstrate that even a simple 16-layer-deep wide residual network outperforms 21 | in accuracy and efficiency all previous deep residual networks, including 22 | thousand-layer-deep networks. 
We further show that WRNs achieve **incredibly** 23 | good results (e.g., achieving new state-of-the-art results on 24 | CIFAR-10, CIFAR-100, SVHN, COCO and substantial improvements on ImageNet) and train **several times faster** than pre-activation ResNets. 25 | 26 | **Update (August 2019):** Pretrained ImageNet WRN models are available in 27 | torchvision 0.4 and [PyTorch Hub](https://pytorch.org/hub/pytorch_vision_wide_resnet/), e.g. loading WRN-50-2: 28 | ```python 29 | model = torch.hub.load('pytorch/vision', 'wide_resnet50_2', pretrained=True) 30 | ``` 31 | 32 | **Update (November 2016):** We updated the paper with ImageNet, COCO and meanstd preprocessing CIFAR results. 33 | If you're comparing your method against WRN, please report correct preprocessing numbers because they give substantially different results. 34 | 35 | tldr; ImageNet WRN-50-2-bottleneck (ResNet-50 with wider inner bottleneck 3x3 convolution) is significantly faster than ResNet-152 and has better accuracy; on CIFAR meanstd preprocessing (as in fb.resnet.torch) gives better results than ZCA whitening; on COCO wide ResNet with 34 layers outperforms even Inception-v4-based Fast-RCNN model in single model performance. 36 | 37 | Test error (%, flip/translation augmentation, **meanstd** normalization, median of 5 runs) on CIFAR: 38 | 39 | Network | CIFAR-10 | CIFAR-100 | 40 | -----------------|:--------:|:--------: 41 | pre-ResNet-164 | 5.46 | 24.33 42 | pre-ResNet-1001 | 4.92 | 22.71 43 | WRN-28-10 | 4.00 | 19.25 44 | WRN-28-10-dropout| **3.89** | **18.85** 45 | 46 | Single-time runs (meanstd normalization): 47 | 48 | Dataset | network | test perf. | 49 | --------|:-------:|:---------:| 50 | CIFAR-10 | WRN-40-10-dropout | 3.8% 51 | CIFAR-100 | WRN-40-10-dropout | 18.3% 52 | SVHN | WRN-16-8-dropout | 1.54% 53 | ImageNet (single crop) | WRN-50-2-bottleneck | 21.9% top-1, 5.79% top-5 54 | COCO-val5k (single model) | WRN-34-2 | 36 mAP 55 | 56 | See http://arxiv.org/abs/1605.07146 for details. 
57 | 58 | 59 | 60 | bibtex: 61 | 62 | ``` 63 | @INPROCEEDINGS{Zagoruyko2016WRN, 64 | author = {Sergey Zagoruyko and Nikos Komodakis}, 65 | title = {Wide Residual Networks}, 66 | booktitle = {BMVC}, 67 | year = {2016}} 68 | ``` 69 | 70 | # Pretrained models 71 | 72 | ## ImageNet 73 | 74 | WRN-50-2-bottleneck (wider bottleneck), see [pretrained](pretrained/README.md) for details
75 | Download (263MB): https://yadi.sk/d/-8AWymOPyVZns 76 | 77 | There are also PyTorch and TensorFlow model definitions with pretrained weights at 78 | 79 | 80 | ## COCO 81 | 82 | Coming 83 | 84 | # Installation 85 | 86 | The code depends on Torch http://torch.ch. Follow the instructions [here](http://torch.ch/docs/getting-started.html) and run: 87 | 88 | ``` 89 | luarocks install torchnet 90 | luarocks install optnet 91 | luarocks install iterm 92 | ``` 93 | 94 | For visualizing training curves we used an ipython notebook with pandas and bokeh. 95 | 96 | # Usage 97 | 98 | ## Dataset support 99 | 100 | The code supports loading simple datasets in torch format. We provide the following: 101 | 102 | * MNIST 103 | [data preparation script](https://gist.github.com/szagoruyko/8467ee15d020ab2a7ce80a215af71b74) 104 | * CIFAR-10 105 | [**recommended**] 106 | [data preparation script](https://gist.github.com/szagoruyko/e5cf5e9b54661a817695c8c7b5c3dfa6), 107 | [preprocessed data (176MB)](https://yadi.sk/d/eFmOduZyxaBrT) 108 | * CIFAR-10 whitened (using pylearn2) 109 | [preprocessed dataset](https://yadi.sk/d/em4b0FMgrnqxy) 110 | * CIFAR-100 111 | [**recommended**] 112 | [data preparation script](https://gist.github.com/szagoruyko/01bfa936396f913a899ee49b98e7304b), 113 | [preprocessed data (176MB)](https://yadi.sk/d/ZbiXAegjxaBcM) 114 | * CIFAR-100 whitened (using pylearn2) 115 | [preprocessed dataset](https://yadi.sk/d/em4b0FMgrnqxy) 116 | * SVHN [data preparation script](https://gist.github.com/szagoruyko/27712564a3f3765c5bfd933b56a21757) 117 | 118 | To whiten CIFAR-10 and CIFAR-100 we used the following script https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/datasets/make_cifar10_gcn_whitened.py and then converted the result to torch format using https://gist.github.com/szagoruyko/ad2977e4b8dceb64c68ea07f6abf397b and the npy-to-torch converter https://github.com/htwaijry/npy4th. 119 | 120 | We are running ImageNet experiments and will update the paper and this repo soon.
121 | 122 | ## Training 123 | 124 | We provide several scripts for reproducing results in the paper. Below are several examples. 125 | 126 | ```bash 127 | model=wide-resnet widen_factor=4 depth=40 ./scripts/train_cifar.sh 128 | ``` 129 | 130 | This will train WRN-40-4 on CIFAR-10 whitened (expected to be in the `datasets` folder). This network achieves about the same accuracy as ResNet-1001 and trains in 6 hours on a single Titan X. 131 | The log is saved to the `logs/wide-resnet_$RANDOM$RANDOM` folder with JSON entries for each epoch and can be visualized with itorch/ipython later. 132 | 133 | For reference we provide logs for this experiment and an [ipython notebook](notebooks/visualize.ipynb) to visualize the results. After running it you should see these training curves: 134 | 135 | ![viz](https://cloud.githubusercontent.com/assets/4953728/15482840/11b46698-2132-11e6-931e-04680ae42c3c.png) 136 | 137 | Another example: 138 | 139 | ```bash 140 | model=wide-resnet widen_factor=10 depth=28 dropout=0.3 dataset=./datasets/cifar100_whitened.t7 ./scripts/train_cifar.sh 141 | ``` 142 | 143 | This network achieves 20.0% error on CIFAR-100 in about a day on a single Titan X. 144 | 145 | Multi-GPU is supported with the `nGPU=n` parameter. 146 | 147 | ## Other models 148 | 149 | Additional models in this repo: 150 | 151 | * NIN (7.4% on CIFAR-10 whitened) 152 | * VGG (modified from [cifar.torch](https://github.com/szagoruyko/cifar.torch), 6.3% on CIFAR-10 whitened) 153 | * pre-activation ResNet (from https://github.com/KaimingHe/resnet-1k-layers) 154 | 155 | ## Implementation details 156 | 157 | The code evolved from https://github.com/szagoruyko/cifar.torch. To reduce memory usage we use @fmassa's [optimize-net](https://github.com/fmassa/optimize-net), which automatically shares output and gradient tensors between modules. This keeps memory usage below 4 GB even for our best networks. Also, it can generate network graph plots such as the one for WRN-16-2 at the end of this page.
158 | 159 | # Acknowledgements 160 | 161 | We thank the startup company [VisionLabs](http://www.visionlabs.ru/en/) and Eugenio Culurciello for giving us access to their clusters; without them the ImageNet experiments would not have been possible. We also thank Adam Lerer and Sam Gross for helpful discussions. This work was supported by EC project FP7-ICT-611145 ROBOSPECT. 162 | 163 |
164 | -------------------------------------------------------------------------------- /logs/nin_1875527964/log.txt: -------------------------------------------------------------------------------- 1 | { 2 | optnet_optimize : true 3 | generate_graph : false 4 | init_value : 10 5 | randomcrop : 4 6 | batchSize : 128 7 | epoch_step : 8 | { 9 | 1 : 60 10 | 2 : 120 11 | 3 : 160 12 | } 13 | randomcrop_type : "reflection" 14 | model : "nin" 15 | save : "logs/nin_1875527964" 16 | dampening : 0 17 | learningRate : 0.1 18 | shortcutType : "A" 19 | nesterov : true 20 | cudnn_deterministic : false 21 | depth : 50 22 | learningRateDecayRatio : 0.2 23 | multiply_input_factor : 1 24 | dataset : "./datasets/cifar10_whitened.t7" 25 | weightDecay : 0.0005 26 | momentum : 0.9 27 | optimMethod : "sgd" 28 | hflip : true 29 | max_epoch : 200 30 | imageSize : 32 31 | dropout : 0 32 | learningRateDecay : 0 33 | cudnn_fastest : true 34 | widen_factor : 1 35 | } 36 | ==> loading data 37 | ==> configuring model 38 | forward output { 39 | 1 : FloatTensor - size: 1x10 40 | } 41 | backward output { 42 | 1 : FloatTensor - size: 1x3x32x32 43 | } 44 | nn.Sequential { 45 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> (11) -> (12) -> (13) -> (14) -> (15) -> (16) -> (17) -> (18) -> (19) -> (20) -> (21) -> (22) -> (23) -> (24) -> (25) -> (26) -> (27) -> (28) -> (29) -> (30) -> (31) -> (32) -> output] 46 | (1): cudnn.SpatialConvolution(3 -> 192, 5x5, 1,1, 2,2) 47 | (2): cudnn.SpatialBatchNormalization 48 | (3): cudnn.ReLU 49 | (4): cudnn.SpatialConvolution(192 -> 160, 1x1) 50 | (5): cudnn.SpatialBatchNormalization 51 | (6): cudnn.ReLU 52 | (7): cudnn.SpatialConvolution(160 -> 96, 1x1) 53 | (8): cudnn.SpatialBatchNormalization 54 | (9): cudnn.ReLU 55 | (10): cudnn.SpatialMaxPooling(3x3, 2,2) 56 | (11): cudnn.SpatialConvolution(96 -> 192, 5x5, 1,1, 2,2) 57 | (12): cudnn.SpatialBatchNormalization 58 | (13): cudnn.ReLU 59 | (14): cudnn.SpatialConvolution(192 -> 192, 
1x1) 60 | (15): cudnn.SpatialBatchNormalization 61 | (16): cudnn.ReLU 62 | (17): cudnn.SpatialConvolution(192 -> 192, 1x1) 63 | (18): cudnn.SpatialBatchNormalization 64 | (19): cudnn.ReLU 65 | (20): cudnn.SpatialAveragePooling(3x3, 2,2) 66 | (21): cudnn.SpatialConvolution(192 -> 192, 3x3, 1,1, 1,1) 67 | (22): cudnn.SpatialBatchNormalization 68 | (23): cudnn.ReLU 69 | (24): cudnn.SpatialConvolution(192 -> 192, 1x1) 70 | (25): cudnn.SpatialBatchNormalization 71 | (26): cudnn.ReLU 72 | (27): cudnn.SpatialConvolution(192 -> 192, 1x1) 73 | (28): cudnn.SpatialBatchNormalization 74 | (29): cudnn.ReLU 75 | (30): cudnn.SpatialAveragePooling(8x8, 1,1) 76 | (31): nn.View(-1) 77 | (32): nn.Linear(192 -> 10) 78 | } 79 | Network has 9 convolutions 80 | Will save at logs/nin_1875527964 81 | Network has 1007242 parameters 82 | ==> setting criterion 83 | ==> configuring optimizer 84 | ==> online epoch # 1 [batchSize = 128] 85 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.593863964081,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":1,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":54.78,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.4729490280151,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":1.1969858704469} 86 | ==> online epoch # 2 [batchSize = 128] 87 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.532868862152,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":2,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":70.87,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2549231052399,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.82106501123844} 88 | ==> online epoch # 3 [batchSize = 128] 89 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.588787078857,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":3,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":67.43,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2515769004822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.69562138464206} 90 | ==> online epoch # 4 [batchSize = 128] 91 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.437535047531,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":4,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.97,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2535479068756,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.64121536719493} 92 | ==> online epoch # 5 [batchSize = 128] 93 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.63557600975,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":5,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.93,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2503349781036,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.60574328242204} 94 | ==> online epoch # 6 [batchSize = 128] 95 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.368470191956,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":6,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":69.68,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2589159011841,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.56973296541434} 96 | ==> online epoch # 7 [batchSize = 128] 97 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.608664035797,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":7,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2541699409485,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.55406727538659} 98 | ==> online epoch # 8 [batchSize = 128] 99 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.443886041641,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":8,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":71.06,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2503561973572,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.53045730017699} 100 | ==> online epoch # 9 [batchSize = 128] 101 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.654844045639,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":9,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.36,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.254166841507,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.5159140002269} 102 | ==> online epoch # 10 [batchSize = 128] 103 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":56.00287604332,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":10,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.11,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.258073091507,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.50183857289644} 104 | ==> online epoch # 11 [batchSize = 128] 105 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.120012998581,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":11,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.3,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2523429393768,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.49553745870407} 106 | ==> online epoch # 12 [batchSize = 128] 107 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.61984705925,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":12,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.18,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2490620613098,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.48498020233252} 108 | ==> online epoch # 13 [batchSize = 128] 109 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.887814044952,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":13,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":73.53,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2499670982361,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.47510553697745} 110 | ==> online epoch # 14 [batchSize = 128] 111 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.945474147797,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":14,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.36,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481338977814,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.46514304058674} 112 | ==> online epoch # 15 [batchSize = 128] 113 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.944491863251,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":15,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.23,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2524788379669,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.4617732351407} 114 | ==> online epoch # 16 [batchSize = 128] 115 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.834233045578,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":16,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.13,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2483010292053,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.45532201711948} 116 | ==> online epoch # 17 [batchSize = 128] 117 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.916064977646,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":17,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.12,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2509729862213,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.44790997589246} 118 | ==> online epoch # 18 [batchSize = 128] 119 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.023838996887,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":18,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.44,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2504661083221,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.44162713854741} 120 | ==> online epoch # 19 [batchSize = 128] 121 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.770215988159,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":19,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.9,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2507870197296,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.43800124143943} 122 | ==> online epoch # 20 [batchSize = 128] 123 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.90252494812,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":20,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":76.03,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2497539520264,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.43589583990666} 124 | ==> online epoch # 21 [batchSize = 128] 125 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":55.002173900604,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":21,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":66.82,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496049404144,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42981643737891} 126 | ==> online epoch # 22 [batchSize = 128] 127 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.752336978912,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":22,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":82.59,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492098808289,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42459514660713} 128 | ==> online epoch # 23 [batchSize = 128] 129 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.789345026016,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":23,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.19,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2487859725952,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.42145103521836} 130 | ==> online epoch # 24 [batchSize = 128] 131 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.813308000565,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":24,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.51,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2489840984344,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41741959108756} 132 | ==> online epoch # 25 [batchSize = 128] 133 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.880307912827,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":25,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.82,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2450361251831,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41995204568673} 134 | ==> online epoch # 26 [batchSize = 128] 135 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.844065904617,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":26,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.03,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530159950256,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.40897574275732} 136 | ==> online epoch # 27 [batchSize = 128] 137 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":54.667095184326,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":27,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.62,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2375519275665,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.41404951084883} 138 | ==> online epoch # 28 [batchSize = 128] 139 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.418707847595,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":28,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.25,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2441000938416,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.4057252982488} 140 | ==> online epoch # 29 [batchSize = 128] 141 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.580642938614,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":29,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.16,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2447679042816,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39754433024388} 142 | ==> online epoch # 30 [batchSize = 128] 143 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.571613073349,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":30,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.21,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2439520359039,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39858484837489} 144 | ==> online epoch # 31 [batchSize = 128] 145 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.805215835571,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":31,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.34,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530789375305,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.40288508194379} 146 | ==> online epoch # 32 [batchSize = 128] 147 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.553646802902,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":32,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2410068511963,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39816183722936} 148 | ==> online epoch # 33 [batchSize = 128] 149 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486525058746,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":33,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":70.26,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.245041847229,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39601638683906} 150 | ==> online epoch # 34 [batchSize = 128] 151 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.5560131073,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":34,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443888187408,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39213467993033} 152 | ==> online epoch # 35 [batchSize = 128] 153 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.545255899429,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":35,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.58,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449429035187,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.3885368809104} 154 | ==> online epoch # 36 [batchSize = 128] 155 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.537830114365,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":36,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.95,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.274099111557,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.39021477790979} 156 | ==> online epoch # 37 [batchSize = 128] 157 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.767641067505,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":37,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":82.12,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437970638275,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38730258360887} 158 | ==> online epoch # 38 [batchSize = 128] 159 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486897945404,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":38,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.67,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2433269023895,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38219698361861} 160 | ==> online epoch # 39 [batchSize = 128] 161 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.628032922745,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":39,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.04,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2432010173798,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38763711039837} 162 | ==> online epoch # 40 [batchSize = 128] 163 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.536153078079,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":40,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.11,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2452688217163,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38575872286008} 164 | ==> online epoch # 41 [batchSize = 128] 165 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520205974579,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":41,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.83,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241760969162,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38098671753437} 166 | ==> online epoch # 42 [batchSize = 128] 167 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.468014001846,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":42,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":74.42,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496681213379,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38319841267971} 168 | ==> online epoch # 43 [batchSize = 128] 169 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.556786060333,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":43,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":72.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482600212097,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37964692081396} 170 | ==> online epoch # 44 [batchSize = 128] 171 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.497092008591,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":44,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.29,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438809871674,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37826023327234} 172 | ==> online epoch # 45 [batchSize = 128] 173 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.417484045029,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":45,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.65,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2540519237518,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.38129784755218} 174 | ==> online epoch # 46 [batchSize = 128] 175 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.612913131714,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":46,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.46,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438228130341,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37656217454336} 176 | ==> online epoch # 47 [batchSize = 128] 177 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.595071077347,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":47,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":68.27,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2512409687042,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37258849484034} 178 | ==> online epoch # 48 [batchSize = 128] 179 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.653529167175,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":48,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.61,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2416579723358,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37765663812558} 180 | ==> online epoch # 49 [batchSize = 128] 181 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.506092071533,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":49,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.53,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2470300197601,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37032264463412} 182 | ==> online epoch # 50 [batchSize = 128] 183 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.531888008118,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":50,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.96,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2538690567017,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37068568055446} 184 | ==> online epoch # 51 [batchSize = 128] 185 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501205205917,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":51,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":83.19,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.252256155014,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.37402288734149} 186 | ==> online epoch # 52 [batchSize = 128] 187 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543003797531,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":52,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":73.99,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2470951080322,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36888554314008} 188 | ==> online epoch # 53 [batchSize = 128] 189 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.486098051071,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":53,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":79.71,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2463281154633,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36890217501384} 190 | ==> online epoch # 54 [batchSize = 128] 191 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.55241394043,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":54,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":80.02,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2498989105225,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36993713669288} 192 | ==> online epoch # 55 [batchSize = 128] 193 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450776815414,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":55,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":75.6,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2452819347382,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36913666110008} 194 | ==> online epoch # 56 [batchSize = 128] 195 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.542490005493,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":56,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":83.64,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427079677582,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.3689535304904} 196 | ==> online epoch # 57 [batchSize = 128] 197 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.437011003494,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":57,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":81.4,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2431769371033,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36746362615854} 198 | ==> online epoch # 58 [batchSize = 128] 199 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.555178880692,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":58,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":77.93,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454750537872,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36633080626145} 200 | ==> online epoch # 59 [batchSize = 128] 201 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.464647054672,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":59,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.1,"shortcutType":"A","test_acc":78.09,"learningRate":0.1,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453479766846,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.36666739491316} 202 | ==> online epoch # 60 [batchSize = 128] 203 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.503804922104,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":60,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440459728241,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.20818299402793} 204 | ==> online epoch # 61 [batchSize = 128] 205 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.495636940002,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":61,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.11,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481241226196,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.16236374943684} 206 | ==> online epoch # 62 [batchSize = 128] 207 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.565950155258,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":62,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.06,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2420511245728,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.14666115751442} 208 | ==> online epoch # 63 [batchSize = 128] 209 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.502928972244,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":63,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":91.13,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2500689029694,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.13470643959366} 210 | ==> online epoch # 64 [batchSize = 128] 211 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.547512054443,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":64,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.97,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2418549060822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12917851983355} 212 | ==> online epoch # 65 [batchSize = 128] 213 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.530552864075,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":65,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.66,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2429277896881,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1217747542243} 214 | ==> online epoch # 66 [batchSize = 128] 215 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.522407054901,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":66,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":90.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2444458007812,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11924994465632} 216 | ==> online epoch # 67 [batchSize = 128] 217 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.45677614212,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":67,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.58,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2485420703888,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11929867765269} 218 | ==> online epoch # 68 [batchSize = 128] 219 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.461474180222,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":68,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442350387573,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11808920183625} 220 | ==> online epoch # 69 [batchSize = 128] 221 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.601045131683,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":69,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.49,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437310218811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11948176400784} 222 | ==> online epoch # 70 [batchSize = 128] 223 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.556071996689,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":70,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.37,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2431380748749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11758094648711} 224 | ==> online epoch # 71 [batchSize = 128] 225 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.50079703331,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":71,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.19,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443370819092,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12548536382233} 226 | ==> online epoch # 72 [batchSize = 128] 227 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.50164103508,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":72,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.99,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2577300071716,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12277139732853} 228 | ==> online epoch # 73 [batchSize = 128] 229 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.557413101196,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":73,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.71,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449638843536,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12368412876549} 230 | ==> online epoch # 74 [batchSize = 128] 231 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.590755939484,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":74,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.78,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2457990646362,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12241119453922} 232 | ==> online epoch # 75 [batchSize = 128] 233 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.539636135101,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":75,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.77,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2458119392395,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1267889219981} 234 | ==> online epoch # 76 [batchSize = 128] 235 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.581067085266,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":76,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2409319877625,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12495670402661} 236 | ==> online epoch # 77 [batchSize = 128] 237 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.457403182983,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":77,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2478671073914,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12759307682132} 238 | ==> online epoch # 78 [batchSize = 128] 239 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.560487985611,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":78,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.25,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2419581413269,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1243928169354} 240 | ==> online epoch # 79 [batchSize = 128] 241 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.416492938995,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":79,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":85.91,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465958595276,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12609680067652} 242 | ==> online epoch # 80 [batchSize = 128] 243 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.632702827454,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":80,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.38,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2502009868622,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12994859545277} 244 | ==> online epoch # 81 [batchSize = 128] 245 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524478912354,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":81,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440950870514,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12787336121576} 246 | ==> online epoch # 82 [batchSize = 128] 247 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.612407922745,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":82,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.69,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492918968201,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12567707441556} 248 | ==> online epoch # 83 [batchSize = 128] 249 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.411056995392,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":83,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.32,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2420258522034,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1243204951955} 250 | ==> online epoch # 84 [batchSize = 128] 251 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510627985001,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":84,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.03,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453551292419,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12435456805695} 252 | ==> online epoch # 85 [batchSize = 128] 253 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.480562925339,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":85,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.38,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2415220737457,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12520780682755} 254 | ==> online epoch # 86 [batchSize = 128] 255 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.475887060165,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":86,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":84.83,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442979812622,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12019503449973} 256 | ==> online epoch # 87 [batchSize = 128] 257 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.428845882416,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":87,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2574808597565,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11920055043048} 258 | ==> online epoch # 88 [batchSize = 128] 259 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.554698944092,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":88,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.24,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2481360435486,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12393878577993} 260 | ==> online epoch # 89 [batchSize = 128] 261 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.506734132767,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":89,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.34,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449119091034,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12871642884058} 262 | ==> online epoch # 90 [batchSize = 128] 263 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.545350074768,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":90,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.89,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2464830875397,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12214053118458} 264 | ==> online epoch # 91 [batchSize = 128] 265 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.449155092239,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":91,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428939342499,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11818734575541} 266 | ==> online epoch # 92 [batchSize = 128] 267 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.518404006958,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":92,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.44,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446639537811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12180570296657} 268 | ==> online epoch # 93 [batchSize = 128] 269 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510896921158,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":93,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.46,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2653188705444,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11845285176562} 270 | ==> online epoch # 94 [batchSize = 128] 271 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543385982513,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":94,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.1,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2435641288757,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11678733453155} 272 | ==> online epoch # 95 [batchSize = 128] 273 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.477043151855,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":95,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.07,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428460121155,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11897576571657} 274 | ==> online epoch # 96 [batchSize = 128] 275 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.624214887619,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":96,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":84.28,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445249557495,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11904100564141} 276 | ==> online epoch # 97 [batchSize = 128] 277 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501276016235,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":97,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.67,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2391991615295,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11647340873113} 278 | ==> online epoch # 98 [batchSize = 128] 279 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.449658155441,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":98,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.65,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2489409446716,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11487070041207} 280 | ==> online epoch # 99 [batchSize = 128] 281 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.39834189415,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":99,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.94,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.243931055069,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12033484642131} 282 | ==> online epoch # 100 [batchSize = 128] 283 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520040988922,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":100,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.25,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445020675659,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11764579631197} 284 | ==> online epoch # 101 [batchSize = 128] 285 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.521477937698,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":101,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.61,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2443239688873,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11709465831996} 286 | ==> online epoch # 102 [batchSize = 128] 287 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.547205924988,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":102,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.98,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2500238418579,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.12114164804419} 288 | ==> online epoch # 103 [batchSize = 128] 289 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543912887573,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":103,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.45,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2557911872864,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10805225480252} 290 | ==> online epoch # 104 [batchSize = 128] 291 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.601721048355,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":104,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2498631477356,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11217697963405} 292 | ==> online epoch # 105 [batchSize = 128] 293 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.516308069229,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":105,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.21,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453751564026,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11557923503793} 294 | ==> online epoch # 106 [batchSize = 128] 295 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.495959997177,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":106,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":85.93,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2460300922394,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11261214604362} 296 | ==> online epoch # 107 [batchSize = 128] 297 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.65331697464,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":107,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.5,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2417590618134,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11322574942158} 298 | ==> online epoch # 108 [batchSize = 128] 299 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.478212833405,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":108,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.57,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2612581253052,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11255719629236} 300 | ==> online epoch # 109 [batchSize = 128] 301 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.476624965668,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":109,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.82,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438879013062,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11577705828807} 302 | ==> online epoch # 110 [batchSize = 128] 303 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.607400894165,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":110,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.64,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424070835114,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10901697488645} 304 | ==> online epoch # 111 [batchSize = 128] 305 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.571146965027,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":111,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.52,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469940185547,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10439068487821} 306 | ==> online epoch # 112 [batchSize = 128] 307 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.501214981079,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":112,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427980899811,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.1087359870187} 308 | ==> online epoch # 113 [batchSize = 128] 309 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.543877124786,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":113,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.74,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2601130008698,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11583366151421} 310 | ==> online epoch # 114 [batchSize = 128] 311 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.608434200287,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":114,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":86.61,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2427129745483,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10374431608197} 312 | ==> online epoch # 115 [batchSize = 128] 313 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.492336034775,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":115,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.14,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446849346161,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10607366448221} 314 | ==> online epoch # 116 [batchSize = 128] 315 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.384386062622,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":116,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.18,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2474370002747,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11034816891337} 316 | ==> online epoch # 117 [batchSize = 128] 317 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.530400037766,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":117,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":87.69,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2414441108704,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11196253771583} 318 | ==> online epoch # 118 [batchSize = 128] 319 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.534486055374,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":118,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":89.14,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451329231262,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.11396147064292} 320 | ==> online epoch # 119 [batchSize = 128] 321 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450636863708,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":119,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.02,"shortcutType":"A","test_acc":88.42,"learningRate":0.02,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2497298717499,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.10939630023562} 322 | ==> online epoch # 120 [batchSize = 128] 323 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.463079929352,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":120,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.11,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465319633484,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.048100662412934} 324 | ==> online epoch # 121 [batchSize = 128] 325 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.569537878036,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":121,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.42,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2562310695648,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.02959025050394} 326 | ==> online epoch # 122 [batchSize = 128] 327 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.637385129929,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":122,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.52,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2412090301514,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.024007035295169} 328 | ==> online epoch # 123 [batchSize = 128] 329 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.600746154785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":123,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.62,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2499098777771,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.021907049111831} 330 | ==> online epoch # 124 [batchSize = 128] 331 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.454932928085,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":124,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.67,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2476890087128,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.019307417838046} 332 | ==> online epoch # 125 [batchSize = 128] 333 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.441093206406,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":125,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2414410114288,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.017742788266295} 334 | ==> online epoch # 126 [batchSize = 128] 335 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.455188989639,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":126,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.64,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454380989075,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.016401587278606} 336 | ==> online epoch # 127 [batchSize = 128] 337 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.467864990234,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":127,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241947889328,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.014926031800226} 338 | ==> online epoch # 128 [batchSize = 128] 339 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.533565044403,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":128,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.69,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2432818412781,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.01391430978114} 340 | ==> online epoch # 129 [batchSize = 128] 341 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.569344997406,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":129,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436130046844,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.013448232364578} 342 | ==> online epoch # 130 [batchSize = 128] 343 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.485431909561,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":130,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.5,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.247474193573,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.013354300869963} 344 | ==> online epoch # 131 [batchSize = 128] 345 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.450932025909,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":131,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.69,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469120025635,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.012197144162387} 346 | ==> online epoch # 132 [batchSize = 128] 347 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.558818101883,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":132,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.79,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424931526184,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.011325099218923} 348 | ==> online epoch # 133 [batchSize = 128] 349 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.48894906044,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":133,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.68,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434628009796,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010793494394956} 350 | ==> online epoch # 134 [batchSize = 128] 351 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.505841970444,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":134,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.77,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437591552734,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010930944802478} 352 | ==> online epoch # 135 [batchSize = 128] 353 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.58614897728,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":135,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.67,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2496569156647,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010146045068709} 354 | ==> online epoch # 136 [batchSize = 128] 355 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.484592914581,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":136,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.76,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2537951469421,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010516702675093} 356 | ==> online epoch # 137 [batchSize = 128] 357 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.539739847183,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":137,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.84,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451858520508,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.010800205873182} 358 | ==> online epoch # 138 [batchSize = 128] 359 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.510965108871,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":138,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.54,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438318729401,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0097131812610687} 360 | ==> online epoch # 139 [batchSize = 128] 361 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.508059024811,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":139,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426409721375,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0094331520299117} 362 | ==> online epoch # 140 [batchSize = 128] 363 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.515514850616,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":140,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.56,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2462821006775,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0095086083245965} 364 | ==> online epoch # 141 [batchSize = 128] 365 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.426674842834,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":141,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.51,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2578938007355,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091447023579325} 366 | ==> online epoch # 142 [batchSize = 128] 367 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.441915988922,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":142,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2683110237122,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091797006770204} 368 | ==> online epoch # 143 [batchSize = 128] 369 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.656672000885,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":143,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2413201332092,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0091757497344262} 370 | ==> online epoch # 144 [batchSize = 128] 371 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.532930135727,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":144,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.48,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2492570877075,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0090615240331644} 372 | ==> online epoch # 145 [batchSize = 128] 373 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.444036006927,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":145,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.45,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438859939575,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0092722899304369} 374 | ==> online epoch # 146 [batchSize = 128] 375 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.517123937607,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":146,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2475869655609,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0092046390598019} 376 | ==> online epoch # 147 [batchSize = 128] 377 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520902872086,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":147,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.55,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2421779632568,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.008476875254359} 378 | ==> online epoch # 148 [batchSize = 128] 379 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.654017925262,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":148,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.44,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2514848709106,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0085756237403705} 380 | ==> online epoch # 149 [batchSize = 128] 381 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.552144765854,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":149,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.244772195816,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0086590464394062} 382 | ==> online epoch # 150 [batchSize = 128] 383 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.528169155121,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":150,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.57,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2425131797791,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.008542746410538} 384 | ==> online epoch # 151 [batchSize = 128] 385 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.526846885681,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":151,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.7,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2472369670868,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0079688250684203} 386 | ==> online epoch # 152 [batchSize = 128] 387 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.597927093506,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":152,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.75,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2419338226318,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0079787191051321} 388 | ==> online epoch # 153 [batchSize = 128] 389 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.520629167557,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":153,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.44,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2469570636749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081460221551168} 390 | ==> online epoch # 154 [batchSize = 128] 391 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.669497013092,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":154,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.68,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2429950237274,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081049349159002} 392 | ==> online epoch # 155 [batchSize = 128] 393 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.598120927811,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":155,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.59,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2529668807983,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0076991175564054} 394 | ==> online epoch # 156 [batchSize = 128] 395 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.563598155975,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":156,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.5,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.242280960083,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0081053374191889} 396 | ==> online epoch # 157 [batchSize = 128] 397 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.488450050354,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":157,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.58,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2525689601898,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0078322904279981} 398 | ==> online epoch # 158 [batchSize = 128] 399 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524356126785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":158,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.41,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2454540729523,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0078825698162501} 400 | ==> online epoch # 159 [batchSize = 128] 401 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.502348184586,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":159,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.004,"shortcutType":"A","test_acc":92.63,"learningRate":0.004,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.242996931076,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0082889589934777} 402 | ==> online epoch # 160 [batchSize = 128] 403 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.499071121216,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":160,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2465269565582,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0073816621246246} 404 | ==> online epoch # 161 [batchSize = 128] 405 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.437483072281,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":161,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.85,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2424199581146,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0068039200149285} 406 | ==> online epoch # 162 [batchSize = 128] 407 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.464745998383,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":162,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.76,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2421388626099,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0068422729483782} 408 | ==> online epoch # 163 [batchSize = 128] 409 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.484056949615,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":163,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.56,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2484450340271,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062748136763007} 410 | ==> online epoch # 164 [batchSize = 128] 411 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.546632051468,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":164,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.244167804718,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0065681714086961} 412 | ==> online epoch # 165 [batchSize = 128] 413 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.596024990082,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":165,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.67,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2483789920807,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0067759589459269} 414 | ==> online epoch # 166 [batchSize = 128] 415 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.526091098785,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":166,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2405340671539,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061572475406604} 416 | ==> online epoch # 167 [batchSize = 128] 417 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.582150936127,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":167,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2663550376892,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0065574873955204} 418 | ==> online epoch # 168 [batchSize = 128] 419 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.68184709549,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":168,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436389923096,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062123294442128} 420 | ==> online epoch # 169 [batchSize = 128] 421 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.522497177124,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":169,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.56,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2438719272614,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063478323081747} 422 | ==> online epoch # 170 [batchSize = 128] 423 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.64563703537,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":170,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.6,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2442688941956,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.006331152917865} 424 | ==> online epoch # 171 [batchSize = 128] 425 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.453807115555,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":171,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.71,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2440838813782,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061411829139942} 426 | ==> online epoch # 172 [batchSize = 128] 427 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.474457979202,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":172,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.72,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2488949298859,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060788853046222} 428 | ==> online epoch # 173 [batchSize = 128] 429 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.548737049103,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":173,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.63,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2540090084076,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062972170897783} 430 | ==> online epoch # 174 [batchSize = 128] 431 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.424373865128,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":174,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.85,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482089996338,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062021393185625} 432 | ==> online epoch # 175 [batchSize = 128] 433 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.541701078415,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":175,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.55,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426030635834,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062863830190438} 434 | ==> online epoch # 176 [batchSize = 128] 435 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.496028900146,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":176,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.62,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2426710128784,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0059864581013337} 436 | ==> online epoch # 177 [batchSize = 128] 437 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.576555967331,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":177,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2451550960541,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060988159086078} 438 | ==> online epoch # 178 [batchSize = 128] 439 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.524843215942,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":178,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428200244904,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061548010661052} 440 | ==> online epoch # 179 [batchSize = 128] 441 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.521260023117,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":179,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.61,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2453989982605,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063890275521538} 442 | ==> online epoch # 180 [batchSize = 128] 443 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.529162168503,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":180,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2445778846741,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060930794200454} 444 | ==> online epoch # 181 [batchSize = 128] 445 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.402792930603,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":181,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.4,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2468218803406,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061355742936333} 446 | ==> online epoch # 182 [batchSize = 128] 447 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.46883893013,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":182,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.45,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.243901014328,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058311326954609} 448 | ==> online epoch # 183 [batchSize = 128] 449 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.164771795273,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":183,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2449779510498,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0062234142604165} 450 | ==> online epoch # 184 [batchSize = 128] 451 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.035275936127,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":184,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.7,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2461631298065,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0063772084669043} 452 | ==> online epoch # 185 [batchSize = 128] 453 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.064441919327,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":185,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.51,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.241240978241,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057631540470398} 454 | ==> online epoch # 186 [batchSize = 128] 455 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.002777099609,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":186,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2482149600983,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.006140244532472} 456 | ==> online epoch # 187 [batchSize = 128] 457 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.205068826675,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":187,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.65,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2436771392822,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057536732978546} 458 | ==> online epoch # 188 [batchSize = 128] 459 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.153147935867,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":188,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434120178223,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0061327080600537} 460 | ==> online epoch # 189 [batchSize = 128] 461 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.114384174347,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":189,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2446839809418,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060385716649202} 462 | ==> online epoch # 190 [batchSize = 128] 463 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.283517837524,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":190,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.58,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2530851364136,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058259687744654} 464 | ==> online epoch # 191 [batchSize = 128] 465 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.125494003296,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":191,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.62,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428460121155,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0059547477807754} 466 | ==> online epoch # 192 [batchSize = 128] 467 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.045717954636,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":192,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.5,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2428419589996,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058646194923383} 468 | ==> online epoch # 193 [batchSize = 128] 469 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.088896989822,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":193,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2665319442749,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058154677781157} 470 | ==> online epoch # 194 [batchSize = 128] 471 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.138751983643,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":194,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.54,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.24520611763,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058429941487236} 472 | ==> online epoch # 195 [batchSize = 128] 473 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.113023996353,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":195,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.57,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2416369915009,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060955316449205} 474 | ==> online epoch # 196 [batchSize = 128] 475 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.114979982376,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":196,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2458860874176,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057551918503566} 476 | ==> online epoch # 197 [batchSize = 128] 477 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.031116008759,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":197,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.59,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2434060573578,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060307394044522} 478 | ==> online epoch # 198 [batchSize = 128] 479 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.105694055557,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":198,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.49,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2506849765778,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0057907086773179} 480 | ==> online epoch # 199 [batchSize = 128] 481 | json_stats: {"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.04305100441,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":199,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.65,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2430560588837,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0058586274393094} 482 | ==> online epoch # 200 [batchSize = 128] 483 | json_stats: 
{"optnet_optimize":true,"hflip":true,"randomcrop":4,"batchSize":128,"model":"nin","nesterov":true,"cudnn_deterministic":false,"multiply_input_factor":1,"num_classes":10,"momentum":0.9,"optimMethod":"sgd","dropout":0,"weightDecay":0.0005,"generate_graph":false,"init_value":10,"widen_factor":1,"imageSize":32,"train_time":53.059851884842,"learningRateDecayRatio":0.2,"dampening":0,"max_epoch":200,"epoch":200,"depth":50,"learningRateDecay":0,"cudnn_fastest":true,"randomcrop_type":"reflection","save":"logs\/nin_1875527964","lr":0.0008,"shortcutType":"A","test_acc":92.64,"learningRate":0.0008,"dataset":".\/datasets\/cifar10_whitened.t7","test_time":3.2437040805817,"n_parameters":1007242,"epoch_step":[60,120,160],"loss":0.0060513800439926} 484 | -------------------------------------------------------------------------------- /models/nin.lua: -------------------------------------------------------------------------------- 1 | -- This is a modified version of NIN network in 2 | -- https://github.com/szagoruyko/cifar.torch 3 | -- Network-In-Network: http://arxiv.org/abs/1312.4400 4 | -- Modifications: 5 | -- * removed dropout 6 | -- * added BatchNorm 7 | -- * the last layer changed from avg-pooling to linear (works better) 8 | require 'nn' 9 | local utils = paths.dofile'utils.lua' 10 | 11 | local function createModel(opt) 12 | local model = nn.Sequential() 13 | 14 | local function Block(...) 
15 | local arg = {...} 16 | model:add(nn.SpatialConvolution(...):noBias()) 17 | model:add(nn.SpatialBatchNormalization(arg[2],1e-5)) 18 | model:add(nn.ReLU(true)) 19 | return model 20 | end 21 | 22 | Block(3,192,5,5,1,1,2,2) 23 | Block(192,160,1,1) 24 | Block(160,96,1,1) 25 | model:add(nn.SpatialMaxPooling(3,3,2,2):ceil()) 26 | Block(96,192,5,5,1,1,2,2) 27 | Block(192,192,1,1) 28 | Block(192,192,1,1) 29 | model:add(nn.SpatialAveragePooling(3,3,2,2):ceil()) 30 | Block(192,192,3,3,1,1,1,1) 31 | Block(192,192,1,1) 32 | Block(192,192,1,1) 33 | model:add(nn.SpatialAveragePooling(8,8,1,1)) 34 | model:add(nn.View(-1):setNumInputDims(3)) 35 | model:add(nn.Linear(192,opt and opt.num_classes or 10)) 36 | 37 | utils.FCinit(model) 38 | utils.testModel(model) 39 | utils.MSRinit(model) 40 | return model 41 | end 42 | 43 | return createModel 44 | -------------------------------------------------------------------------------- /models/resnet-pre-act.lua: -------------------------------------------------------------------------------- 1 | -- ResNet-1001 2 | -- This is a re-implementation of the 1001-layer residual networks described in: 3 | -- [a] "Identity Mappings in Deep Residual Networks", arXiv:1603.05027, 2016, 4 | -- authored by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 5 | 6 | -- Acknowledgement: This code is contributed by Xiang Ming from Xi'an Jiaotong University. 7 | 8 | -- ************************************************************************ 9 | -- This code incorporates material from: 10 | 11 | -- fb.resnet.torch (https://github.com/facebook/fb.resnet.torch) 12 | -- Copyright (c) 2016, Facebook, Inc. 13 | -- All rights reserved. 14 | -- 15 | -- This source code is licensed under the BSD-style license found in the 16 | -- LICENSE file in the root directory of this source tree. An additional grant 17 | -- of patent rights can be found in the PATENTS file in the same directory.
18 | -- 19 | -- ************************************************************************ 20 | 21 | local nn = require 'nn' 22 | local utils = paths.dofile'utils.lua' 23 | 24 | local Convolution = nn.SpatialConvolution 25 | local Avg = nn.SpatialAveragePooling 26 | local ReLU = nn.ReLU 27 | local Max = nn.SpatialMaxPooling 28 | local SBatchNorm = nn.SpatialBatchNormalization 29 | 30 | local function createModel(opt) 31 | local depth = opt.depth 32 | 33 | -- The new Residual Unit in [a] 34 | local function bottleneck(nInputPlane, nOutputPlane, stride) 35 | 36 | local nBottleneckPlane = nOutputPlane / 4 37 | if opt.resnet_nobottleneck then 38 | nBottleneckPlane = nOutputPlane 39 | end 40 | 41 | if nInputPlane == nOutputPlane then -- most Residual Units have this shape 42 | local convs = nn.Sequential() 43 | -- conv1x1 44 | convs:add(SBatchNorm(nInputPlane)) 45 | convs:add(ReLU(true)) 46 | convs:add(Convolution(nInputPlane,nBottleneckPlane,1,1,stride,stride,0,0)) 47 | 48 | -- conv3x3 49 | convs:add(SBatchNorm(nBottleneckPlane)) 50 | convs:add(ReLU(true)) 51 | convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,3,3,1,1,1,1)) 52 | 53 | -- conv1x1 54 | convs:add(SBatchNorm(nBottleneckPlane)) 55 | convs:add(ReLU(true)) 56 | convs:add(Convolution(nBottleneckPlane,nOutputPlane,1,1,1,1,0,0)) 57 | 58 | local shortcut = nn.Identity() 59 | 60 | return nn.Sequential() 61 | :add(nn.ConcatTable() 62 | :add(convs) 63 | :add(shortcut)) 64 | :add(nn.CAddTable(true)) 65 | else -- Residual Units for increasing dimensions 66 | local block = nn.Sequential() 67 | -- common BN, ReLU 68 | block:add(SBatchNorm(nInputPlane)) 69 | block:add(ReLU(true)) 70 | 71 | local convs = nn.Sequential() 72 | -- conv1x1 73 | convs:add(Convolution(nInputPlane,nBottleneckPlane,1,1,stride,stride,0,0)) 74 | 75 | -- conv3x3 76 | convs:add(SBatchNorm(nBottleneckPlane)) 77 | convs:add(ReLU(true)) 78 | convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,3,3,1,1,1,1)) 79 | 80 | -- conv1x1 81 | 
convs:add(SBatchNorm(nBottleneckPlane)) 82 | convs:add(ReLU(true)) 83 | convs:add(Convolution(nBottleneckPlane,nOutputPlane,1,1,1,1,0,0)) 84 | 85 | local shortcut = nn.Sequential() 86 | shortcut:add(Convolution(nInputPlane,nOutputPlane,1,1,stride,stride,0,0)) 87 | 88 | return block 89 | :add(nn.ConcatTable() 90 | :add(convs) 91 | :add(shortcut)) 92 | :add(nn.CAddTable(true)) 93 | end 94 | end 95 | 96 | -- Stacking Residual Units on the same stage 97 | local function layer(block, nInputPlane, nOutputPlane, count, stride) 98 | local s = nn.Sequential() 99 | 100 | s:add(block(nInputPlane, nOutputPlane, stride)) 101 | for i=2,count do 102 | s:add(block(nOutputPlane, nOutputPlane, 1)) 103 | end 104 | return s 105 | end 106 | 107 | local model = nn.Sequential() 108 | do 109 | assert((depth - 2) % 9 == 0, 'depth should be 9n+2 (e.g., 164 or 1001 in the paper)') 110 | local n = (depth - 2) / 9 111 | 112 | -- The new ResNet-164 and ResNet-1001 in [a] 113 | local nStages = {16, 64, 128, 256} 114 | 115 | model:add(Convolution(3,nStages[1],3,3,1,1,1,1)) -- one conv at the beginning (spatial size: 32x32) 116 | model:add(layer(bottleneck, nStages[1], nStages[2], n, 1)) -- Stage 1 (spatial size: 32x32) 117 | model:add(layer(bottleneck, nStages[2], nStages[3], n, 2)) -- Stage 2 (spatial size: 16x16) 118 | model:add(layer(bottleneck, nStages[3], nStages[4], n, 2)) -- Stage 3 (spatial size: 8x8) 119 | model:add(SBatchNorm(nStages[4])) 120 | model:add(ReLU(true)) 121 | model:add(Avg(8, 8, 1, 1)) 122 | model:add(nn.View(nStages[4]):setNumInputDims(3)) 123 | model:add(nn.Linear(nStages[4], opt.num_classes)) 124 | end 125 | 126 | utils.DisableBias(model) 127 | utils.testModel(model) 128 | utils.MSRinit(model) 129 | utils.FCinit(model) 130 | 131 | -- model:get(1).gradInput = nil 132 | 133 | return model 134 | end 135 | 136 | return createModel 137 | -------------------------------------------------------------------------------- /models/utils.lua: 
-------------------------------------------------------------------------------- 1 | local utils = {} 2 | 3 | function utils.MSRinit(model) 4 | for k,v in pairs(model:findModules('nn.SpatialConvolution')) do 5 | local n = v.kW*v.kH*v.nInputPlane 6 | v.weight:normal(0,math.sqrt(2/n)) 7 | if v.bias then v.bias:zero() end 8 | end 9 | end 10 | 11 | function utils.FCinit(model) 12 | for k,v in pairs(model:findModules'nn.Linear') do 13 | v.bias:zero() 14 | end 15 | end 16 | 17 | function utils.DisableBias(model) 18 | for i,v in ipairs(model:findModules'nn.SpatialConvolution') do 19 | v.bias = nil 20 | v.gradBias = nil 21 | end 22 | end 23 | 24 | function utils.testModel(model) 25 | model:float() 26 | local imageSize = opt and opt.imageSize or 32 27 | local input = torch.randn(1,3,imageSize,imageSize):type(model._type) 28 | print('forward output',{model:forward(input)}) 29 | print('backward output',{model:backward(input,model.output)}) 30 | model:reset() 31 | end 32 | 33 | function utils.makeDataParallelTable(model, nGPU) 34 | if nGPU > 1 then 35 | local gpus = torch.range(1, nGPU):totable() 36 | local fastest, benchmark = cudnn.fastest, cudnn.benchmark 37 | 38 | local dpt = nn.DataParallelTable(1, true, true) 39 | :add(model, gpus) 40 | :threads(function() 41 | local cudnn = require 'cudnn' 42 | cudnn.fastest, cudnn.benchmark = fastest, benchmark 43 | end) 44 | dpt.gradInput = nil 45 | 46 | model = dpt:cuda() 47 | end 48 | return model 49 | end 50 | 51 | return utils 52 | -------------------------------------------------------------------------------- /models/vgg.lua: -------------------------------------------------------------------------------- 1 | -- This is a modified version of VGG network in 2 | -- https://github.com/szagoruyko/cifar.torch 3 | -- Modifications: 4 | -- * removed dropout 5 | -- * last nn.Linear layers substituted with convolutional layers 6 | -- and avg-pooling 7 | require 'nn' 8 | local utils = paths.dofile'utils.lua' 9 | 10 | local function 
createModel(opt) 11 | local model = nn.Sequential() 12 | 13 | -- building block 14 | local function Block(nInputPlane, nOutputPlane) 15 | model:add(nn.SpatialConvolution(nInputPlane, nOutputPlane, 3,3, 1,1, 1,1):noBias()) 16 | model:add(nn.SpatialBatchNormalization(nOutputPlane,1e-3)) 17 | model:add(nn.ReLU(true)) 18 | return model 19 | end 20 | 21 | local function MP() 22 | model:add(nn.SpatialMaxPooling(2,2,2,2):ceil()) 23 | return model 24 | end 25 | 26 | local function Group(ni, no, N, f) 27 | for i=1,N do 28 | Block(i == 1 and ni or no, no) 29 | end 30 | if f then f() end 31 | end 32 | 33 | Group(3,64,2,MP) 34 | Group(64,128,2,MP) 35 | Group(128,256,4,MP) 36 | Group(256,512,4,MP) 37 | Group(512,512,4) 38 | model:add(nn.SpatialAveragePooling(2,2,2,2):ceil()) 39 | model:add(nn.View(-1):setNumInputDims(3)) 40 | model:add(nn.Linear(512,opt and opt.num_classes or 10)) 41 | 42 | utils.FCinit(model) 43 | utils.testModel(model) 44 | utils.MSRinit(model) 45 | 46 | return model 47 | end 48 | 49 | return createModel 50 | -------------------------------------------------------------------------------- /models/wide-resnet.lua: -------------------------------------------------------------------------------- 1 | -- Wide Residual Network 2 | -- This is an implementation of the wide residual networks described in: 3 | -- "Wide Residual Networks", http://arxiv.org/abs/1605.07146 4 | -- authored by Sergey Zagoruyko and Nikos Komodakis 5 | 6 | -- ************************************************************************ 7 | -- This code incorporates material from: 8 | 9 | -- fb.resnet.torch (https://github.com/facebook/fb.resnet.torch) 10 | -- Copyright (c) 2016, Facebook, Inc. 11 | -- All rights reserved. 12 | -- 13 | -- This source code is licensed under the BSD-style license found in the 14 | -- LICENSE file in the root directory of this source tree. An additional grant 15 | -- of patent rights can be found in the PATENTS file in the same directory. 
16 | -- 17 | -- ************************************************************************ 18 | 19 | local nn = require 'nn' 20 | local utils = paths.dofile'utils.lua' 21 | 22 | local Convolution = nn.SpatialConvolution 23 | local Avg = nn.SpatialAveragePooling 24 | local ReLU = nn.ReLU 25 | local Max = nn.SpatialMaxPooling 26 | local SBatchNorm = nn.SpatialBatchNormalization 27 | 28 | local function createModel(opt) 29 | assert(opt and opt.depth) 30 | assert(opt and opt.num_classes) 31 | assert(opt and opt.widen_factor) 32 | 33 | local function Dropout() 34 | return nn.Dropout(opt and opt.dropout or 0,nil,true) 35 | end 36 | 37 | local depth = opt.depth 38 | 39 | local blocks = {} 40 | 41 | local function wide_basic(nInputPlane, nOutputPlane, stride) 42 | local conv_params = { 43 | {3,3,stride,stride,1,1}, 44 | {3,3,1,1,1,1}, 45 | } 46 | local nBottleneckPlane = nOutputPlane 47 | 48 | local block = nn.Sequential() 49 | local convs = nn.Sequential() 50 | 51 | for i,v in ipairs(conv_params) do 52 | if i == 1 then 53 | local module = nInputPlane == nOutputPlane and convs or block 54 | module:add(SBatchNorm(nInputPlane)):add(ReLU(true)) 55 | convs:add(Convolution(nInputPlane,nBottleneckPlane,table.unpack(v))) 56 | else 57 | convs:add(SBatchNorm(nBottleneckPlane)):add(ReLU(true)) 58 | if opt.dropout > 0 then 59 | convs:add(Dropout()) 60 | end 61 | convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,table.unpack(v))) 62 | end 63 | end 64 | 65 | local shortcut = nInputPlane == nOutputPlane and 66 | nn.Identity() or 67 | Convolution(nInputPlane,nOutputPlane,1,1,stride,stride,0,0) 68 | 69 | return block 70 | :add(nn.ConcatTable() 71 | :add(convs) 72 | :add(shortcut)) 73 | :add(nn.CAddTable(true)) 74 | end 75 | 76 | -- Stacking Residual Units on the same stage 77 | local function layer(block, nInputPlane, nOutputPlane, count, stride) 78 | local s = nn.Sequential() 79 | 80 | s:add(block(nInputPlane, nOutputPlane, stride)) 81 | for i=2,count do 82 | 
s:add(block(nOutputPlane, nOutputPlane, 1)) 83 | end 84 | return s 85 | end 86 | 87 | local model = nn.Sequential() 88 | do 89 | assert((depth - 4) % 6 == 0, 'depth should be 6n+4') 90 | local n = (depth - 4) / 6 91 | 92 | local k = opt.widen_factor 93 | local nStages = torch.Tensor{16, 16*k, 32*k, 64*k} 94 | 95 | model:add(Convolution(3,nStages[1],3,3,1,1,1,1)) -- one conv at the beginning (spatial size: 32x32) 96 | model:add(layer(wide_basic, nStages[1], nStages[2], n, 1)) -- Stage 1 (spatial size: 32x32) 97 | model:add(layer(wide_basic, nStages[2], nStages[3], n, 2)) -- Stage 2 (spatial size: 16x16) 98 | model:add(layer(wide_basic, nStages[3], nStages[4], n, 2)) -- Stage 3 (spatial size: 8x8) 99 | model:add(SBatchNorm(nStages[4])) 100 | model:add(ReLU(true)) 101 | model:add(Avg(8, 8, 1, 1)) 102 | model:add(nn.View(nStages[4]):setNumInputDims(3)) 103 | model:add(nn.Linear(nStages[4], opt.num_classes)) 104 | end 105 | 106 | utils.DisableBias(model) 107 | utils.testModel(model) 108 | utils.MSRinit(model) 109 | utils.FCinit(model) 110 | 111 | -- model:get(1).gradInput = nil 112 | 113 | return model 114 | end 115 | 116 | return createModel 117 | -------------------------------------------------------------------------------- /notebooks/sweeper.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | 4 | 5 | def loadLog(filename): 6 | s = [] 7 | for line in open(filename): 8 | r = line.find('json_stats') 9 | if r > -1: 10 | s.append(json.loads(line[r+12:])) 11 | return s 12 | 13 | 14 | def findSweepParams(frames): 15 | def findConstants(frame): 16 | keys = dict() 17 | for key in frame.keys(): 18 | v = np.asarray(frame[key]) 19 | u = np.copy(v) 20 | u.fill(v[0]) 21 | if np.array_equal(v, u): 22 | keys[key] = v[0] 23 | return keys 24 | changing = dict() 25 | for frame in frames: 26 | for k, v in findConstants(frame).items(): 27 | if isinstance(v, list): 28 | v = json.dumps(v) 29 | if k not in 
changing: 30 | changing[k] = {v} 31 | else: 32 | changing[k].add(v) 33 | all_keys = [] 34 | for k, v in changing.items(): 35 | if len(v) > 1: 36 | all_keys.append(k) 37 | return sorted(all_keys) 38 | 39 | 40 | def generateLegend(frame, sweeps): 41 | s = '' 42 | for key in sweeps: 43 | if key not in frame: 44 | s = s + key + '=not present, ' 45 | else: 46 | s = s + key + '=' + str(frame[key][0]) + ', ' 47 | return s 48 | 49 | def generateLegends(frames): 50 | params = findSweepParams(frames) 51 | return [generateLegend(frame, params) for frame in frames] 52 | -------------------------------------------------------------------------------- /notebooks/visualize.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import json\n", 13 | "from bokeh.plotting import figure, output_notebook, show\n", 14 | "import numpy as np\n", 15 | "from bokeh.charts.utils import cycle_colors\n", 16 | "import sweeper\n", 17 | "\n", 18 | "output_notebook()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "from IPython.display import display\n", 30 | "\n", 31 | "def plotLogs(log_names):\n", 32 | " # parse log files, extracting json entry with stats per epoch and creating pandas DataFrame\n", 33 | " frames = [pd.DataFrame(sweeper.loadLog('../logs/'+log+'/log.txt')) for log in log_names]\n", 34 | " colors = ['red','blue','green','black','purple','orange','yellow']\n", 35 | " \n", 36 | " # this searches constant parameters across different runs to generate legends\n", 37 | " legends = sweeper.generateLegends(frames)\n", 38 | "\n", 39 | " # TODO: improve this, add hovers etc.\n", 40 | " p = figure(title='test error', x_axis_label='epoch')\n", 
41 | " for i,frame in enumerate(frames):\n", 42 | " p.line(frame['epoch'], 100-frame['test_acc'], color=colors[i], legend=legends[i])\n", 43 | " show(p)\n", 44 | " \n", 45 | " p = figure(title='log loss', x_axis_label='epoch', y_axis_label='loss')\n", 46 | " for i,frame in enumerate(frames):\n", 47 | " p.line(frame['epoch'], np.log(frame['loss']), color=colors[i], legend=legends[i])\n", 48 | " show(p)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": false, 56 | "scrolled": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "plotLogs([\n", 61 | " 'nin_1875527964',\n", 62 | " 'vgg_24208029',\n", 63 | " 'resnet-pre-act_17934627',\n", 64 | " 'wide-resnet_1121914561'\n", 65 | "])" 66 | ] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Python 2", 72 | "language": "python", 73 | "name": "python2" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 2 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython2", 85 | "version": "2.7.10" 86 | } 87 | }, 88 | "nbformat": 4, 89 | "nbformat_minor": 0 90 | } 91 | -------------------------------------------------------------------------------- /pretrained/README.md: -------------------------------------------------------------------------------- 1 | WRN-50-2 2 | ========== 3 | 4 | Best performing ImageNet model from Wide Residual Networks BMVC 2016 paper https://arxiv.org/abs/1605.07146
5 | The model is slower than ResNet-101 and faster than ResNet-152, with better accuracy: 6 | 7 | | Model | top-1 err, % | top-5 err, % | #params | time/batch 16 | 8 | |---|---|---|---|---| 9 | | ResNet-50 | 24.01 | 7.02 | 25.6M | 49 | 10 | | ResNet-101 | 22.44 | 6.21 | 44.5M | 82 | 11 | | ResNet-152 | 22.16 | 6.16 | 60.2M | 115 | 12 | | __WRN-50-2-bottleneck__ | 21.9 | 6.03 | 68.9M | 93 | 13 | | pre-ResNet-200 | 21.66 | 5.79 | 64.7M | 154 | 14 | 15 | Download (263MB): https://yadi.sk/d/-8AWymOPyVZns 16 | 17 | PyTorch and TensorFlow pretrained weights and model definitions:
18 | 19 | 20 | Convergence plot: 21 | 22 | ![bokeh_plot 4](https://cloud.githubusercontent.com/assets/4953728/20243021/98a2a66a-a945-11e6-807b-a037f667c052.png) 23 | 24 | If you find this model useful please cite this paper: 25 | 26 | ```bib 27 | @INPROCEEDINGS{Zagoruyko2016WRN, 28 | author = {Sergey Zagoruyko and Nikos Komodakis}, 29 | title = {Wide Residual Networks}, 30 | booktitle = {BMVC}, 31 | year = {2016}, 32 | } 33 | ``` 34 | 35 | 36 | # Model printout 37 | 38 | ``` 39 | nn.Sequential { 40 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> (11) -> output] 41 | (1): cudnn.SpatialConvolution(3 -> 64, 7x7, 2,2, 3,3) without bias 42 | (2): nn.SpatialBatchNormalization (4D) (64) 43 | (3): cudnn.ReLU 44 | (4): nn.SpatialMaxPooling(3x3, 2,2, 1,1) 45 | (5): nn.Sequential { 46 | [input -> (1) -> (2) -> (3) -> output] 47 | (1): nn.Sequential { 48 | [input -> (1) -> (2) -> (3) -> output] 49 | (1): nn.ConcatTable { 50 | input 51 | |`-> (1): nn.Sequential { 52 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 53 | | (1): cudnn.SpatialConvolution(64 -> 128, 1x1) without bias 54 | | (2): nn.SpatialBatchNormalization (4D) (128) 55 | | (3): cudnn.ReLU 56 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias 57 | | (5): nn.SpatialBatchNormalization (4D) (128) 58 | | (6): cudnn.ReLU 59 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias 60 | | (8): nn.SpatialBatchNormalization (4D) (256) 61 | | } 62 | `-> (2): nn.Sequential { 63 | [input -> (1) -> (2) -> output] 64 | (1): cudnn.SpatialConvolution(64 -> 256, 1x1) without bias 65 | (2): nn.SpatialBatchNormalization (4D) (256) 66 | } 67 | ... 
-> output 68 | } 69 | (2): nn.CAddTable 70 | (3): cudnn.ReLU 71 | } 72 | (2): nn.Sequential { 73 | [input -> (1) -> (2) -> (3) -> output] 74 | (1): nn.ConcatTable { 75 | input 76 | |`-> (1): nn.Sequential { 77 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 78 | | (1): cudnn.SpatialConvolution(256 -> 128, 1x1) without bias 79 | | (2): nn.SpatialBatchNormalization (4D) (128) 80 | | (3): cudnn.ReLU 81 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias 82 | | (5): nn.SpatialBatchNormalization (4D) (128) 83 | | (6): cudnn.ReLU 84 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias 85 | | (8): nn.SpatialBatchNormalization (4D) (256) 86 | | } 87 | `-> (2): nn.Identity 88 | ... -> output 89 | } 90 | (2): nn.CAddTable 91 | (3): cudnn.ReLU 92 | } 93 | (3): nn.Sequential { 94 | [input -> (1) -> (2) -> (3) -> output] 95 | (1): nn.ConcatTable { 96 | input 97 | |`-> (1): nn.Sequential { 98 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 99 | | (1): cudnn.SpatialConvolution(256 -> 128, 1x1) without bias 100 | | (2): nn.SpatialBatchNormalization (4D) (128) 101 | | (3): cudnn.ReLU 102 | | (4): cudnn.SpatialConvolution(128 -> 128, 3x3, 1,1, 1,1) without bias 103 | | (5): nn.SpatialBatchNormalization (4D) (128) 104 | | (6): cudnn.ReLU 105 | | (7): cudnn.SpatialConvolution(128 -> 256, 1x1) without bias 106 | | (8): nn.SpatialBatchNormalization (4D) (256) 107 | | } 108 | `-> (2): nn.Identity 109 | ... 
-> output 110 | } 111 | (2): nn.CAddTable 112 | (3): cudnn.ReLU 113 | } 114 | } 115 | (6): nn.Sequential { 116 | [input -> (1) -> (2) -> (3) -> (4) -> output] 117 | (1): nn.Sequential { 118 | [input -> (1) -> (2) -> (3) -> output] 119 | (1): nn.ConcatTable { 120 | input 121 | |`-> (1): nn.Sequential { 122 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 123 | | (1): cudnn.SpatialConvolution(256 -> 256, 1x1) without bias 124 | | (2): nn.SpatialBatchNormalization (4D) (256) 125 | | (3): cudnn.ReLU 126 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 2,2, 1,1) without bias 127 | | (5): nn.SpatialBatchNormalization (4D) (256) 128 | | (6): cudnn.ReLU 129 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias 130 | | (8): nn.SpatialBatchNormalization (4D) (512) 131 | | } 132 | `-> (2): nn.Sequential { 133 | [input -> (1) -> (2) -> output] 134 | (1): cudnn.SpatialConvolution(256 -> 512, 1x1, 2,2) without bias 135 | (2): nn.SpatialBatchNormalization (4D) (512) 136 | } 137 | ... -> output 138 | } 139 | (2): nn.CAddTable 140 | (3): cudnn.ReLU 141 | } 142 | (2): nn.Sequential { 143 | [input -> (1) -> (2) -> (3) -> output] 144 | (1): nn.ConcatTable { 145 | input 146 | |`-> (1): nn.Sequential { 147 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 148 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias 149 | | (2): nn.SpatialBatchNormalization (4D) (256) 150 | | (3): cudnn.ReLU 151 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias 152 | | (5): nn.SpatialBatchNormalization (4D) (256) 153 | | (6): cudnn.ReLU 154 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias 155 | | (8): nn.SpatialBatchNormalization (4D) (512) 156 | | } 157 | `-> (2): nn.Identity 158 | ... 
-> output 159 | } 160 | (2): nn.CAddTable 161 | (3): cudnn.ReLU 162 | } 163 | (3): nn.Sequential { 164 | [input -> (1) -> (2) -> (3) -> output] 165 | (1): nn.ConcatTable { 166 | input 167 | |`-> (1): nn.Sequential { 168 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 169 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias 170 | | (2): nn.SpatialBatchNormalization (4D) (256) 171 | | (3): cudnn.ReLU 172 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias 173 | | (5): nn.SpatialBatchNormalization (4D) (256) 174 | | (6): cudnn.ReLU 175 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias 176 | | (8): nn.SpatialBatchNormalization (4D) (512) 177 | | } 178 | `-> (2): nn.Identity 179 | ... -> output 180 | } 181 | (2): nn.CAddTable 182 | (3): cudnn.ReLU 183 | } 184 | (4): nn.Sequential { 185 | [input -> (1) -> (2) -> (3) -> output] 186 | (1): nn.ConcatTable { 187 | input 188 | |`-> (1): nn.Sequential { 189 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 190 | | (1): cudnn.SpatialConvolution(512 -> 256, 1x1) without bias 191 | | (2): nn.SpatialBatchNormalization (4D) (256) 192 | | (3): cudnn.ReLU 193 | | (4): cudnn.SpatialConvolution(256 -> 256, 3x3, 1,1, 1,1) without bias 194 | | (5): nn.SpatialBatchNormalization (4D) (256) 195 | | (6): cudnn.ReLU 196 | | (7): cudnn.SpatialConvolution(256 -> 512, 1x1) without bias 197 | | (8): nn.SpatialBatchNormalization (4D) (512) 198 | | } 199 | `-> (2): nn.Identity 200 | ... 
-> output 201 | } 202 | (2): nn.CAddTable 203 | (3): cudnn.ReLU 204 | } 205 | } 206 | (7): nn.Sequential { 207 | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> output] 208 | (1): nn.Sequential { 209 | [input -> (1) -> (2) -> (3) -> output] 210 | (1): nn.ConcatTable { 211 | input 212 | |`-> (1): nn.Sequential { 213 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 214 | | (1): cudnn.SpatialConvolution(512 -> 512, 1x1) without bias 215 | | (2): nn.SpatialBatchNormalization (4D) (512) 216 | | (3): cudnn.ReLU 217 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 2,2, 1,1) without bias 218 | | (5): nn.SpatialBatchNormalization (4D) (512) 219 | | (6): cudnn.ReLU 220 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 221 | | (8): nn.SpatialBatchNormalization (4D) (1024) 222 | | } 223 | `-> (2): nn.Sequential { 224 | [input -> (1) -> (2) -> output] 225 | (1): cudnn.SpatialConvolution(512 -> 1024, 1x1, 2,2) without bias 226 | (2): nn.SpatialBatchNormalization (4D) (1024) 227 | } 228 | ... -> output 229 | } 230 | (2): nn.CAddTable 231 | (3): cudnn.ReLU 232 | } 233 | (2): nn.Sequential { 234 | [input -> (1) -> (2) -> (3) -> output] 235 | (1): nn.ConcatTable { 236 | input 237 | |`-> (1): nn.Sequential { 238 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 239 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias 240 | | (2): nn.SpatialBatchNormalization (4D) (512) 241 | | (3): cudnn.ReLU 242 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias 243 | | (5): nn.SpatialBatchNormalization (4D) (512) 244 | | (6): cudnn.ReLU 245 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 246 | | (8): nn.SpatialBatchNormalization (4D) (1024) 247 | | } 248 | `-> (2): nn.Identity 249 | ... 
-> output 250 | } 251 | (2): nn.CAddTable 252 | (3): cudnn.ReLU 253 | } 254 | (3): nn.Sequential { 255 | [input -> (1) -> (2) -> (3) -> output] 256 | (1): nn.ConcatTable { 257 | input 258 | |`-> (1): nn.Sequential { 259 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 260 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias 261 | | (2): nn.SpatialBatchNormalization (4D) (512) 262 | | (3): cudnn.ReLU 263 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias 264 | | (5): nn.SpatialBatchNormalization (4D) (512) 265 | | (6): cudnn.ReLU 266 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 267 | | (8): nn.SpatialBatchNormalization (4D) (1024) 268 | | } 269 | `-> (2): nn.Identity 270 | ... -> output 271 | } 272 | (2): nn.CAddTable 273 | (3): cudnn.ReLU 274 | } 275 | (4): nn.Sequential { 276 | [input -> (1) -> (2) -> (3) -> output] 277 | (1): nn.ConcatTable { 278 | input 279 | |`-> (1): nn.Sequential { 280 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 281 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias 282 | | (2): nn.SpatialBatchNormalization (4D) (512) 283 | | (3): cudnn.ReLU 284 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias 285 | | (5): nn.SpatialBatchNormalization (4D) (512) 286 | | (6): cudnn.ReLU 287 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 288 | | (8): nn.SpatialBatchNormalization (4D) (1024) 289 | | } 290 | `-> (2): nn.Identity 291 | ... 
-> output 292 | } 293 | (2): nn.CAddTable 294 | (3): cudnn.ReLU 295 | } 296 | (5): nn.Sequential { 297 | [input -> (1) -> (2) -> (3) -> output] 298 | (1): nn.ConcatTable { 299 | input 300 | |`-> (1): nn.Sequential { 301 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 302 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias 303 | | (2): nn.SpatialBatchNormalization (4D) (512) 304 | | (3): cudnn.ReLU 305 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias 306 | | (5): nn.SpatialBatchNormalization (4D) (512) 307 | | (6): cudnn.ReLU 308 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 309 | | (8): nn.SpatialBatchNormalization (4D) (1024) 310 | | } 311 | `-> (2): nn.Identity 312 | ... -> output 313 | } 314 | (2): nn.CAddTable 315 | (3): cudnn.ReLU 316 | } 317 | (6): nn.Sequential { 318 | [input -> (1) -> (2) -> (3) -> output] 319 | (1): nn.ConcatTable { 320 | input 321 | |`-> (1): nn.Sequential { 322 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 323 | | (1): cudnn.SpatialConvolution(1024 -> 512, 1x1) without bias 324 | | (2): nn.SpatialBatchNormalization (4D) (512) 325 | | (3): cudnn.ReLU 326 | | (4): cudnn.SpatialConvolution(512 -> 512, 3x3, 1,1, 1,1) without bias 327 | | (5): nn.SpatialBatchNormalization (4D) (512) 328 | | (6): cudnn.ReLU 329 | | (7): cudnn.SpatialConvolution(512 -> 1024, 1x1) without bias 330 | | (8): nn.SpatialBatchNormalization (4D) (1024) 331 | | } 332 | `-> (2): nn.Identity 333 | ... 
-> output 334 | } 335 | (2): nn.CAddTable 336 | (3): cudnn.ReLU 337 | } 338 | } 339 | (8): nn.Sequential { 340 | [input -> (1) -> (2) -> (3) -> output] 341 | (1): nn.Sequential { 342 | [input -> (1) -> (2) -> (3) -> output] 343 | (1): nn.ConcatTable { 344 | input 345 | |`-> (1): nn.Sequential { 346 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 347 | | (1): cudnn.SpatialConvolution(1024 -> 1024, 1x1) without bias 348 | | (2): nn.SpatialBatchNormalization (4D) (1024) 349 | | (3): cudnn.ReLU 350 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 2,2, 1,1) without bias 351 | | (5): nn.SpatialBatchNormalization (4D) (1024) 352 | | (6): cudnn.ReLU 353 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias 354 | | (8): nn.SpatialBatchNormalization (4D) (2048) 355 | | } 356 | `-> (2): nn.Sequential { 357 | [input -> (1) -> (2) -> output] 358 | (1): cudnn.SpatialConvolution(1024 -> 2048, 1x1, 2,2) without bias 359 | (2): nn.SpatialBatchNormalization (4D) (2048) 360 | } 361 | ... -> output 362 | } 363 | (2): nn.CAddTable 364 | (3): cudnn.ReLU 365 | } 366 | (2): nn.Sequential { 367 | [input -> (1) -> (2) -> (3) -> output] 368 | (1): nn.ConcatTable { 369 | input 370 | |`-> (1): nn.Sequential { 371 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 372 | | (1): cudnn.SpatialConvolution(2048 -> 1024, 1x1) without bias 373 | | (2): nn.SpatialBatchNormalization (4D) (1024) 374 | | (3): cudnn.ReLU 375 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 1,1, 1,1) without bias 376 | | (5): nn.SpatialBatchNormalization (4D) (1024) 377 | | (6): cudnn.ReLU 378 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias 379 | | (8): nn.SpatialBatchNormalization (4D) (2048) 380 | | } 381 | `-> (2): nn.Identity 382 | ... 
-> output 383 | } 384 | (2): nn.CAddTable 385 | (3): cudnn.ReLU 386 | } 387 | (3): nn.Sequential { 388 | [input -> (1) -> (2) -> (3) -> output] 389 | (1): nn.ConcatTable { 390 | input 391 | |`-> (1): nn.Sequential { 392 | | [input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> output] 393 | | (1): cudnn.SpatialConvolution(2048 -> 1024, 1x1) without bias 394 | | (2): nn.SpatialBatchNormalization (4D) (1024) 395 | | (3): cudnn.ReLU 396 | | (4): cudnn.SpatialConvolution(1024 -> 1024, 3x3, 1,1, 1,1) without bias 397 | | (5): nn.SpatialBatchNormalization (4D) (1024) 398 | | (6): cudnn.ReLU 399 | | (7): cudnn.SpatialConvolution(1024 -> 2048, 1x1) without bias 400 | | (8): nn.SpatialBatchNormalization (4D) (2048) 401 | | } 402 | `-> (2): nn.Identity 403 | ... -> output 404 | } 405 | (2): nn.CAddTable 406 | (3): cudnn.ReLU 407 | } 408 | } 409 | (9): cudnn.SpatialAveragePooling(7x7, 1,1) 410 | (10): nn.View(2048) 411 | (11): nn.Linear(2048 -> 1000) 412 | } 413 | ``` 414 | -------------------------------------------------------------------------------- /pretrained/wide-resnet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
--
--  Edited by Sergey Zagoruyko for Wide Residual Networks
--  http://arxiv.org/abs/1605.07146
--
--  The Wide-ResNet model definition (not pre-activation)
--   * WRN-18-WRN-34: wider basic block
--   * WRN-50-WRN-152: wider bottleneck

local nn = require 'nn'
require 'cunn'
-- The aliases below index the global `cudnn` table, so load it explicitly
-- instead of relying on the calling script having required it already.
require 'cudnn'

local Convolution = cudnn.SpatialConvolution
local Avg = cudnn.SpatialAveragePooling
local ReLU = cudnn.ReLU
local Max = nn.SpatialMaxPooling
local SBatchNorm = nn.SpatialBatchNormalization

-- Builds a (non pre-activation) Wide-ResNet and moves it to the GPU.
--
-- Fields read from `opt`:
--   depth         (required) network depth; selects the block layout
--   width         (required) widening factor; used both as the channel
--                 multiplier of every stage and as the bottleneck expansion
--   shortcutType  'A' | 'B' | 'C', defaults to 'B'
--   dataset       'imagenet' or 'cifar10'; anything else raises an error
--   cudnn         when equal to 'deterministic', modules exposing setMode
--                 are switched to mode (1,1,1)
--
-- Returns the assembled nn.Sequential model.
local function createModel(opt)

   assert(opt.depth)
   assert(opt.width)

   local depth = opt.depth
   local width = opt.width  -- for WRN-18-34
   local bottle = opt.width -- for WRN-50-WRN-152
   local shortcutType = opt.shortcutType or 'B'
   -- Number of channels produced by the most recently created block; every
   -- block constructor reads it as its input width and then overwrites it.
   local iChannels

   -- The shortcut layer is either identity or 1x1 convolution
   local function shortcut(nInputPlane, nOutputPlane, stride)
      local useConv = shortcutType == 'C' or
         (shortcutType == 'B' and nInputPlane ~= nOutputPlane)
      if useConv then
         -- 1x1 convolution
         return nn.Sequential()
            :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
            :add(SBatchNorm(nOutputPlane))
      elseif nInputPlane ~= nOutputPlane then
         -- Strided, zero-padded identity shortcut
         return nn.Sequential()
            :add(nn.SpatialAveragePooling(1, 1, stride, stride))
            :add(nn.Concat(2)
               :add(nn.Identity())
               :add(nn.MulConstant(0)))
      else
         return nn.Identity()
      end
   end

   -- The basic residual layer block for 18 and 34 layer network, and the
   -- CIFAR networks. Produces `n` output channels.
   local function basicblock(n, stride)
      local nInputPlane = iChannels
      iChannels = n

      local s = nn.Sequential()
      s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n,3,3,1,1,1,1))
      s:add(SBatchNorm(n))

      return nn.Sequential()
         :add(nn.ConcatTable()
            :add(s)
            :add(shortcut(nInputPlane, n, stride)))
         :add(nn.CAddTable(true))
         :add(ReLU(true))
   end

   -- The bottleneck residual layer for 50, 101, and 152 layer networks.
   -- Inner width is `n`; the block produces `n * bottle` output channels.
   local function bottleneck(n, stride)
      local nInputPlane = iChannels
      iChannels = n * bottle

      local s = nn.Sequential()
      s:add(Convolution(nInputPlane,n,1,1,1,1,0,0))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n,3,3,stride,stride,1,1))
      s:add(SBatchNorm(n))
      s:add(ReLU(true))
      s:add(Convolution(n,n*bottle,1,1,1,1,0,0))
      s:add(SBatchNorm(n * bottle))

      return nn.Sequential()
         :add(nn.ConcatTable()
            :add(s)
            :add(shortcut(nInputPlane, n * bottle, stride)))
         :add(nn.CAddTable(true))
         :add(ReLU(true))
   end

   -- Creates count residual blocks with specified number of features.
   -- Only the first block of the stage uses `stride`; the rest use 1.
   local function layer(block, features, count, stride)
      local s = nn.Sequential()
      for i=1,count do
         s:add(block(features, i == 1 and stride or 1))
      end
      return s
   end

   local model = nn.Sequential()
   if opt.dataset == 'imagenet' then
      -- Configurations for ResNet:
      --  num. residual blocks per stage, residual block function
      local cfg = {
         [18]  = {{2, 2, 2, 2}, basicblock}, -- leave as is
         [34]  = {{3, 4, 6, 3}, basicblock}, -- leave as is
         [50]  = {{3, 4, 6, 3}, bottleneck},
         [101] = {{3, 4, 23, 3}, bottleneck},
         [152] = {{3, 8, 36, 3}, bottleneck},
      }

      assert(cfg[depth], 'Invalid depth: ' .. tostring(depth))
      local def, block = table.unpack(cfg[depth])
      iChannels = 64
      print(' | ResNet-' .. depth ..'-'..width .. ' ImageNet')

      -- The ResNet ImageNet model
      model:add(Convolution(3,64,7,7,2,2,3,3))
      model:add(SBatchNorm(64))
      model:add(ReLU(true))
      model:add(Max(3,3,2,2,1,1))
      model:add(layer(block, width*64,  def[1]))
      model:add(layer(block, width*128, def[2], 2))
      model:add(layer(block, width*256, def[3], 2))
      model:add(layer(block, width*512, def[4], 2))

      -- Size the classifier from what the last stage actually emits. The
      -- former hard-coded 512*bottle was too small for bottleneck nets, whose
      -- last stage outputs width*512*bottle channels (e.g. 2048 for WRN-50-2).
      local nFeatures = iChannels

      model:add(Avg(7, 7, 1, 1))
      model:add(nn.View(nFeatures):setNumInputDims(3))
      model:add(nn.Linear(nFeatures, 1000))
   elseif opt.dataset == 'cifar10' then
      -- Model type specifies number of layers for CIFAR-10 model
      assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202')
      local n = (depth - 2) / 6
      iChannels = 16
      print(' | ResNet-' .. depth .. ' CIFAR-10')

      -- The ResNet CIFAR-10 model
      model:add(Convolution(3,16,3,3,1,1,1,1))
      model:add(SBatchNorm(16))
      model:add(ReLU(true))
      model:add(layer(basicblock, 16*width, n))
      model:add(layer(basicblock, 32*width, n, 2))
      model:add(layer(basicblock, 64*width, n, 2))
      model:add(Avg(8, 8, 1, 1))
      model:add(nn.View(64*width):setNumInputDims(3))
      model:add(nn.Linear(64*width, 10))
   else
      -- tostring keeps the message readable when opt.dataset is nil
      error('invalid dataset: ' .. tostring(opt.dataset))
   end

   -- Re-initialises every convolution of class `name`: weights drawn from
   -- N(0, sqrt(2/n)) with n = kW*kH*nInputPlane; bias dropped or zeroed
   -- depending on the cudnn version.
   local function ConvInit(name)
      for k,v in pairs(model:findModules(name)) do
         local n = v.kW*v.kH*v.nInputPlane
         v.weight:normal(0,math.sqrt(2/n))
         if cudnn.version >= 4000 then
            v.bias = nil
            v.gradBias = nil
         else
            v.bias:zero()
         end
      end
   end
   -- Sets every batch-norm of class `name` to scale 1, shift 0.
   local function BNInit(name)
      for k,v in pairs(model:findModules(name)) do
         v.weight:fill(1)
         v.bias:zero()
      end
   end

   ConvInit('cudnn.SpatialConvolution')
   ConvInit('nn.SpatialConvolution')
   BNInit('fbnn.SpatialBatchNormalization')
   BNInit('cudnn.SpatialBatchNormalization')
   BNInit('nn.SpatialBatchNormalization')
   for k,v in pairs(model:findModules('nn.Linear')) do
      v.bias:zero()
   end
   model:cuda()

   if opt.cudnn == 'deterministic' then
      model:apply(function(m)
         if m.setMode then m:setMode(1,1,1) end
      end)
   end

   -- The first layer's input gradient is dropped
   model:get(1).gradInput = nil

   return model
end

return createModel
"""
PyTorch training code for Wide Residual Networks:
http://arxiv.org/abs/1605.07146

The code reproduces *exactly* its lua version:
https://github.com/szagoruyko/wide-residual-networks

2016 Sergey Zagoruyko
"""

import argparse
import os
import json
import numpy as np
from tqdm import tqdm
import torch
from torch.optim import SGD
import torch.utils.data
import torchvision.transforms as T
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torchnet as tnt
from torchnet.engine import Engine
from utils import cast, data_parallel, print_tensor_dict
from torch.backends import cudnn
from resnet import resnet

cudnn.benchmark = True

parser = argparse.ArgumentParser(description='Wide Residual Networks')
# Model options
parser.add_argument('--model', default='resnet', type=str)
parser.add_argument('--depth', default=16, type=int)
parser.add_argument('--width', default=1, type=float)
parser.add_argument('--dataset', default='CIFAR10', type=str)
parser.add_argument('--dataroot', default='.', type=str)
parser.add_argument('--dtype', default='float', type=str)
parser.add_argument('--groups', default=1, type=int)
parser.add_argument('--nthread', default=4, type=int)
parser.add_argument('--seed', default=1, type=int)

# Training options
parser.add_argument('--batch_size', default=128, type=int)
parser.add_argument('--lr', default=0.1, type=float)
parser.add_argument('--epochs', default=200, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--weight_decay', default=0.0005, type=float)
parser.add_argument('--epoch_step', default='[60,120,160]', type=str,
                    help='json list with epochs to drop lr on')
parser.add_argument('--lr_decay_ratio', default=0.2, type=float)
parser.add_argument('--resume', default='', type=str)
parser.add_argument('--note', default='', type=str)

# Device options
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--save', default='', type=str,
                    help='save parameters and logs in this folder')
parser.add_argument('--ngpu', default=1, type=int,
                    help='number of GPUs to use for training')
parser.add_argument('--gpu_id', default='0', type=str,
                    help='id(s) for CUDA_VISIBLE_DEVICES')


def create_dataset(opt, train):
    """Build the torchvision dataset named by ``opt.dataset``.

    Args:
        opt: parsed options; ``opt.dataset`` is looked up as an attribute of
            ``torchvision.datasets`` and ``opt.dataroot`` is its root folder.
        train: True for the training split (adds reflect-pad(4), random
            horizontal flip and random 32x32 crop before normalisation),
            False for the test split (normalisation only).

    Returns:
        The dataset instance (downloaded if missing).
    """
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(np.array([125.3, 123.0, 113.9]) / 255.0,
                    np.array([63.0, 62.1, 66.7]) / 255.0),
    ])
    if train:
        transform = T.Compose([
            T.Pad(4, padding_mode='reflect'),
            T.RandomHorizontalFlip(),
            T.RandomCrop(32),
            transform
        ])
    return getattr(datasets, opt.dataset)(opt.dataroot, train=train, download=True, transform=transform)


def main():
    """Parse the command line, then train and evaluate a WRN with torchnet's Engine.

    After every epoch the parameters and optimizer state are written to
    ``<save>/model.pt7`` and a ``json_stats:`` line is appended to
    ``<save>/log.txt``.
    """
    opt = parser.parse_args()
    print('parsed options:', vars(opt))
    epoch_step = json.loads(opt.epoch_step)
    num_classes = 10 if opt.dataset == 'CIFAR10' else 100

    torch.manual_seed(opt.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_id

    def create_iterator(mode):
        # `mode` doubles as the train flag and the shuffle flag
        return DataLoader(create_dataset(opt, mode), opt.batch_size, shuffle=mode,
                          num_workers=opt.nthread, pin_memory=torch.cuda.is_available())

    train_loader = create_iterator(True)
    test_loader = create_iterator(False)

    f, params = resnet(opt.depth, opt.width, num_classes)

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = ', lr)
        return SGD([v for v in params.values() if v.requires_grad], lr, momentum=0.9, weight_decay=opt.weight_decay)

    optimizer = create_optimizer(opt, opt.lr)

    epoch = 0
    if opt.resume != '':
        state_dict = torch.load(opt.resume)
        epoch = state_dict['epoch']
        params_tensors = state_dict['params']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])

    print('\nParameters:')
    print_tensor_dict(params)

    n_parameters = sum(p.numel() for p in params.values() if p.requires_grad)
    print('\nTotal number of parameters:', n_parameters)

    meter_loss = tnt.meter.AverageValueMeter()
    classacc = tnt.meter.ClassErrorMeter(accuracy=True)
    timer_train = tnt.meter.TimeMeter('s')
    timer_test = tnt.meter.TimeMeter('s')

    # An empty --save (the default) means "current directory", for which there
    # is nothing to create; os.mkdir('') used to raise here. makedirs also
    # creates missing parents such as ./logs in ./logs/resnet_123.
    if opt.save:
        os.makedirs(opt.save, exist_ok=True)

    def h(sample):
        # sample = [inputs, targets, train_flag]; the flag is appended by on_sample
        inputs = cast(sample[0], opt.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu))).float()
        return F.cross_entropy(y, targets), y

    def log(t, state):
        """Checkpoint the model and append the stats dict `t` to log.txt."""
        torch.save(dict(params=params, epoch=t['epoch'], optimizer=state['optimizer'].state_dict()),
                   os.path.join(opt.save, 'model.pt7'))
        z = {**vars(opt), **t}
        with open(os.path.join(opt.save, 'log.txt'), 'a') as flog:
            flog.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        state['sample'].append(state['train'])

    def on_forward(state):
        loss = float(state['loss'])
        classacc.add(state['output'].data, state['sample'][1])
        meter_loss.add(loss)
        if state['train']:
            state['iterator'].set_postfix(loss=loss)

    def on_start(state):
        # start counting from the resumed epoch (0 for a fresh run)
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()
        state['iterator'] = tqdm(train_loader, dynamic_ncols=True)

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            # The LR drop is done by building a fresh optimizer with the
            # scaled learning rate.
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        with torch.no_grad():
            engine.test(h, test_loader)

        test_acc = classacc.value()[0]
        # log() prints the stats itself and returns nothing, so it is called
        # directly; wrapping it in print() emitted a stray "None" every epoch.
        log({
            "train_loss": train_loss[0],
            "train_acc": train_acc[0],
            "test_loss": meter_loss.value()[0],
            "test_acc": test_acc,
            "epoch": state['epoch'],
            "num_classes": num_classes,
            "n_parameters": n_parameters,
            "train_time": train_time,
            "test_time": timer_test.value(),
        }, state)
        print('==> id: %s (%d/%d), test_acc: \33[91m%.2f\033[0m' %
              (opt.save, state['epoch'], opt.epochs, test_acc))

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, train_loader, opt.epochs, optimizer)


if __name__ == '__main__':
    main()
def resnet(depth, width, num_classes):
    """Build a functional Wide-ResNet made of BN-ReLU-conv residual blocks.

    Args:
        depth: total depth; must be of the form 6n + 4.
        width: widening factor applied to the base widths (16, 32, 64).
        num_classes: number of outputs of the final linear layer.

    Returns:
        A pair ``(f, flat_params)``: ``f(input, params, mode)`` runs the
        forward pass and ``flat_params`` is the dot-keyed dict of tensors
        it reads (e.g. ``'group0.block0.conv0'``, ``'fc.weight'``).
    """
    assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
    n = (depth - 4) // 6
    widths = [int(base_width * width) for base_width in (16, 32, 64)]

    def gen_block_params(ni, no):
        # Tensors are created in the order conv0, conv1, bn0, bn1, convdim so
        # that random initialisation consumes the RNG in a fixed sequence.
        block_params = {}
        block_params['conv0'] = utils.conv_params(ni, no, 3)
        block_params['conv1'] = utils.conv_params(no, no, 3)
        block_params['bn0'] = utils.bnparams(ni)
        block_params['bn1'] = utils.bnparams(no)
        # a 1x1 projection exists only where the channel count changes
        block_params['convdim'] = utils.conv_params(ni, no, 1) if ni != no else None
        return block_params

    def gen_group_params(ni, no, count):
        group_params = {}
        for idx in range(count):
            # only the first block of a group sees the incoming width
            group_params['block%d' % idx] = gen_block_params(no if idx else ni, no)
        return group_params

    nested_params = {
        'conv0': utils.conv_params(3, 16, 3),
        'group0': gen_group_params(16, widths[0], n),
        'group1': gen_group_params(widths[0], widths[1], n),
        'group2': gen_group_params(widths[1], widths[2], n),
        'bn': utils.bnparams(widths[2]),
        'fc': utils.linear_params(widths[2], num_classes),
    }
    flat_params = utils.cast(utils.flatten(nested_params))

    utils.set_requires_grad_except_bn_(flat_params)

    def block(x, params, base, mode, stride):
        pre = F.relu(utils.batch_norm(x, params, base + '.bn0', mode), inplace=True)
        hidden = F.conv2d(pre, params[base + '.conv0'], stride=stride, padding=1)
        hidden = F.relu(utils.batch_norm(hidden, params, base + '.bn1', mode), inplace=True)
        residual = F.conv2d(hidden, params[base + '.conv1'], stride=1, padding=1)

        proj_key = base + '.convdim'
        if proj_key not in params:
            return residual + x
        # the projection shortcut is fed the pre-activated input, not raw x
        return residual + F.conv2d(pre, params[proj_key], stride=stride)

    def group(o, params, base, mode, stride):
        for idx in range(n):
            block_name = '%s.block%d' % (base, idx)
            block_stride = stride if idx == 0 else 1
            o = block(o, params, block_name, mode, block_stride)
        return o

    def f(input, params, mode):
        out = F.conv2d(input, params['conv0'], padding=1)
        for group_name, group_stride in (('group0', 1), ('group1', 2), ('group2', 2)):
            out = group(out, params, group_name, mode, group_stride)
        out = F.relu(utils.batch_norm(out, params, 'bn', mode))
        out = F.avg_pool2d(out, 8, 1, 0)
        out = out.view(out.size(0), -1)
        return F.linear(out, params['fc.weight'], params['fc.bias'])

    return f, flat_params
def cast(params, dtype='float'):
    """Cast a tensor, or every tensor in a (possibly nested) dict, to `dtype`.

    Args:
        params: a tensor or a dict whose values are tensors or further dicts.
        dtype: name of the tensor conversion method to call, e.g. 'float',
            'long', 'double'.

    Returns:
        The converted tensor, or a new dict with the same keys. Tensors are
        moved to CUDA first whenever ``torch.cuda.is_available()``.
    """
    if isinstance(params, dict):
        return {k: cast(v, dtype) for k,v in params.items()}
    else:
        return getattr(params.cuda() if torch.cuda.is_available() else params, dtype)()


def conv_params(ni, no, k=1):
    """Return a Kaiming-normal initialised conv weight of shape (no, ni, k, k)."""
    return kaiming_normal_(torch.Tensor(no, ni, k, k))


def linear_params(ni, no):
    """Return {'weight': Kaiming-normal (no, ni), 'bias': zeros(no)}."""
    return {'weight': kaiming_normal_(torch.Tensor(no, ni)), 'bias': torch.zeros(no)}


def bnparams(n):
    """Return batch-norm tensors for `n` channels.

    'weight' is drawn uniformly from [0, 1), 'bias' and 'running_mean' are
    zeros, 'running_var' is ones.
    """
    return {'weight': torch.rand(n),
            'bias': torch.zeros(n),
            'running_mean': torch.zeros(n),
            'running_var': torch.ones(n)}


def data_parallel(f, input, params, mode, device_ids, output_device=None):
    """Evaluate ``f(input, params, mode)``, split across `device_ids` if several.

    Args:
        f: callable taking ``(input, params, mode)``; it is also invoked with
            ``params=`` and ``mode=`` as keywords on the multi-device path.
        input: batch passed to ``scatter`` on the multi-device path.
        params: flat dict of tensors, broadcast to every device.
        mode: forwarded to `f` unchanged.
        device_ids: list of device indices; must be a list.
        output_device: device the outputs are gathered on; defaults to
            ``device_ids[0]``.

    Returns:
        The output of `f`, gathered on `output_device` when more than one
        device is used.
    """
    assert isinstance(device_ids, list)
    if output_device is None:
        output_device = device_ids[0]

    # single device: no replication needed
    if len(device_ids) == 1:
        return f(input, params, mode)

    # Broadcast returns one flat tuple: all tensors for device 0, then all for
    # device 1, ... so replica j's i-th tensor sits at i + j*len(params).
    params_all = Broadcast.apply(device_ids, *params.values())
    params_replicas = [{k: params_all[i + j*len(params)] for i, k in enumerate(params.keys())}
                       for j in range(len(device_ids))]

    replicas = [partial(f, params=p, mode=mode)
                for p in params_replicas]
    inputs = scatter([input], device_ids)
    outputs = parallel_apply(replicas, inputs)
    return gather(outputs, output_device)


def flatten(params):
    """Flatten a nested dict into one level with dot-joined keys.

    Entries whose value is None are dropped.
    """
    return {'.'.join(k): v for k, v in nested_dict(params).items_flat() if v is not None}


def batch_norm(x, params, base, mode):
    """Apply ``F.batch_norm`` using the four tensors stored under `base`.

    Reads ``params[base + '.weight' | '.bias' | '.running_mean' |
    '.running_var']``; `mode` is passed as the ``training`` flag.
    """
    return F.batch_norm(x, weight=params[base + '.weight'],
                        bias=params[base + '.bias'],
                        running_mean=params[base + '.running_mean'],
                        running_var=params[base + '.running_var'],
                        training=mode)


def print_tensor_dict(params):
    """Print one row per tensor: index, key, shape, type name, requires_grad.

    An empty dict prints nothing (previously ``max()`` raised ValueError on
    the empty key sequence).
    """
    kmax = max((len(key) for key in params.keys()), default=0)
    for i, (key, v) in enumerate(params.items()):
        print(str(i).ljust(5), key.ljust(kmax + 3), str(tuple(v.shape)).ljust(23), torch.typename(v), v.requires_grad)


def set_requires_grad_except_bn_(params):
    """Enable gradients in place on every tensor except batch-norm running stats.

    A tensor is skipped when its key ends with 'running_mean' or 'running_var'.
    """
    for k, v in params.items():
        if not k.endswith('running_mean') and not k.endswith('running_var'):
            v.requires_grad = True
-- Code for Wide Residual Networks http://arxiv.org/abs/1605.07146
-- (c) Sergey Zagoruyko, 2016
--
-- Training script: reads options from environment variables, loads a
-- dataset and a model definition from models/<name>.lua, trains with a
-- torchnet OptimEngine, evaluates on the test split after every epoch and
-- prints one `json_stats:` line per epoch. The final network is written to
-- <save>/model.t7.
require 'xlua'
require 'optim'
require 'image'
local tnt = require 'torchnet'
local c = require 'trepl.colorize'
local json = require 'cjson'
local utils = paths.dofile'models/utils.lua'

-- for memory optimizations and graph generation
local optnet = require 'optnet'
local graphgen = require 'optnet.graphgen'
local iterm = require 'iterm'
require 'iterm.dot'

-- Default options; each one is passed through xlua.envparams below.
local opt = {
   dataset = './datasets/cifar10_whitened.t7',
   save = 'logs',
   batchSize = 128,
   learningRate = 0.1,
   learningRateDecay = 0,
   learningRateDecayRatio = 0.2,
   weightDecay = 0.0005,
   dampening = 0,
   momentum = 0.9,
   epoch_step = "80",
   max_epoch = 300,
   model = 'nin',
   optimMethod = 'sgd',
   init_value = 10,
   depth = 50,
   shortcutType = 'A',
   nesterov = false,
   dropout = 0,
   hflip = true,
   randomcrop = 4,
   imageSize = 32,
   randomcrop_type = 'zero',
   cudnn_deterministic = false,
   optnet_optimize = true,
   generate_graph = false,
   multiply_input_factor = 1,
   widen_factor = 1,
   nGPU = 1,
   data_type = 'torch.CudaTensor',
   seed = 444,
}
opt = xlua.envparams(opt)


torch.manualSeed(opt.seed)

-- epoch_step is either a plain number ("80") or a Lua table literal such as
-- "{60,120,160}".
-- NOTE(review): the table form is evaluated with loadstring, i.e. the
-- option text is executed as Lua code; only use trusted values.
opt.epoch_step = tonumber(opt.epoch_step) or loadstring('return '..opt.epoch_step)()
print(opt)

-- Per-channel statistics applied only when the stored data is a ByteTensor.
local meanstd = {mean = {125.3, 123.0, 113.9}, std = {63.0, 62.1, 66.7}}
print(c.blue '==>' ..' loading data')
local provider = torch.load(opt.dataset)
-- The largest test label is used as the class count.
opt.num_classes = provider.testData.labels:max()
if torch.type(provider.trainData.data) == 'torch.ByteTensor' then
   for i,v in ipairs{'trainData', 'testData'} do
      provider[v].data = provider[v].data:float()--:div(256)
      for ch=1,3 do
         provider[v].data:select(2,ch):add(-meanstd.mean[ch]):div(meanstd.std[ch])
      end
   end
end

-- Convert a tensor/module/criterion to the configured tensor type.
local function cast(x) return x:type(opt.data_type) end

print(c.blue '==>' ..' configuring model')
local model = nn.Sequential()
local net = dofile('models/'..opt.model..'.lua')(opt)
if opt.data_type:match'torch.Cuda.*Tensor' then
   require 'cudnn'
   require 'cunn'
   cudnn.convert(net, cudnn):cuda()
   if opt.cudnn_deterministic then
      net:apply(function(m) if m.setMode then m:setMode(1,1,1) end end)
   end

   print(net)
   print('Network has', #net:findModules'cudnn.SpatialConvolution', 'convolutions')

   local sample_input = torch.randn(8,3,opt.imageSize,opt.imageSize):cuda()
   if opt.generate_graph then
      -- The graph is written inside opt.save, which this script otherwise
      -- only creates further down; make sure it exists first.
      paths.mkdir(opt.save)
      iterm.dot(graphgen(net, sample_input), opt.save..'/graph.pdf')
   end
   if opt.optnet_optimize then
      optnet.optimizeMemory(net, sample_input, {inplace = false, mode = 'training'})
   end
   -- to avoid optnet messing cudnn FindEx
   cudnn.benchmark = true
end
model:add(utils.makeDataParallelTable(net, opt.nGPU))
cast(model)

-- Return the image unchanged or horizontally flipped, chosen by a random
-- draw from {0, 1}.
local function hflip(x)
   return torch.random(0,1) == 1 and x or image.hflip(x)
end

-- Pad the image by opt.randomcrop on every side (reflection or zero
-- padding, per opt.randomcrop_type) and return a randomly positioned
-- imageSize x imageSize window of the padded result.
-- Raises an error for any other randomcrop_type.
local function randomcrop(x)
   local pad = opt.randomcrop
   -- `padder` is local: the original assigned an undeclared `module`, which
   -- created a global variable on every call.
   -- NOTE(review): the module is deliberately still built per call; the
   -- returned crop is presumably a view into the module's output buffer, so
   -- confirm aliasing behaviour before caching it.
   local padder
   if opt.randomcrop_type == 'reflection' then
      padder = nn.SpatialReflectionPadding(pad,pad,pad,pad):float()
   elseif opt.randomcrop_type == 'zero' then
      padder = nn.SpatialZeroPadding(pad,pad,pad,pad):float()
   else
      error'unknown mode'
   end

   local imsize = opt.imageSize
   local padded = padder:forward(x)
   -- Offsets are drawn in the same order as before (dim 3 first, then
   -- dim 2); distinct names avoid shadowing the input argument.
   local offset3 = torch.random(1,pad*2 + 1)
   local offset2 = torch.random(1,pad*2 + 1)
   return padded:narrow(3,offset3,imsize):narrow(2,offset2,imsize)
end


-- Build a 2-thread parallel iterator over provider[mode..'Data'].
--   mode == 'train': shuffled, augmented, batches of opt.batchSize with the
--                    incomplete last batch skipped.
--   otherwise:       in order, no augmentation, last batch included.
local function getIterator(mode)
   local dataset = provider[mode..'Data']

   local list_dataset = tnt.ListDataset{
      list = torch.range(1, dataset.labels:numel()):long(),
      load = function(idx)
         return {
            input = dataset.data[idx]:float(),
            target = torch.LongTensor{dataset.labels[idx]},
         }
      end,
   }

   local d
   if mode == 'train' then
      -- Build a dense list of enabled augmentations. The original
      -- `{cond and f or nil, ...}` constructor leaves a nil hole at index 1
      -- when hflip is disabled, and sequential traversal (ipairs / #) stops
      -- at the first nil, so randomcrop could be silently dropped.
      local transforms = {}
      if opt.hflip then
         table.insert(transforms, hflip)
      end
      if opt.randomcrop > 0 then
         table.insert(transforms, randomcrop)
      end

      d = list_dataset
         :shuffle()
         :transform{
            input = tnt.transform.compose(transforms)
         }
         :batch(opt.batchSize, 'skip-last')
   else
      d = list_dataset
         :batch(opt.batchSize, 'include-last')
   end

   -- Exposed so the epoch hook can reseed via iterator:exec('manualSeed', n).
   function d:manualSeed(seed) torch.manualSeed(seed) end

   return tnt.ParallelDatasetIterator{
      nthread = 2,
      init = function()
         require 'torchnet'
         require 'image'
         require 'nn'
      end,
      closure = function()
         return d
      end,
   }
end

-- Print the per-epoch stats merged with all options as one JSON line.
-- NOTE(review): `tablex` is not required in this file; presumably it is a
-- global provided by the environment the script is launched in - confirm.
local function log(t) print('json_stats: '..json.encode(tablex.merge(t,opt,true))) end

print('Will save at '..opt.save)
paths.mkdir(opt.save)

local engine = tnt.OptimEngine()
local criterion = cast(nn.CrossEntropyCriterion())
local meter = tnt.AverageValueMeter()
local clerr = tnt.ClassErrorMeter{topk = {1}}
local train_timer = torch.Timer()
local test_timer = torch.Timer()

-- Reset meters/timer, apply the learning-rate schedule and reseed/resample
-- the training iterator at the start of each epoch.
engine.hooks.onStartEpoch = function(state)
   local epoch = state.epoch + 1
   print('==>'.." online epoch # " .. epoch .. ' [batchSize = ' .. opt.batchSize .. ']')
   meter:reset()
   clerr:reset()
   train_timer:reset()
   -- Decay when the epoch is a multiple of a numeric epoch_step, or is
   -- listed in a table-valued epoch_step.
   if torch.type(opt.epoch_step) == 'number' and epoch % opt.epoch_step == 0 or
      torch.type(opt.epoch_step) == 'table' and tablex.find(opt.epoch_step, epoch) then
      opt.learningRate = opt.learningRate * opt.learningRateDecayRatio
      -- Both the optimiser config and its state are replaced by fresh
      -- copies of opt. NOTE(review): this looks like it also discards any
      -- accumulated optimiser state - confirm that is intended.
      state.config = tablex.deepcopy(opt)
      state.optim = tablex.deepcopy(opt)
   end

   state.iterator:exec('manualSeed', state.epoch + 1)
   state.iterator:exec'resample'
end

-- Capture training metrics, run a full test pass and log the epoch summary.
engine.hooks.onEndEpoch = function(state)
   -- Read the training values before the meters are reused for testing.
   local train_loss = meter:value()
   local train_err = clerr:value{k = 1}
   local train_time = train_timer:time().real
   meter:reset()
   clerr:reset()
   test_timer:reset()

   -- A new test iterator is created for every evaluation.
   engine:test{
      network = model,
      iterator = getIterator('test'),
      criterion = criterion,
   }

   log{
      loss = train_loss,
      train_loss = train_loss,
      train_acc = 100 - train_err,
      epoch = state.epoch,
      test_acc = 100 - clerr:value{k = 1},
      lr = opt.learningRate,
      train_time = train_time,
      test_time = test_timer:time().real,
      n_parameters = state.params:numel(),
   }
end

-- Accumulate loss and classification error after every criterion forward.
engine.hooks.onForwardCriterion = function(state)
   meter:add(state.criterion.output)
   clerr:add(state.network.output, state.sample.target)
end

-- Buffers of the configured tensor type, resized and refilled per sample.
local inputs = cast(torch.Tensor())
local targets = cast(torch.Tensor())
engine.hooks.onSample = function(state)
   inputs:resize(state.sample.input:size()):copy(state.sample.input)
   targets:resize(state.sample.target:size()):copy(state.sample.target)
   state.sample.input = inputs
   state.sample.target = targets
end

engine:train{
   network = model,
   iterator = getIterator('train'),
   criterion = criterion,
   optimMethod = optim.sgd,
   config = tablex.deepcopy(opt),
   maxepoch = opt.max_epoch,
}

torch.save(opt.save..'/model.t7', net:clearState())