├── .gitignore
├── LICENSE
├── README.md
├── notebooks
    ├── CNTK_CNN.ipynb
    ├── CNTK_CNN_highAPI.ipynb
    ├── CNTK_Inference.ipynb
    ├── CNTK_RNN.ipynb
    ├── Caffe2_CNN.ipynb
    ├── Caffe2_Inference.ipynb
    ├── Chainer_CNN.ipynb
    ├── Chainer_Inference.ipynb
    ├── Chainer_MultiGPU.ipynb
    ├── Gluon_CNN.ipynb
    ├── Gluon_Inference.ipynb
    ├── Gluon_MultiGPU.ipynb
    ├── Gluon_RNN.ipynb
    ├── KerasR_TF_CNN.ipynb
    ├── KerasR_TF_Inference.ipynb
    ├── KerasR_TF_RNN.ipynb
    ├── Keras_CNTK_CNN.ipynb
    ├── Keras_CNTK_Inference.ipynb
    ├── Keras_CNTK_RNN.ipynb
    ├── Keras_TF_CNN.ipynb
    ├── Keras_TF_Inference.ipynb
    ├── Keras_TF_MultiGPU.ipynb
    ├── Keras_TF_RNN.ipynb
    ├── Keras_Theano_CNN.ipynb
    ├── Knet_CNN.ipynb
    ├── Knet_Inference.ipynb
    ├── Knet_RNN.ipynb
    ├── MXNet_CNN.ipynb
    ├── MXNet_CNN_highAPI.ipynb
    ├── MXNet_Inference.ipynb
    ├── MXNet_RNN.ipynb
    ├── MXNet_RNN_TNC.ipynb
    ├── PyTorch_CNN.ipynb
    ├── PyTorch_Inference.ipynb
    ├── PyTorch_MultiGPU-Exp-Loss.ipynb
    ├── PyTorch_MultiGPU.ipynb
    ├── PyTorch_RNN.ipynb
    ├── Tensorflow_CNN.ipynb
    ├── Tensorflow_CNN_highAPI.ipynb
    ├── Tensorflow_Inference.ipynb
    ├── Tensorflow_MultiGPU.ipynb
    ├── Tensorflow_RNN.ipynb
    ├── Theano_Lasagne_CNN.ipynb
    └── common
    │   ├── __init__.py
    │   ├── automobile10.png
    │   ├── info.PNG
    │   ├── params.json
    │   ├── params.py
    │   ├── params_dense.py
    │   ├── params_inf.py
    │   ├── params_lstm.py
    │   ├── utils.R
    │   └── utils.py
└── support
    ├── chainer_4gpu.JPG
    ├── gluon_4gpu.JPG
    ├── keras_4gpu.JPG
    ├── logo.png
    ├── pytorch_4gpu.JPG
    └── tensorflow_4gpu.JPG


/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.pyc
 3 | *.swp
 4 | .ipynb_checkpoints/
 5 | cifar-10-batches-py/
 6 | __pycache__
 7 | .DS_Store
 8 | notebooks/chestxray
 9 | notebooks/*-0000.params
10 | notebooks/*-symbol.json
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Microsoft Corporation
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/notebooks/CNTK_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level CNTK Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import os\n",
 18 |     "import sys\n",
 19 |     "import cntk\n",
 20 |     "from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout\n",
 21 |     "from common.params import *\n",
 22 |     "from common.utils import *"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# Force one-gpu\n",
 32 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 3,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "OS:  linux\n",
 45 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 46 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 47 |       "Numpy:  1.14.1\n",
 48 |       "CNTK:  2.4\n",
 49 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 50 |       "CUDA Version 8.0.61\n",
 51 |       "CuDNN Version  6.0.21\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "print(\"OS: \", sys.platform)\n",
 57 |     "print(\"Python: \", sys.version)\n",
 58 |     "print(\"Numpy: \", np.__version__)\n",
 59 |     "print(\"CNTK: \", cntk.__version__)\n",
 60 |     "print(\"GPU: \", get_gpu_name())\n",
 61 |     "print(get_cuda_version())\n",
 62 |     "print(\"CuDNN Version \", get_cudnn_version())"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def create_symbol(n_classes=N_CLASSES):\n",
 72 |     "    # Weight initialiser from uniform distribution\n",
 73 |     "    # Default activation is ReLU (set in default_options below); the final Dense layer overrides it with None\n",
 74 |     "    with cntk.layers.default_options(init = cntk.glorot_uniform(), activation = cntk.relu):\n",
 75 |     "        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(features)\n",
 76 |     "        x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(x)\n",
 77 |     "        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)\n",
 78 |     "        x = Dropout(0.25)(x)\n",
 79 |     "\n",
 80 |     "        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)\n",
 81 |     "        x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)\n",
 82 |     "        x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)\n",
 83 |     "        x = Dropout(0.25)(x)    \n",
 84 |     "        \n",
 85 |     "        x = Dense(512)(x)\n",
 86 |     "        x = Dropout(0.5)(x)\n",
 87 |     "        x = Dense(n_classes, activation=None)(x)\n",
 88 |     "        return x"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 5,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "def init_model(m, labels, lr=LR, momentum=MOMENTUM):\n",
 98 |     "    # Loss (dense labels); TODO: check whether sparse labels are supported\n",
 99 |     "    loss = cntk.cross_entropy_with_softmax(m, labels)  \n",
100 |     "    # Momentum SGD\n",
101 |     "    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb\n",
102 |     "    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient\n",
103 |     "    # unit_gain=True: momentum_direction = momentum*old_momentum_direction + (1-momentum)*gradient\n",
104 |     "    learner = cntk.momentum_sgd(m.parameters,\n",
105 |     "                                lr=cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch) ,\n",
106 |     "                                momentum=cntk.momentum_schedule(momentum), \n",
107 |     "                                unit_gain=False)\n",
108 |     "    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)), [learner])\n",
109 |     "    return trainer"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 6,
115 |    "metadata": {},
116 |    "outputs": [
117 |     {
118 |      "name": "stdout",
119 |      "output_type": "stream",
120 |      "text": [
121 |       "Preparing train set...\n",
122 |       "Preparing test set...\n",
123 |       "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000, 10) (10000, 10)\n",
124 |       "float32 float32 float32 float32\n",
125 |       "CPU times: user 671 ms, sys: 576 ms, total: 1.25 s\n",
126 |       "Wall time: 1.25 s\n"
127 |      ]
128 |     }
129 |    ],
130 |    "source": [
131 |     "%%time\n",
132 |     "# Data into format for library\n",
133 |     "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True, one_hot=True)\n",
134 |     "# CNTK format\n",
135 |     "y_train = y_train.astype(np.float32)\n",
136 |     "y_test = y_test.astype(np.float32)\n",
137 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
138 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 7,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "name": "stdout",
148 |      "output_type": "stream",
149 |      "text": [
150 |       "CPU times: user 24 ms, sys: 32 ms, total: 56 ms\n",
151 |       "Wall time: 69 ms\n"
152 |      ]
153 |     }
154 |    ],
155 |    "source": [
156 |     "%%time\n",
157 |     "# Placeholders\n",
158 |     "features = cntk.input_variable((3, 32, 32), np.float32)\n",
159 |     "labels = cntk.input_variable(N_CLASSES, np.float32)\n",
160 |     "# Load symbol\n",
161 |     "sym = create_symbol()"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 8,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "name": "stdout",
171 |      "output_type": "stream",
172 |      "text": [
173 |       "CPU times: user 119 ms, sys: 116 ms, total: 235 ms\n",
174 |       "Wall time: 236 ms\n"
175 |      ]
176 |     }
177 |    ],
178 |    "source": [
179 |     "%%time\n",
180 |     "trainer = init_model(sym, labels)"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 9,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "name": "stdout",
190 |      "output_type": "stream",
191 |      "text": [
192 |       "Epoch 1  |  Accuracy: 0.468750\n",
193 |       "Epoch 2  |  Accuracy: 0.640625\n",
194 |       "Epoch 3  |  Accuracy: 0.609375\n",
195 |       "Epoch 4  |  Accuracy: 0.578125\n",
196 |       "Epoch 5  |  Accuracy: 0.812500\n",
197 |       "Epoch 6  |  Accuracy: 0.781250\n",
198 |       "Epoch 7  |  Accuracy: 0.671875\n",
199 |       "Epoch 8  |  Accuracy: 0.843750\n",
200 |       "Epoch 9  |  Accuracy: 0.796875\n",
201 |       "Epoch 10  |  Accuracy: 0.843750\n",
202 |       "CPU times: user 40.3 s, sys: 13.1 s, total: 53.3 s\n",
203 |       "Wall time: 53.2 s\n"
204 |      ]
205 |     }
206 |    ],
207 |    "source": [
208 |     "%%time \n",
209 |     "# Main training loop: 53s\n",
210 |     "for j in range(EPOCHS):\n",
211 |     "    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
212 |     "        trainer.train_minibatch({features: data, labels: label})\n",
213 |     "    # Log (this is just last batch in epoch, not average of batches)\n",
214 |     "    eval_error = trainer.previous_minibatch_evaluation_average\n",
215 |     "    print(\"Epoch %d  |  Accuracy: %.6f\" % (j+1, (1-eval_error)))"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 10,
221 |    "metadata": {},
222 |    "outputs": [
223 |     {
224 |      "name": "stdout",
225 |      "output_type": "stream",
226 |      "text": [
227 |       "CPU times: user 291 ms, sys: 88.9 ms, total: 379 ms\n",
228 |       "Wall time: 408 ms\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "%%time\n",
234 |     "# Main evaluation loop: 343ms\n",
235 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
236 |     "y_guess = np.zeros(n_samples, dtype=np.int)\n",
237 |     "y_truth = np.argmax(y_test[:n_samples], axis=-1)\n",
238 |     "c = 0\n",
239 |     "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n",
240 |     "    predicted_label_probs = sym.eval({features : data})\n",
241 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)\n",
242 |     "    c += 1"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 11,
248 |    "metadata": {},
249 |    "outputs": [
250 |     {
251 |      "name": "stdout",
252 |      "output_type": "stream",
253 |      "text": [
254 |       "Accuracy:  0.7701322115384616\n"
255 |      ]
256 |     }
257 |    ],
258 |    "source": [
259 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
260 |    ]
261 |   }
262 |  ],
263 |  "metadata": {
264 |   "anaconda-cloud": {},
265 |   "kernelspec": {
266 |    "display_name": "Python 3",
267 |    "language": "python",
268 |    "name": "python3"
269 |   },
270 |   "language_info": {
271 |    "codemirror_mode": {
272 |     "name": "ipython",
273 |     "version": 3
274 |    },
275 |    "file_extension": ".py",
276 |    "mimetype": "text/x-python",
277 |    "name": "python",
278 |    "nbconvert_exporter": "python",
279 |    "pygments_lexer": "ipython3",
280 |    "version": "3.5.2"
281 |   }
282 |  },
283 |  "nbformat": 4,
284 |  "nbformat_minor": 2
285 | }
286 | 


--------------------------------------------------------------------------------
/notebooks/CNTK_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "%%bash\n",
 10 |     "# Download model\n",
 11 |     "#wget https://www.cntk.ai/Models/CNTK_Pretrained/ResNet50_ImageNet_CNTK.model"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import os\n",
 21 |     "import sys\n",
 22 |     "import numpy as np\n",
 23 |     "import cntk as C\n",
 24 |     "from cntk import load_model, combine\n",
 25 |     "from common.params_inf import *\n",
 26 |     "from common.utils import *"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "# Force one-gpu\n",
 36 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 4,
 42 |    "metadata": {},
 43 |    "outputs": [
 44 |     {
 45 |      "name": "stdout",
 46 |      "output_type": "stream",
 47 |      "text": [
 48 |       "OS:  linux\n",
 49 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 50 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 51 |       "Numpy:  1.14.1\n",
 52 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 53 |       "CUDA Version 8.0.61\n",
 54 |       "CuDNN Version  6.0.21\n"
 55 |      ]
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "print(\"OS: \", sys.platform)\n",
 60 |     "print(\"Python: \", sys.version)\n",
 61 |     "print(\"Numpy: \", np.__version__)\n",
 62 |     "print(\"GPU: \", get_gpu_name())\n",
 63 |     "print(get_cuda_version())\n",
 64 |     "print(\"CuDNN Version \", get_cudnn_version())"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 5,
 70 |    "metadata": {},
 71 |    "outputs": [
 72 |     {
 73 |      "name": "stdout",
 74 |      "output_type": "stream",
 75 |      "text": [
 76 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 77 |      ]
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "# Create batches of fake data\n",
 82 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 83 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 6,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "# Features (penultimate layer)\n",
 93 |     "node_name = \"z.x\"\n",
 94 |     "model_file = \"ResNet50_ImageNet_CNTK.model\""
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 7,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "def predict_fn(classifier, data, batchsize):\n",
104 |     "    \"\"\" Return features from classifier \"\"\"\n",
105 |     "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
106 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
107 |     "        pred = classifier.eval(dta)\n",
108 |     "        out[idx*batchsize:(idx+1)*batchsize] = pred[0].squeeze()\n",
109 |     "    return out"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 8,
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "# Load model\n",
119 |     "loaded_model  = load_model(model_file)\n",
120 |     "node_in_graph = loaded_model.find_by_name(node_name)\n",
121 |     "output_nodes  = combine([node_in_graph.owner])"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 9,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "fake_input_data_cf = np.ascontiguousarray(fake_input_data_cf)\n",
131 |     "cold_start = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 10,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "CPU times: user 1.53 s, sys: 501 ms, total: 2.03 s\n",
144 |       "Wall time: 2.26 s\n"
145 |      ]
146 |     }
147 |    ],
148 |    "source": [
149 |     "%%time\n",
150 |     "features = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": 11,
156 |    "metadata": {},
157 |    "outputs": [
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "Images per second 624.3902439024391\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.05))"
168 |    ]
169 |   }
170 |  ],
171 |  "metadata": {
172 |   "kernelspec": {
173 |    "display_name": "Python 3",
174 |    "language": "python",
175 |    "name": "python3"
176 |   },
177 |   "language_info": {
178 |    "codemirror_mode": {
179 |     "name": "ipython",
180 |     "version": 3
181 |    },
182 |    "file_extension": ".py",
183 |    "mimetype": "text/x-python",
184 |    "name": "python",
185 |    "nbconvert_exporter": "python",
186 |    "pygments_lexer": "ipython3",
187 |    "version": "3.5.2"
188 |   }
189 |  },
190 |  "nbformat": 4,
191 |  "nbformat_minor": 2
192 | }
193 | 


--------------------------------------------------------------------------------
/notebooks/CNTK_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN CNTK Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import os\n",
 18 |     "import sys\n",
 19 |     "import cntk\n",
 20 |     "from cntk.layers import Embedding, LSTM, GRU, Dense, Recurrence\n",
 21 |     "from cntk import sequence\n",
 22 |     "from common.params_lstm import *\n",
 23 |     "from common.utils import *"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "# Force one-gpu\n",
 33 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 3,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "OS:  linux\n",
 46 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 47 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 48 |       "Numpy:  1.14.1\n",
 49 |       "CNTK:  2.4\n",
 50 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 51 |       "CUDA Version 8.0.61\n",
 52 |       "CuDNN Version  6.0.21\n"
 53 |      ]
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "print(\"OS: \", sys.platform)\n",
 58 |     "print(\"Python: \", sys.version)\n",
 59 |     "print(\"Numpy: \", np.__version__)\n",
 60 |     "print(\"CNTK: \", cntk.__version__)\n",
 61 |     "print(\"GPU: \", get_gpu_name())\n",
 62 |     "print(get_cuda_version())\n",
 63 |     "print(\"CuDNN Version \", get_cudnn_version())"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 4,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "def create_symbol(CUDNN=True, edim=EMBEDSIZE, nhid=NUMHIDDEN):\n",
 73 |     "    # Weight initialiser from uniform distribution\n",
 74 |     "    # Activation (unless stated) is None\n",
 75 |     "    with cntk.layers.default_options(init = cntk.glorot_uniform()):\n",
 76 |     "        x = Embedding(edim)(features) # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, EMBEDSIZE)\n",
 77 |     "        \n",
 78 |     "        # Since we have a vanilla RNN, instead of using the more flexible Recurrence(GRU) unit, which allows for\n",
 79 |     "        # example LayerNormalisation to be added to the network, we can use optimized_rnnstack which quickly\n",
 80 |     "        # goes down to the CuDNN level. This is another reason not to read much into the speed comparison because\n",
 81 |     "        # it becomes a measure of which framework has the fastest way to go down to CuDNN.\n",
 82 |     "        if not CUDNN:\n",
 83 |     "            x = Recurrence(GRU(nhid))(x) # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, NUMHIDDEN)\n",
 84 |     "        else:\n",
 85 |     "            W = cntk.parameter((cntk.InferredDimension, 4))\n",
 86 |     "            x = cntk.ops.optimized_rnnstack(x, W, nhid, \n",
 87 |     "                                            num_layers=1, bidirectional=False, recurrent_op='gru')\n",
 88 |     "        \n",
 89 |     "        x = sequence.last(x) # output: array with shape=(BATCHSIZE, NUMHIDDEN)\n",
 90 |     "        x = Dense(2)(x) # output: array with shape=(BATCHSIZE, 2)\n",
 91 |     "        return x"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 5,
 97 |    "metadata": {},
 98 |    "outputs": [],
 99 |    "source": [
100 |     "def init_model(m, labels, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
101 |     "    # Loss (dense labels); TODO: check whether sparse labels are supported\n",
102 |     "    loss = cntk.cross_entropy_with_softmax(m, labels)  \n",
103 |     "    # ADAM, set unit_gain to False to match others\n",
104 |     "    learner = cntk.adam(m.parameters,\n",
105 |     "                        lr=cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch) ,\n",
106 |     "                        momentum=cntk.momentum_schedule(b1), \n",
107 |     "                        variance_momentum=cntk.momentum_schedule(b2),\n",
108 |     "                        epsilon=eps,\n",
109 |     "                        unit_gain=False)\n",
110 |     "    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)), [learner])\n",
111 |     "    return trainer"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {
118 |     "scrolled": true
119 |    },
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "Preparing train set...\n",
126 |       "Preparing test set...\n",
127 |       "Trimming to 30000 max-features\n",
128 |       "Padding to length 150\n",
129 |       "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n",
130 |       "int32 int32 float32 float32\n",
131 |       "CPU times: user 5.77 s, sys: 379 ms, total: 6.15 s\n",
132 |       "Wall time: 6.15 s\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "%%time\n",
138 |     "# Data into format for library\n",
139 |     "x_train, x_test, y_train, y_test = imdb_for_library(\n",
140 |     "    seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True) # CNTK format\n",
141 |     "y_train = y_train.astype(np.float32)\n",
142 |     "y_test = y_test.astype(np.float32)\n",
143 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
144 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 7,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "name": "stdout",
154 |      "output_type": "stream",
155 |      "text": [
156 |       "CPU times: user 10.6 ms, sys: 32.4 ms, total: 43 ms\n",
157 |       "Wall time: 52.9 ms\n"
158 |      ]
159 |     }
160 |    ],
161 |    "source": [
162 |     "%%time\n",
163 |     "# Placeholders\n",
164 |     "features = sequence.input_variable(shape=MAXFEATURES, is_sparse=True)\n",
165 |     "labels = cntk.input_variable(2)\n",
166 |     "# Load symbol\n",
167 |     "sym = create_symbol()"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 8,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "CPU times: user 110 ms, sys: 262 ms, total: 371 ms\n",
180 |       "Wall time: 377 ms\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "%%time\n",
186 |     "trainer = init_model(sym, labels)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 9,
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "name": "stdout",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "Epoch 1  |  Accuracy: 0.890625\n",
199 |       "Epoch 2  |  Accuracy: 0.875000\n",
200 |       "Epoch 3  |  Accuracy: 0.968750\n",
201 |       "CPU times: user 13.7 s, sys: 1.31 s, total: 15 s\n",
202 |       "Wall time: 14.6 s\n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "%%time\n",
208 |     "# Main training loop: 14.6s\n",
209 |     "for j in range(EPOCHS):\n",
210 |     "    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
211 |     "        data_1hot = cntk.Value.one_hot(data, MAXFEATURES) #TODO: do this externally and generate batches of 1hot\n",
212 |     "        trainer.train_minibatch({features: data_1hot, labels: label})\n",
213 |     "    # Log (this is just last batch in epoch, not average of batches)\n",
214 |     "    eval_error = trainer.previous_minibatch_evaluation_average\n",
215 |     "    print(\"Epoch %d  |  Accuracy: %.6f\" % (j+1, (1-eval_error)))"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 10,
221 |    "metadata": {},
222 |    "outputs": [
223 |     {
224 |      "name": "stdout",
225 |      "output_type": "stream",
226 |      "text": [
227 |       "CPU times: user 2.38 s, sys: 156 ms, total: 2.54 s\n",
228 |       "Wall time: 2.54 s\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "%%time\n",
234 |     "# Main evaluation loop: 2.55s\n",
235 |     "z = cntk.softmax(sym)\n",
236 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
237 |     "y_guess = np.zeros(n_samples, dtype=np.int)\n",
238 |     "y_truth = np.argmax(y_test[:n_samples], axis=-1)\n",
239 |     "c = 0\n",
240 |     "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n",
241 |     "    data = cntk.Value.one_hot(data, MAXFEATURES)\n",
242 |     "    predicted_label_probs = z.eval({features : data})\n",
243 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)\n",
244 |     "    c += 1"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 11,
250 |    "metadata": {},
251 |    "outputs": [
252 |     {
253 |      "name": "stdout",
254 |      "output_type": "stream",
255 |      "text": [
256 |       "Accuracy:  0.8565304487179487\n"
257 |      ]
258 |     }
259 |    ],
260 |    "source": [
261 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
262 |    ]
263 |   }
264 |  ],
265 |  "metadata": {
266 |   "anaconda-cloud": {},
267 |   "kernelspec": {
268 |    "display_name": "Python 3",
269 |    "language": "python",
270 |    "name": "python3"
271 |   },
272 |   "language_info": {
273 |    "codemirror_mode": {
274 |     "name": "ipython",
275 |     "version": 3
276 |    },
277 |    "file_extension": ".py",
278 |    "mimetype": "text/x-python",
279 |    "name": "python",
280 |    "nbconvert_exporter": "python",
281 |    "pygments_lexer": "ipython3",
282 |    "version": "3.5.2"
283 |   }
284 |  },
285 |  "nbformat": 4,
286 |  "nbformat_minor": 2
287 | }
288 | 


--------------------------------------------------------------------------------
/notebooks/Caffe2_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "%%bash\n",
 10 |     "# Download ResNet50 pre-trained\n",
 11 |     "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_init_net.pb\n",
 12 |     "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_predict_net.pb"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "init_net_loc = 'resnet50_init_net.pb'\n",
 22 |     "predict_net_loc = 'resnet50_predict_net.pb'"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 3,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "import os\n",
 32 |     "import sys\n",
 33 |     "import numpy as np\n",
 34 |     "import caffe2\n",
 35 |     "from caffe2.proto import caffe2_pb2\n",
 36 |     "from caffe2.python import model_helper, core, workspace, models\n",
 37 |     "from common.params_inf import *\n",
 38 |     "from common.utils import *"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 4,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Force one-gpu\n",
 48 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 5,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stdout",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "OS:  linux\n",
 61 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 62 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 63 |       "Numpy:  1.14.1\n",
 64 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 65 |       "CUDA Version 8.0.61\n",
 66 |       "CuDNN Version  6.0.21\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "print(\"OS: \", sys.platform)\n",
 72 |     "print(\"Python: \", sys.version)\n",
 73 |     "print(\"Numpy: \", np.__version__)\n",
 74 |     "print(\"GPU: \", get_gpu_name())\n",
 75 |     "print(get_cuda_version())\n",
 76 |     "print(\"CuDNN Version \", get_cudnn_version())"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 6,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "name": "stdout",
 86 |      "output_type": "stream",
 87 |      "text": [
 88 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 89 |      ]
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "# Create batches of fake data\n",
 94 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 95 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 7,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "def load_net(INIT_NET, PREDICT_NET, device_opts):\n",
105 |     "    \"\"\" Run the init net once, create the predict net, return the predict net's name \"\"\"\n",
106 |     "    init_def, net_def = caffe2_pb2.NetDef(), caffe2_pb2.NetDef()\n",
107 |     "    with open(INIT_NET, 'rb') as init_file:\n",
108 |     "        init_def.ParseFromString(init_file.read())\n",
109 |     "    init_def.device_option.CopyFrom(device_opts)\n",
110 |     "    workspace.RunNetOnce(init_def.SerializeToString())\n",
111 |     "    with open(PREDICT_NET, 'rb') as predict_file:\n",
112 |     "        net_def.ParseFromString(predict_file.read())\n",
113 |     "    net_def.device_option.CopyFrom(device_opts)\n",
114 |     "    workspace.CreateNet(net_def.SerializeToString(), overwrite=True)\n",
115 |     "    return net_def.name"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 8,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "def predict_fn(classifier, data, batchsize, device_opts):\n",
125 |     "    \"\"\" Feed each minibatch to blob 'data', run the net, collect blob 'pool5' as features \"\"\"\n",
126 |     "    features = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
127 |     "    for batch_no, minibatch in yield_mb_X(data, batchsize):\n",
128 |     "        workspace.FeedBlob(\"data\", minibatch, device_option=device_opts)\n",
129 |     "        workspace.RunNet(classifier, 1)\n",
130 |     "        features[batch_no*batchsize:(batch_no+1)*batchsize] = workspace.FetchBlob('pool5').squeeze()\n",
131 |     "    return features"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 9,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0) \n",
141 |     "test_net = load_net(init_net_loc, \n",
142 |     "                    predict_net_loc,\n",
143 |     "                    device_opts=device_opts)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 10,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "cold_start = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE, device_opts)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 11,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "name": "stdout",
162 |      "output_type": "stream",
163 |      "text": [
164 |       "CPU times: user 8.28 s, sys: 1.84 s, total: 10.1 s\n",
165 |       "Wall time: 10.1 s\n"
166 |      ]
167 |     }
168 |    ],
169 |    "source": [
170 |     "%%time\n",
171 |     "features = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE, device_opts)"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 13,
177 |    "metadata": {},
178 |    "outputs": [
179 |     {
180 |      "name": "stdout",
181 |      "output_type": "stream",
182 |      "text": [
183 |       "Images per second 126.73267326732673\n"
184 |      ]
185 |     }
186 |    ],
187 |    "source": [
188 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/10.1))"
189 |    ]
190 |   }
191 |  ],
192 |  "metadata": {
193 |   "kernelspec": {
194 |    "display_name": "Python 3",
195 |    "language": "python",
196 |    "name": "python3"
197 |   },
198 |   "language_info": {
199 |    "codemirror_mode": {
200 |     "name": "ipython",
201 |     "version": 3
202 |    },
203 |    "file_extension": ".py",
204 |    "mimetype": "text/x-python",
205 |    "name": "python",
206 |    "nbconvert_exporter": "python",
207 |    "pygments_lexer": "ipython3",
208 |    "version": "3.5.2"
209 |   }
210 |  },
211 |  "nbformat": 4,
212 |  "nbformat_minor": 2
213 | }
214 | 


--------------------------------------------------------------------------------
/notebooks/Chainer_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level Chainer Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import math\n",
 20 |     "import chainer\n",
 21 |     "import chainer.functions as F\n",
 22 |     "import chainer.links as L\n",
 23 |     "from chainer import optimizers\n",
 24 |     "from chainer import cuda\n",
 25 |     "from common.params import *\n",
 26 |     "from common.utils import *"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "# Performance Improvement\n",
 36 |     "# 1. Auto-tune\n",
 37 |     "# Adds very little speed-up here; autotune defaults to False, so it is enabled explicitly\n",
 38 |     "chainer.global_config.autotune = True"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Force one-gpu\n",
 48 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 4,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stdout",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "OS:  linux\n",
 61 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 62 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 63 |       "Chainer:  3.4.0\n",
 64 |       "CuPy:  2.4.0\n",
 65 |       "Numpy:  1.14.1\n",
 66 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 67 |       "CUDA Version 8.0.61\n",
 68 |       "CuDNN Version  6.0.21\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "print(\"OS: \", sys.platform)\n",
 74 |     "print(\"Python: \", sys.version)\n",
 75 |     "print(\"Chainer: \", chainer.__version__)\n",
 76 |     "print(\"CuPy: \", chainer.cuda.cupy.__version__)\n",
 77 |     "print(\"Numpy: \", np.__version__)\n",
 78 |     "print(\"GPU: \", get_gpu_name())\n",
 79 |     "print(get_cuda_version())\n",
 80 |     "print(\"CuDNN Version \", get_cudnn_version())"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 5,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "class SymbolModule(chainer.Chain):\n",
 90 |     "    def __init__(self, n_classes=N_CLASSES):\n",
 91 |     "        super().__init__()\n",
 92 |     "        with self.init_scope():\n",
 93 |     "            self.conv1 = L.Convolution2D(in_channels=3, out_channels=50, ksize=3, pad=1)\n",
 94 |     "            self.conv2 = L.Convolution2D(in_channels=50, out_channels=50, ksize=3, pad=1)\n",
 95 |     "            self.conv3 = L.Convolution2D(in_channels=50, out_channels=100, ksize=3, pad=1)\n",
 96 |     "            self.conv4 = L.Convolution2D(in_channels=100, out_channels=100, ksize=3, pad=1)\n",
 97 |     "            # two stride-2 poolings shrink the 32*32 input to an 8*8 feature map\n",
 98 |     "            self.fc1 = L.Linear(in_size=100*8*8, out_size=512)\n",
 99 |     "            self.fc2 = L.Linear(in_size=512, out_size=n_classes)\n",
100 |     "    \n",
101 |     "    def __call__(self, x):\n",
102 |     "        h = F.relu(self.conv1(x))\n",
103 |     "        h = F.max_pooling_2d(self.conv2(h), ksize=2, stride=2)\n",
104 |     "        h = F.dropout(F.relu(h), 0.25)\n",
105 |     "        # Second block: conv -> relu -> conv -> max-pool -> relu -> dropout\n",
106 |     "        h = F.relu(self.conv3(h))\n",
107 |     "        h = F.max_pooling_2d(self.conv4(h), ksize=2, stride=2)\n",
108 |     "        h = F.dropout(F.relu(h), 0.25)\n",
109 |     "        # Dense head: fc1 -> relu -> dropout -> fc2 (raw class scores)\n",
110 |     "        hidden = F.dropout(F.relu(self.fc1(h)), 0.5)\n",
111 |     "        return self.fc2(hidden)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "def init_model(m, lr=LR, momentum=MOMENTUM):\n",
121 |     "    sgd = optimizers.MomentumSGD(lr=lr, momentum=momentum)\n",
122 |     "    sgd.setup(m)  # attach the optimizer to model m\n",
123 |     "    return sgd"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 7,
129 |    "metadata": {
130 |     "scrolled": true
131 |    },
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "Preparing train set...\n",
138 |       "Preparing test set...\n",
139 |       "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
140 |       "float32 float32 int32 int32\n",
141 |       "CPU times: user 605 ms, sys: 612 ms, total: 1.22 s\n",
142 |       "Wall time: 1.22 s\n"
143 |      ]
144 |     }
145 |    ],
146 |    "source": [
147 |     "%%time\n",
148 |     "# Data into format for library\n",
149 |     "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
150 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
151 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 8,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       "CPU times: user 216 ms, sys: 132 ms, total: 349 ms\n",
164 |       "Wall time: 348 ms\n"
165 |      ]
166 |     }
167 |    ],
168 |    "source": [
169 |     "%%time\n",
170 |     "# Create symbol\n",
171 |     "sym = SymbolModule()\n",
172 |     "chainer.cuda.get_device(0).use()  # Make a specified GPU current\n",
173 |     "sym.to_gpu()  # Copy the model to the GPU"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 9,
179 |    "metadata": {},
180 |    "outputs": [
181 |     {
182 |      "name": "stdout",
183 |      "output_type": "stream",
184 |      "text": [
185 |       "CPU times: user 115 µs, sys: 0 ns, total: 115 µs\n",
186 |       "Wall time: 119 µs\n"
187 |      ]
188 |     }
189 |    ],
190 |    "source": [
191 |     "%%time\n",
192 |     "optimizer = init_model(sym)"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 10,
198 |    "metadata": {},
199 |    "outputs": [
200 |     {
201 |      "name": "stdout",
202 |      "output_type": "stream",
203 |      "text": [
204 |       "0\n",
205 |       "1\n",
206 |       "2\n",
207 |       "3\n",
208 |       "4\n",
209 |       "5\n",
210 |       "6\n",
211 |       "7\n",
212 |       "8\n",
213 |       "9\n",
214 |       "CPU times: user 1min 7s, sys: 1.61 s, total: 1min 8s\n",
215 |       "Wall time: 1min 9s\n"
216 |      ]
217 |     }
218 |    ],
219 |    "source": [
220 |     "%%time\n",
221 |     "# Main training loop: 69s\n",
222 |     "for epoch in range(EPOCHS):\n",
223 |     "    for x_mb, y_mb in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
224 |     "        # Move the minibatch to the GPU\n",
225 |     "        x_gpu = cuda.to_gpu(x_mb)\n",
226 |     "        y_gpu = cuda.to_gpu(y_mb)\n",
227 |     "        # Forward pass\n",
228 |     "        logits = sym(x_gpu)\n",
229 |     "        # Softmax cross-entropy loss\n",
230 |     "        batch_loss = F.softmax_cross_entropy(logits, y_gpu)\n",
231 |     "        sym.cleargrads()\n",
232 |     "        # Backward pass, then parameter update\n",
233 |     "        batch_loss.backward()\n",
234 |     "        optimizer.update()\n",
235 |     "    # Log the index of the finished epoch\n",
236 |     "    print(epoch)"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 13,
242 |    "metadata": {},
243 |    "outputs": [
244 |     {
245 |      "name": "stdout",
246 |      "output_type": "stream",
247 |      "text": [
248 |       "CPU times: user 466 ms, sys: 0 ns, total: 466 ms\n",
249 |       "Wall time: 466 ms\n"
250 |      ]
251 |     }
252 |    ],
253 |    "source": [
254 |     "%%time\n",
255 |     "# Main evaluation loop: 800ms\n",
256 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE  # samples covered by whole minibatches\n",
257 |     "y_guess = np.zeros(n_samples, dtype=int)  # builtin int: the np.int alias was removed in NumPy 1.24\n",
258 |     "y_truth = y_test[:n_samples]\n",
259 |     "c = 0\n",
260 |     "with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):\n",
261 |     "    for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
262 |     "        # Forwards: arg-max over the last axis gives the predicted class per sample\n",
263 |     "        pred = cuda.to_cpu(sym(cuda.to_gpu(data)).data.argmax(-1))\n",
264 |     "        # Collect results into the slot for minibatch c\n",
265 |     "        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
266 |     "        c += 1"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": 14,
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stdout",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "Accuracy:  0.7901642628205128\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
284 |    ]
285 |   }
286 |  ],
287 |  "metadata": {
288 |   "anaconda-cloud": {},
289 |   "kernelspec": {
290 |    "display_name": "Python 3",
291 |    "language": "python",
292 |    "name": "python3"
293 |   },
294 |   "language_info": {
295 |    "codemirror_mode": {
296 |     "name": "ipython",
297 |     "version": 3
298 |    },
299 |    "file_extension": ".py",
300 |    "mimetype": "text/x-python",
301 |    "name": "python",
302 |    "nbconvert_exporter": "python",
303 |    "pygments_lexer": "ipython3",
304 |    "version": "3.5.2"
305 |   }
306 |  },
307 |  "nbformat": 4,
308 |  "nbformat_minor": 2
309 | }
310 | 


--------------------------------------------------------------------------------
/notebooks/Chainer_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "%%bash\n",
 10 |     "# Downloaded from https://github.com/KaimingHe/deep-residual-networks\n",
 11 |     "#cd /home/iliauk/.chainer/dataset/pfnet/chainer/models/\n",
 12 |     "#wget https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/ResNet-50-model.caffemodel"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import os\n",
 22 |     "import sys\n",
 23 |     "import numpy as np\n",
 24 |     "import chainer\n",
 25 |     "import chainer.functions as F\n",
 26 |     "import chainer.links as L\n",
 27 |     "from chainer import optimizers\n",
 28 |     "from chainer import cuda\n",
 29 |     "from common.params_inf import *\n",
 30 |     "from common.utils import *"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# Force one-gpu\n",
 40 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 4,
 46 |    "metadata": {},
 47 |    "outputs": [
 48 |     {
 49 |      "name": "stdout",
 50 |      "output_type": "stream",
 51 |      "text": [
 52 |       "OS:  linux\n",
 53 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 54 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 55 |       "Numpy:  1.14.1\n",
 56 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 57 |       "CUDA Version 8.0.61\n",
 58 |       "CuDNN Version  6.0.21\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "print(\"OS: \", sys.platform)\n",
 64 |     "print(\"Python: \", sys.version)\n",
 65 |     "print(\"Numpy: \", np.__version__)\n",
 66 |     "print(\"GPU: \", get_gpu_name())\n",
 67 |     "print(get_cuda_version())\n",
 68 |     "print(\"CuDNN Version \", get_cudnn_version())"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 5,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 81 |      ]
 82 |     }
 83 |    ],
 84 |    "source": [
 85 |     "# Create batches of fake data\n",
 86 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 87 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 6,
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "data": {
 97 |       "text/plain": [
 98 |        "<chainer.links.model.vision.resnet.ResNet50Layers at 0x7fcc38204438>"
 99 |       ]
100 |      },
101 |      "execution_count": 6,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "resnet50 = chainer.links.ResNet50Layers(pretrained_model=\"auto\")\n",
108 |     "# GPU\n",
109 |     "chainer.cuda.get_device(0).use()  # Make a specified GPU current\n",
110 |     "resnet50.to_gpu()  # Copy the model to the GPU"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 7,
116 |    "metadata": {},
117 |    "outputs": [
118 |     {
119 |      "data": {
120 |       "text/plain": [
121 |        "['conv1', 'pool1', 'res2', 'res3', 'res4', 'res5', 'pool5', 'fc6', 'prob']"
122 |       ]
123 |      },
124 |      "execution_count": 7,
125 |      "metadata": {},
126 |      "output_type": "execute_result"
127 |     }
128 |    ],
129 |    "source": [
130 |     "resnet50.available_layers"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 8,
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "def predict_fn(classifier, data, batchsize):\n",
140 |     "    \"\"\" Collect the 'pool5' output for every minibatch, with train mode and backprop switched off \"\"\"\n",
141 |     "    features = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
142 |     "    with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):\n",
143 |     "        for batch_no, minibatch in yield_mb_X(data, batchsize):\n",
144 |     "            pool5 = classifier(cuda.to_gpu(minibatch), layers=['pool5'])['pool5']\n",
145 |     "            features[batch_no*batchsize:(batch_no+1)*batchsize] = cuda.to_cpu(pool5.data).squeeze()\n",
146 |     "    return features"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 9,
152 |    "metadata": {},
153 |    "outputs": [],
154 |    "source": [
155 |     "cold_start = predict_fn(resnet50, fake_input_data_cf, BATCH_SIZE)"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 10,
161 |    "metadata": {},
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "CPU times: user 3.52 s, sys: 7.74 ms, total: 3.53 s\n",
168 |       "Wall time: 3.52 s\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "%%time\n",
174 |     "features = predict_fn(resnet50, fake_input_data_cf, BATCH_SIZE)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 11,
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "name": "stdout",
184 |      "output_type": "stream",
185 |      "text": [
186 |       "Images per second 363.6363636363636\n"
187 |      ]
188 |     }
189 |    ],
190 |    "source": [
191 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/3.52))"
192 |    ]
193 |   }
194 |  ],
195 |  "metadata": {
196 |   "kernelspec": {
197 |    "display_name": "Python 3",
198 |    "language": "python",
199 |    "name": "python3"
200 |   },
201 |   "language_info": {
202 |    "codemirror_mode": {
203 |     "name": "ipython",
204 |     "version": 3
205 |    },
206 |    "file_extension": ".py",
207 |    "mimetype": "text/x-python",
208 |    "name": "python",
209 |    "nbconvert_exporter": "python",
210 |    "pygments_lexer": "ipython3",
211 |    "version": "3.5.2"
212 |   }
213 |  },
214 |  "nbformat": 4,
215 |  "nbformat_minor": 2
216 | }
217 | 


--------------------------------------------------------------------------------
/notebooks/Gluon_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# MXNet/Gluon CNN example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import math\n",
 20 |     "import mxnet as mx\n",
 21 |     "from mxnet import nd, autograd\n",
 22 |     "from mxnet import gluon\n",
 23 |     "from common.params import *\n",
 24 |     "from common.utils import *"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# Force one-gpu\n",
 34 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "OS:  linux\n",
 47 |       "Python:  3.6.3 |Anaconda custom (64-bit)| (default, Oct 13 2017, 12:02:49) \n",
 48 |       "[GCC 7.2.0]\n",
 49 |       "MXNet:  1.3.0\n",
 50 |       "Numpy:  1.13.3\n",
 51 |       "GPU:  ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n",
 52 |       "CUDA Version 9.1.85\n",
 53 |       "CuDNN Version  7.1.3\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "print(\"OS: \", sys.platform)\n",
 59 |     "print(\"Python: \", sys.version)\n",
 60 |     "print(\"MXNet: \", mx.__version__)\n",
 61 |     "print(\"Numpy: \", np.__version__)\n",
 62 |     "print(\"GPU: \", get_gpu_name())\n",
 63 |     "print(get_cuda_version())\n",
 64 |     "print(\"CuDNN Version \", get_cudnn_version())"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "## Build model"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 13,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "def build_model(n_classes=N_CLASSES):\n",
 81 |     "    net = gluon.nn.HybridSequential()  # layer stack; returned without initialized parameters\n",
 82 |     "    with net.name_scope():\n",
 83 |     "        net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, padding=1, activation='relu'))\n",
 84 |     "        net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, padding=1))\n",
 85 |     "        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n",
 86 |     "        net.add(gluon.nn.Activation('relu'))\n",
 87 |     "        # The ReLU above is equiv to gluon.nn.LeakyReLU(0)\n",
 88 |     "        net.add(gluon.nn.Dropout(0.25))\n",
 89 |     "        net.add(gluon.nn.Conv2D(channels=100, kernel_size=3, padding=1, activation='relu'))\n",
 90 |     "        net.add(gluon.nn.Conv2D(channels=100, kernel_size=3, padding=1))\n",
 91 |     "        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n",
 92 |     "        net.add(gluon.nn.Activation('relu'))\n",
 93 |     "        net.add(gluon.nn.Dropout(0.25))\n",
 94 |     "        net.add(gluon.nn.Flatten())\n",
 95 |     "        net.add(gluon.nn.Dense(512, activation='relu'))\n",
 96 |     "        net.add(gluon.nn.Dropout(0.5))  # 0.5 after the dense layer, same as the Chainer model's fc1 dropout\n",
 97 |     "        net.add(gluon.nn.Dense(n_classes))\n",
 98 |     "    return net"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "## Init model"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 8,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "def init_model(net, ctx, lr=LR, momentum=MOMENTUM):\n",
115 |     "    \"\"\" Xavier-initialize net on ctx; return (SGD trainer, softmax cross-entropy loss) \"\"\"\n",
116 |     "    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)\n",
117 |     "    sgd_settings = {'learning_rate': lr, 'momentum': momentum}\n",
118 |     "    sgd_trainer = gluon.Trainer(net.collect_params(), 'sgd', sgd_settings)\n",
119 |     "    softmax_ce = gluon.loss.SoftmaxCrossEntropyLoss()\n",
120 |     "    # Callers unpack the result as (trainer, criterion)\n",
121 |     "    return sgd_trainer, softmax_ce"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "## Get data"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 17,
134 |    "metadata": {
135 |     "scrolled": true
136 |    },
137 |    "outputs": [
138 |     {
139 |      "name": "stdout",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "Preparing train set...\n",
143 |       "Preparing test set...\n",
144 |       "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
145 |       "float32 float32 int32 int32\n",
146 |       "CPU times: user 776 ms, sys: 568 ms, total: 1.34 s\n",
147 |       "Wall time: 2.34 s\n"
148 |      ]
149 |     }
150 |    ],
151 |    "source": [
152 |     "%%time\n",
153 |     "# Data into format for library\n",
154 |     "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
155 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
156 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "## Create model"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 30,
169 |    "metadata": {},
170 |    "outputs": [
171 |     {
172 |      "name": "stdout",
173 |      "output_type": "stream",
174 |      "text": [
175 |       "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n",
176 |       "Wall time: 3.4 ms\n"
177 |      ]
178 |     }
179 |    ],
180 |    "source": [
181 |     "%%time\n",
182 |     "ctx = mx.gpu()\n",
183 |     "net = build_model()"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 31,
189 |    "metadata": {},
190 |    "outputs": [
191 |     {
192 |      "name": "stdout",
193 |      "output_type": "stream",
194 |      "text": [
195 |       "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n",
196 |       "Wall time: 4.42 ms\n"
197 |      ]
198 |     }
199 |    ],
200 |    "source": [
201 |     "%%time\n",
202 |     "trainer, criterion = init_model(net, ctx)"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "## Training Loop"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 32,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "Epoch   0: loss: 1.8582\n",
222 |       "Epoch   1: loss: 1.3819\n",
223 |       "Epoch   2: loss: 1.1333\n",
224 |       "Epoch   3: loss: 0.9515\n",
225 |       "Epoch   4: loss: 0.8145\n",
226 |       "Epoch   5: loss: 0.7097\n",
227 |       "Epoch   6: loss: 0.6174\n",
228 |       "Epoch   7: loss: 0.5324\n",
229 |       "Epoch   8: loss: 0.4575\n",
230 |       "Epoch   9: loss: 0.3964\n",
231 |       "CPU times: user 49 s, sys: 13.3 s, total: 1min 2s\n",
232 |       "Wall time: 37.2 s\n"
233 |      ]
234 |     }
235 |    ],
236 |    "source": [
237 |     "%%time\n",
238 |     "net.hybridize()\n",
239 |     "for epoch in range(EPOCHS):\n",
240 |     "    epoch_loss = nd.zeros(1, ctx=ctx)\n",
241 |     "    for n_batches, (x_mb, y_mb) in enumerate(yield_mb(x_train, y_train, BATCHSIZE, shuffle=True), 1):\n",
242 |     "        # Copy the minibatch to the training context\n",
243 |     "        x_dev = nd.array(x_mb).as_in_context(ctx)\n",
244 |     "        y_dev = nd.array(y_mb).as_in_context(ctx)\n",
245 |     "        with autograd.record():\n",
246 |     "            # Forward pass\n",
247 |     "            logits = net(x_dev)\n",
248 |     "            # Loss for this minibatch\n",
249 |     "            batch_loss = criterion(logits, y_dev)\n",
250 |     "        # Backward pass, then an optimizer step given the batch size\n",
251 |     "        batch_loss.backward()\n",
252 |     "        trainer.step(x_dev.shape[0])\n",
253 |     "        epoch_loss += batch_loss.mean()\n",
254 |     "    # Log the average of the per-batch mean losses\n",
255 |     "    print('Epoch %3d: loss: %5.4f'%(epoch, epoch_loss.asscalar()/n_batches))"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {},
261 |    "source": [
262 |     "## Evaluation loop"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": 34,
268 |    "metadata": {},
269 |    "outputs": [
270 |     {
271 |      "name": "stdout",
272 |      "output_type": "stream",
273 |      "text": [
274 |       "CPU times: user 296 ms, sys: 48 ms, total: 344 ms\n",
275 |       "Wall time: 278 ms\n"
276 |      ]
277 |     }
278 |    ],
279 |    "source": [
280 |     "%%time\n",
281 |     "# Main evaluation loop: 453ms\n",
282 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE  # samples covered by whole minibatches\n",
283 |     "y_guess = np.zeros(n_samples, dtype=int)  # builtin int: the np.int alias was removed in NumPy 1.24\n",
284 |     "y_truth = y_test[:n_samples]\n",
285 |     "c = 0\n",
286 |     "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
287 |     "    # Get samples\n",
288 |     "    data = nd.array(data).as_in_context(ctx)\n",
289 |     "    # Forwards: arg-max over axis 1 gives the predicted class per sample\n",
290 |     "    output = net(data)\n",
291 |     "    pred = nd.argmax(output, axis=1)\n",
292 |     "    # Collect results into the slot for minibatch c\n",
293 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred.asnumpy()\n",
294 |     "    c += 1"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 35,
300 |    "metadata": {},
301 |    "outputs": [
302 |     {
303 |      "name": "stdout",
304 |      "output_type": "stream",
305 |      "text": [
306 |       "Accuracy:  0.765324519231\n"
307 |      ]
308 |     }
309 |    ],
310 |    "source": [
311 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
312 |    ]
313 |   }
314 |  ],
315 |  "metadata": {
316 |   "anaconda-cloud": {},
317 |   "kernelspec": {
318 |    "display_name": "Python 3",
319 |    "language": "python",
320 |    "name": "python3"
321 |   },
322 |   "language_info": {
323 |    "codemirror_mode": {
324 |     "name": "ipython",
325 |     "version": 3
326 |    },
327 |    "file_extension": ".py",
328 |    "mimetype": "text/x-python",
329 |    "name": "python",
330 |    "nbconvert_exporter": "python",
331 |    "pygments_lexer": "ipython3",
332 |    "version": "3.6.3"
333 |   }
334 |  },
335 |  "nbformat": 4,
336 |  "nbformat_minor": 2
337 | }
338 | 


--------------------------------------------------------------------------------
/notebooks/Gluon_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# MXNet/Gluon Inference"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 13,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import time\n",
 19 |     "import numpy as np\n",
 20 |     "import mxnet as mx\n",
 21 |     "from mxnet import gluon, nd\n",
 22 |     "from collections import namedtuple\n",
 23 |     "from common.params_inf import *\n",
 24 |     "from common.utils import *"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "# Force one-gpu\n",
 34 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 3,
 40 |    "metadata": {},
 41 |    "outputs": [
 42 |     {
 43 |      "name": "stdout",
 44 |      "output_type": "stream",
 45 |      "text": [
 46 |       "OS:  linux\n",
 47 |       "Python:  3.6.3 |Anaconda custom (64-bit)| (default, Oct 13 2017, 12:02:49) \n",
 48 |       "[GCC 7.2.0]\n",
 49 |       "Numpy:  1.13.3\n",
 50 |       "MXNet:  1.3.0\n",
 51 |       "GPU:  ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n",
 52 |       "CUDA Version 9.1.85\n",
 53 |       "CuDNN Version  7.1.3\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "print(\"OS: \", sys.platform)\n",
 59 |     "print(\"Python: \", sys.version)\n",
 60 |     "print(\"Numpy: \", np.__version__)\n",
 61 |     "print(\"MXNet: \", mx.__version__)\n",
 62 |     "print(\"GPU: \", get_gpu_name())\n",
 63 |     "print(get_cuda_version())\n",
 64 |     "print(\"CuDNN Version \", get_cudnn_version())"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "## Get pre-trained model"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "# We create the network\n",
 81 |     "ctx = mx.gpu()\n",
 82 |     "net = mx.gluon.model_zoo.vision.resnet50_v1(pretrained=True, ctx=ctx).features\n",
 83 |     "# We hybridize the network\n",
 84 |     "net.hybridize(static_alloc=True, static_shape=True)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "## Get data"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 5,
 97 |    "metadata": {},
 98 |    "outputs": [
 99 |     {
100 |      "name": "stdout",
101 |      "output_type": "stream",
102 |      "text": [
103 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
104 |      ]
105 |     }
106 |    ],
107 |    "source": [
108 |     "# Create batches of fake data\n",
109 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
110 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "## Run inference"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 7,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "def predict_fn(classifier, data, batchsize):\n",
127 |     "    \"\"\" Return features from classifier \"\"\"\n",
128 |     "    out = nd.zeros((len(data), RESNET_FEATURES), dtype=np.float32, ctx=ctx)\n",
129 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
130 |     "        outputs = classifier(mx.nd.array(dta, ctx=ctx))\n",
131 |     "        out[idx*batchsize:(idx+1)*batchsize] = outputs.squeeze()\n",
132 |     "    return out.asnumpy()"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 8,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "cold_start = predict_fn(net, fake_input_data_cf, BATCH_SIZE)"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 14,
147 |    "metadata": {},
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "CPU times: user 1.37 s, sys: 328 ms, total: 1.7 s\n",
154 |       "Wall time: 1.25 s\n"
155 |      ]
156 |     }
157 |    ],
158 |    "source": [
159 |     "%%time\n",
160 |     "tick = time.time()\n",
161 |     "features = predict_fn(net, fake_input_data_cf, BATCH_SIZE)\n",
162 |     "total = time.time()-tick"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 15,
168 |    "metadata": {},
169 |    "outputs": [
170 |     {
171 |      "name": "stdout",
172 |      "output_type": "stream",
173 |      "text": [
174 |       "Images per second 1024.1136844948533\n"
175 |      ]
176 |     }
177 |    ],
178 |    "source": [
179 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/total))"
180 |    ]
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.6.3"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 2
204 | }
205 | 


--------------------------------------------------------------------------------
/notebooks/Gluon_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN Gluon Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 11,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import math\n",
 20 |     "import mxnet as mx\n",
 21 |     "from mxnet import gluon\n",
 22 |     "from common.params_lstm import *\n",
 23 |     "from common.utils import *"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 9,
 29 |    "metadata": {},
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stdout",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "OS:  linux\n",
 36 |       "Python:  3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 18:10:19) \n",
 37 |       "[GCC 7.2.0]\n",
 38 |       "MXNet:  1.3.0\n",
 39 |       "Numpy:  1.13.3\n",
 40 |       "GPU:  ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n",
 41 |       "CUDA Version 9.1.85\n",
 42 |       "CuDNN Version  7.1.3\n"
 43 |      ]
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "print(\"OS: \", sys.platform)\n",
 48 |     "print(\"Python: \", sys.version)\n",
 49 |     "print(\"MXNet: \", mx.__version__)\n",
 50 |     "print(\"Numpy: \", np.__version__)\n",
 51 |     "print(\"GPU: \", get_gpu_name())\n",
 52 |     "print(get_cuda_version())\n",
 53 |     "print(\"CuDNN Version \", get_cudnn_version())"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 60,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "30000\n",
 66 |       "125\n",
 67 |       "100\n",
 68 |       "150\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "print(MAXFEATURES)\n",
 74 |     "print(EMBEDSIZE)\n",
 75 |     "print(NUMHIDDEN)\n",
 76 |     "print(MAXLEN)"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "## Create the model"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 136,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "class RNN(gluon.Block):\n",
 93 |     "    def __init__(self, \n",
 94 |     "                 maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, **kwargs):\n",
 95 |     "        super(RNN, self).__init__(**kwargs)\n",
 96 |     "        self.nhid = nhid\n",
 97 |     "        with self.name_scope():\n",
 98 |     "            self.embedding = gluon.nn.Embedding(input_dim=maxf,\n",
 99 |     "                                          output_dim=edim)\n",
100 |     "            self.gru = gluon.rnn.GRU(\n",
101 |     "                              hidden_size=nhid, \n",
102 |     "                              num_layers=1,\n",
103 |     "                              layout=\"NTC\",\n",
104 |     "                              bidirectional=False)   \n",
105 |     "            self.l_out = gluon.nn.Dense(units=2)\n",
106 |     "\n",
107 |     "    def forward(self, x):\n",
108 |     "        x = self.embedding(x) \n",
109 |     "        x = self.gru(x) # default state will be all 0\n",
110 |     "        x = x[:,-1,:].squeeze()\n",
111 |     "        x = self.l_out(x)\n",
112 |     "        return x"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "## Create optimizer"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 137,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "def init_model(net, ctx, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
129 |     "    \"\"\" Initialise net on ctx (Xavier); return (Adam trainer, softmax cross-entropy loss) \"\"\"\n",
130 |     "    net.initialize(mx.init.Xavier(), ctx=ctx)\n",
131 |     "    # Use the function arguments (not the module-level constants) so caller overrides take effect\n",
132 |     "    adam_args = {'learning_rate': lr, 'beta1': b1, 'beta2': b2, 'epsilon': eps}\n",
133 |     "    trainer = gluon.Trainer(net.collect_params(), 'adam', adam_args)\n",
134 |     "    # Loss object handed back to the caller alongside the trainer\n",
135 |     "    criterion = gluon.loss.SoftmaxCrossEntropyLoss()\n",
136 |     "    return trainer, criterion"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "## Get data"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 138,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "Preparing train set...\n",
156 |       "Preparing test set...\n",
157 |       "Trimming to 30000 max-features\n",
158 |       "Padding to length 150\n",
159 |       "(25000, 150) (25000, 150) (25000,) (25000,)\n",
160 |       "int64 int64 int64 int64\n",
161 |       "CPU times: user 5.63 s, sys: 248 ms, total: 5.88 s\n",
162 |       "Wall time: 5.87 s\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "%%time\n",
168 |     "# Data into format for library\n",
169 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
170 |     "# Cast inputs and labels to int64\n",
171 |     "x_train = x_train.astype(np.int64)\n",
172 |     "x_test = x_test.astype(np.int64)\n",
173 |     "y_train = y_train.astype(np.int64)\n",
174 |     "y_test = y_test.astype(np.int64)\n",
175 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
176 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "## Initialize Model"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 139,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "# Run on one GPU\n",
193 |     "ctx = mx.gpu(0)"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": 140,
199 |    "metadata": {},
200 |    "outputs": [
201 |     {
202 |      "name": "stdout",
203 |      "output_type": "stream",
204 |      "text": [
205 |       "CPU times: user 40 ms, sys: 0 ns, total: 40 ms\n",
206 |       "Wall time: 3.47 ms\n"
207 |      ]
208 |     }
209 |    ],
210 |    "source": [
211 |     "%%time\n",
212 |     "net = RNN()\n",
213 |     "trainer, loss_fn = init_model(net, ctx)"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "## Train Model"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 141,
226 |    "metadata": {},
227 |    "outputs": [
228 |     {
229 |      "name": "stdout",
230 |      "output_type": "stream",
231 |      "text": [
232 |       "Epoch [0], loss: 0.4858\n",
233 |       "Epoch [1], loss: 0.2264\n",
234 |       "Epoch [2], loss: 0.1178\n",
235 |       "CPU times: user 15.2 s, sys: 3.01 s, total: 18.2 s\n",
236 |       "Wall time: 11.4 s\n"
237 |      ]
238 |     }
239 |    ],
240 |    "source": [
241 |     "%%time\n",
242 |     "for i in range(EPOCHS):\n",
243 |     "    loss_acc = mx.nd.zeros((1), ctx)\n",
244 |     "    for j, (data, target) in enumerate(yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)):\n",
245 |     "        # Get samples\n",
246 |     "        data = mx.nd.array(data, ctx=ctx)\n",
247 |     "        target = mx.nd.array(target, ctx=ctx)\n",
248 |     "        # Forwards\n",
249 |     "        with mx.autograd.record():\n",
250 |     "            output = net(data)\n",
251 |     "            loss = loss_fn(output, target)\n",
252 |     "        # Back-prop\n",
253 |     "        loss.backward()\n",
254 |     "        loss_acc += loss.mean()\n",
255 |     "        trainer.step(data.shape[0])\n",
256 |     "    print(\"Epoch [{}], loss: {:.4f}\".format(i, loss_acc.asscalar()/(j+1)))"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "markdown",
261 |    "metadata": {},
262 |    "source": [
263 |     "## Evaluate"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 142,
269 |    "metadata": {},
270 |    "outputs": [
271 |     {
272 |      "name": "stdout",
273 |      "output_type": "stream",
274 |      "text": [
275 |       "CPU times: user 9.34 s, sys: 16.2 s, total: 25.5 s\n",
276 |       "Wall time: 1.78 s\n"
277 |      ]
278 |     }
279 |    ],
280 |    "source": [
281 |     "%%time\n",
282 |     "# Main evaluation loop: 1.52s\n",
283 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE  # whole batches only\n",
284 |     "y_guess = mx.nd.zeros((n_samples), dtype=int)  # builtin int: the np.int alias was removed in NumPy 1.24\n",
285 |     "y_truth = y_test[:n_samples]\n",
286 |     "c = 0\n",
287 |     "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
288 |     "    # Copy the batch to the target device\n",
289 |     "    data = mx.nd.array(data, ctx=ctx)\n",
290 |     "    target = mx.nd.array(target, ctx=ctx)\n",
291 |     "    # Forwards\n",
292 |     "    output = net(data)\n",
293 |     "    pred = output.topk(k=1).squeeze()\n",
294 |     "    # Store the predicted class ids for batch c\n",
295 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
296 |     "    c += 1\n",
297 |     "mx.nd.waitall()"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 143,
303 |    "metadata": {},
304 |    "outputs": [
305 |     {
306 |      "name": "stdout",
307 |      "output_type": "stream",
308 |      "text": [
309 |       "Accuracy:  0.857892628205\n"
310 |      ]
311 |     }
312 |    ],
313 |    "source": [
314 |     "print(\"Accuracy: \", sum(y_guess.asnumpy() == y_truth)/len(y_guess))"
315 |    ]
316 |   }
317 |  ],
318 |  "metadata": {
319 |   "kernelspec": {
320 |    "display_name": "Environment (conda_mxnet_p36)",
321 |    "language": "python",
322 |    "name": "conda_mxnet_p36"
323 |   },
324 |   "language_info": {
325 |    "codemirror_mode": {
326 |     "name": "ipython",
327 |     "version": 3
328 |    },
329 |    "file_extension": ".py",
330 |    "mimetype": "text/x-python",
331 |    "name": "python",
332 |    "nbconvert_exporter": "python",
333 |    "pygments_lexer": "ipython3",
334 |    "version": "3.6.4"
335 |   }
336 |  },
337 |  "nbformat": 4,
338 |  "nbformat_minor": 2
339 | }
340 | 


--------------------------------------------------------------------------------
/notebooks/KerasR_TF_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level Keras R (TF) RNN Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# SETUP\n",
 19 |     "#\n",
 20 |     "# Install keras R\n",
 21 |     "# install.packages('keras', repos = \"https://cloud.r-project.org\")\n",
 22 |     "# \n",
 23 |     "# Update reticulate from cran (it defaults to mran which has an outdated version)\n",
 24 |     "# install.packages(\"reticulate\", repos = \"https://cloud.r-project.org\")"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [
 32 |     {
 33 |      "name": "stderr",
 34 |      "output_type": "stream",
 35 |      "text": [
 36 |       "Loading required package: rjson\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "library(keras)\n",
 42 |     "use_python('/anaconda/envs/py35')\n",
 43 |     "\n",
 44 |     "# Import util functions\n",
 45 |     "source(\"./common/utils.R\")\n",
 46 |     "\n",
 47 |     "# Import hyper-parameters\n",
 48 |     "params <- load_params('lstm')"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "# reticulate::py_config()"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {},
 66 |    "outputs": [
 67 |     {
 68 |      "name": "stdout",
 69 |      "output_type": "stream",
 70 |      "text": [
 71 |       "OS: Linux \n",
 72 |       "R version 3.4.1 (2017-06-30) \n",
 73 |       "Keras: 2.1.5 \n",
 74 |       "Tensorflow: 1.5 \n",
 75 |       "Keras using tensorflow \n",
 76 |       "Keras channel ordering is channels_last \n",
 77 |       "GPU:  Tesla P100-PCIE-16GB \n",
 78 |       "CUDA Version 8.0.61 \n",
 79 |       "CuDNN Version 6.0.21 \n"
 80 |      ]
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     "cat(\"OS:\", Sys.info()[\"sysname\"], \"\\n\")\n",
 85 |     "cat(R.version$version.string, \"\\n\")\n",
 86 |     "cat(\"Keras:\", paste0(packageVersion(\"keras\")), \"\\n\")\n",
 87 |     "cat(\"Tensorflow:\", paste0(packageVersion(\"tensorflow\")), \"\\n\")\n",
 88 |     "cat(\"Keras using\", backend()$backend(), \"\\n\")\n",
 89 |     "cat(\"Keras channel ordering is\", backend()$image_data_format(), \"\\n\") \n",
 90 |     "cat(\"GPU: \", get_gpu_name(), \"\\n\")\n",
 91 |     "cat(get_cuda_version(), \"\\n\")\n",
 92 |     "cat(get_cudnn_version(), \"\\n\")"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 5,
 98 |    "metadata": {
 99 |     "collapsed": true
100 |    },
101 |    "outputs": [],
102 |    "source": [
103 |     "create_symbol <- function(CUDNN = TRUE, maxf = params$MAXFEATURES, edim = params$EMBEDSIZE, nhid = params$NUMHIDDEN, maxl = params$MAXLEN){\n",
104 |     "    \n",
105 |     "    model <- keras_model_sequential() %>%\n",
106 |     "    \n",
107 |     "    layer_embedding(maxf, edim, input_length = maxl)\n",
108 |     "    \n",
109 |     "    if (CUDNN){\n",
110 |     "        model %>% layer_cudnn_gru(units = nhid, return_sequences = FALSE, return_state = FALSE)\n",
111 |     "        } else{\n",
112 |     "        model %>% layer_gru(units = nhid, return_sequences = FALSE, return_state = FALSE)    \n",
113 |     "    }\n",
114 |     "    \n",
115 |     "    model %>% layer_dense(2, activation = \"softmax\")\n",
116 |     "    \n",
117 |     "    return(model)\n",
118 |     "}"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 6,
124 |    "metadata": {
125 |     "collapsed": true
126 |    },
127 |    "outputs": [],
128 |    "source": [
129 |     "init_model <- function(m, lr=params$LR, b1=params$BETA_1, b2=params$BETA_2, eps=params$EPS){\n",
130 |     "    m %>% compile(\n",
131 |     "      loss = \"categorical_crossentropy\",\n",
132 |     "      optimizer = optimizer_adam(lr = lr, beta_1 = b1, beta_2 = b2, epsilon = eps),\n",
133 |     "      metrics = \"accuracy\"\n",
134 |     "    )\n",
135 |     "    return(m)\n",
136 |     "}"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 7,
142 |    "metadata": {
143 |     "collapsed": true
144 |    },
145 |    "outputs": [],
146 |    "source": [
147 |     "imdb <- imdb_for_library()\n",
148 |     "x_train <- imdb$x_train\n",
149 |     "y_train <- imdb$y_train\n",
150 |     "x_test <- imdb$x_test\n",
151 |     "y_test <- imdb$y_test\n",
152 |     "rm(imdb)\n",
153 |     "\n",
154 |     "y_train <- to_categorical(y_train, num_classes = 2)\n",
155 |     "y_test <- to_categorical(y_test, num_classes = 2)"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 8,
161 |    "metadata": {},
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "x_train shape: 25000 150 \n",
168 |       "x_test shape: 25000 150 \n",
169 |       "y_train shape: 25000 2 \n",
170 |       "y_test shape: 25000 2 \n"
171 |      ]
172 |     }
173 |    ],
174 |    "source": [
175 |     "cat('x_train shape:', dim(x_train), '\\n')\n",
176 |     "cat('x_test shape:', dim(x_test), '\\n')\n",
177 |     "cat('y_train shape:', dim(y_train), '\\n')\n",
178 |     "cat('y_test shape:', dim(y_test), '\\n')"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 9,
184 |    "metadata": {
185 |     "collapsed": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "# Load symbol\n",
190 |     "sym = create_symbol(CUDNN = TRUE)"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 10,
196 |    "metadata": {
197 |     "collapsed": true
198 |    },
199 |    "outputs": [],
200 |    "source": [
201 |     "# Initialise model\n",
202 |     "model = init_model(sym)"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": 11,
208 |    "metadata": {
209 |     "scrolled": true
210 |    },
211 |    "outputs": [
212 |     {
213 |      "name": "stdout",
214 |      "output_type": "stream",
215 |      "text": [
216 |       "________________________________________________________________________________\n",
217 |       "Layer (type)                        Output Shape                    Param #     \n",
218 |       "================================================================================\n",
219 |       "embedding_1 (Embedding)             (None, 150, 125)                3750000     \n",
220 |       "________________________________________________________________________________\n",
221 |       "cu_dnngru_1 (CuDNNGRU)              (None, 100)                     68100       \n",
222 |       "________________________________________________________________________________\n",
223 |       "dense_1 (Dense)                     (None, 2)                       202         \n",
224 |       "================================================================================\n",
225 |       "Total params: 3,818,302\n",
226 |       "Trainable params: 3,818,302\n",
227 |       "Non-trainable params: 0\n",
228 |       "________________________________________________________________________________\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "summary(model)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 12,
239 |    "metadata": {},
240 |    "outputs": [
241 |     {
242 |      "data": {
243 |       "text/plain": [
244 |        "   user  system elapsed \n",
245 |        " 21.399   4.443  25.228 "
246 |       ]
247 |      },
248 |      "metadata": {},
249 |      "output_type": "display_data"
250 |     }
251 |    ],
252 |    "source": [
253 |     "# Main training loop\n",
254 |     "system.time(\n",
255 |     "    model %>% fit(x_train,\n",
256 |     "          y_train,\n",
257 |     "          batch_size=params$BATCHSIZE,\n",
258 |     "          epochs=params$EPOCHS,\n",
259 |     "          verbose=1)\n",
260 |     ")"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": 13,
266 |    "metadata": {
267 |     "collapsed": true
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "# Main evaluation loop\n",
272 |     "y_guess <- model %>% predict_classes(x_test, batch_size = params$BATCHSIZE)\n",
273 |     "y_truth <- apply(y_test, 1, function(x) which.max(x)-1)"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 14,
279 |    "metadata": {},
280 |    "outputs": [
281 |     {
282 |      "name": "stdout",
283 |      "output_type": "stream",
284 |      "text": [
285 |       "[1] \"Accuracy: 0.85124\"\n"
286 |      ]
287 |     }
288 |    ],
289 |    "source": [
290 |     "print(paste0(\"Accuracy: \", sum(y_guess == y_truth)/length(y_guess)))"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": null,
296 |    "metadata": {
297 |     "collapsed": true
298 |    },
299 |    "outputs": [],
300 |    "source": []
301 |   }
302 |  ],
303 |  "metadata": {
304 |   "kernelspec": {
305 |    "display_name": "R",
306 |    "language": "R",
307 |    "name": "ir"
308 |   },
309 |   "language_info": {
310 |    "codemirror_mode": "r",
311 |    "file_extension": ".r",
312 |    "mimetype": "text/x-r-source",
313 |    "name": "R",
314 |    "pygments_lexer": "r",
315 |    "version": "3.4.1"
316 |   }
317 |  },
318 |  "nbformat": 4,
319 |  "nbformat_minor": 2
320 | }
321 | 


--------------------------------------------------------------------------------
/notebooks/Keras_CNTK_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stderr",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Using CNTK backend\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "import os\n",
 18 |     "os.environ['KERAS_BACKEND'] = \"cntk\"\n",
 19 |     "import sys\n",
 20 |     "import numpy as np\n",
 21 |     "import keras as K\n",
 22 |     "import cntk as C\n",
 23 |     "from keras.applications.resnet50 import ResNet50\n",
 24 |     "from common.params_inf import *\n",
 25 |     "from common.utils import *"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# Force one-gpu\n",
 35 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
 36 |     "# Faster with channels-last, maybe because model expects that?\n",
 37 |     "K.backend.set_image_data_format('channels_last')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [
 45 |     {
 46 |      "name": "stdout",
 47 |      "output_type": "stream",
 48 |      "text": [
 49 |       "OS:  linux\n",
 50 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 51 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 52 |       "Numpy:  1.14.1\n",
 53 |       "Keras:  2.1.4\n",
 54 |       "CNTK:  2.4\n",
 55 |       "Keras using cntk\n",
 56 |       "Keras channel ordering is channels_last\n",
 57 |       "CUDA Version 8.0.61\n",
 58 |       "CuDNN Version  6.0.21\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "print(\"OS: \", sys.platform)\n",
 64 |     "print(\"Python: \", sys.version)\n",
 65 |     "print(\"Numpy: \", np.__version__)\n",
 66 |     "print(\"Keras: \", K.__version__)\n",
 67 |     "print(\"CNTK: \", C.__version__)\n",
 68 |     "print(\"Keras using {}\".format(K.backend.backend()))\n",
 69 |     "print(\"Keras channel ordering is {}\".format(K.backend.image_data_format()))\n",
 70 |     "print(get_cuda_version())\n",
 71 |     "print(\"CuDNN Version \", get_cudnn_version())"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "name": "stdout",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "# Create batches of fake data\n",
 89 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 90 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "def predict_fn(classifier, data, batchsize):\n",
100 |     "    \"\"\" Return features from classifier \"\"\"\n",
101 |     "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
102 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
103 |     "        out[idx*batchsize:(idx+1)*batchsize] = classifier.predict_on_batch(dta).squeeze()\n",
104 |     "    return out"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 6,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "# Download Resnet weights\n",
114 |     "model = ResNet50(include_top=False, input_shape=(224,224,3))"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 7,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "fake_input_data_cl = np.ascontiguousarray(fake_input_data_cl)\n",
124 |     "cold_start = predict_fn(model, fake_input_data_cl, BATCH_SIZE)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 8,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "name": "stdout",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "CPU times: user 5.81 s, sys: 1.69 s, total: 7.51 s\n",
137 |       "Wall time: 7.51 s\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "%%time\n",
143 |     "features = predict_fn(model, fake_input_data_cl, BATCH_SIZE)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 10,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "Images per second 170.439414114514\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/7.51))"
161 |    ]
162 |   }
163 |  ],
164 |  "metadata": {
165 |   "kernelspec": {
166 |    "display_name": "Python 3",
167 |    "language": "python",
168 |    "name": "python3"
169 |   },
170 |   "language_info": {
171 |    "codemirror_mode": {
172 |     "name": "ipython",
173 |     "version": 3
174 |    },
175 |    "file_extension": ".py",
176 |    "mimetype": "text/x-python",
177 |    "name": "python",
178 |    "nbconvert_exporter": "python",
179 |    "pygments_lexer": "ipython3",
180 |    "version": "3.5.2"
181 |   }
182 |  },
183 |  "nbformat": 4,
184 |  "nbformat_minor": 2
185 | }
186 | 


--------------------------------------------------------------------------------
/notebooks/Keras_CNTK_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN Keras (CNTK) Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stderr",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Using CNTK backend\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "import os\n",
 25 |     "import sys\n",
 26 |     "import numpy as np\n",
 27 |     "os.environ['KERAS_BACKEND'] = \"cntk\"\n",
 28 |     "import keras as K\n",
 29 |     "import cntk\n",
 30 |     "from keras.models import Sequential\n",
 31 |     "from keras.layers import Dense, Embedding, GRU, CuDNNGRU\n",
 32 |     "from common.params_lstm import *\n",
 33 |     "from common.utils import *"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 2,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Force one-gpu\n",
 43 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "name": "stdout",
 53 |      "output_type": "stream",
 54 |      "text": [
 55 |       "OS:  linux\n",
 56 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 57 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 58 |       "Keras:  2.1.4\n",
 59 |       "Numpy:  1.14.1\n",
 60 |       "CNTK:  2.4\n",
 61 |       "cntk\n",
 62 |       "channels_last\n",
 63 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 64 |       "CUDA Version 8.0.61\n",
 65 |       "CuDNN Version  6.0.21\n"
 66 |      ]
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "print(\"OS: \", sys.platform)\n",
 71 |     "print(\"Python: \", sys.version)\n",
 72 |     "print(\"Keras: \", K.__version__)\n",
 73 |     "print(\"Numpy: \", np.__version__)\n",
 74 |     "print(\"CNTK: \", cntk.__version__)\n",
 75 |     "print(K.backend.backend())\n",
 76 |     "print(K.backend.image_data_format())\n",
 77 |     "print(\"GPU: \", get_gpu_name())\n",
 78 |     "print(get_cuda_version())\n",
 79 |     "print(\"CuDNN Version \", get_cudnn_version())"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 4,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "def create_symbol(CUDNN=True, maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n",
 89 |     "    \"\"\" Stack an Embedding, one GRU layer and a 2-way softmax Dense layer \"\"\"\n",
 90 |     "    # Choose the recurrent layer class up front instead of branching around add()\n",
 91 |     "    rnn_cls = CuDNNGRU if CUDNN else GRU\n",
 92 |     "    net = Sequential()\n",
 93 |     "    net.add(Embedding(maxf, edim, input_length=maxl))\n",
 94 |     "    # Only the last output is returned; the final state is not exposed\n",
 95 |     "    net.add(rnn_cls(nhid, return_sequences=False, return_state=False))\n",
 96 |     "    net.add(Dense(2, activation='softmax'))\n",
 97 |     "    return net"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 5,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
107 |     "    \"\"\" Compile m in place with Adam and cross-entropy loss, then return it \"\"\"\n",
108 |     "    adam = K.optimizers.Adam(lr, beta_1=b1, beta_2=b2, epsilon=eps)\n",
109 |     "    m.compile(optimizer=adam, loss=\"categorical_crossentropy\",\n",
110 |     "              metrics=['accuracy'])\n",
111 |     "    return m"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "name": "stdout",
121 |      "output_type": "stream",
122 |      "text": [
123 |       "Preparing train set...\n",
124 |       "Preparing test set...\n",
125 |       "Trimming to 30000 max-features\n",
126 |       "Padding to length 150\n",
127 |       "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n",
128 |       "int32 int32 int32 int32\n",
129 |       "CPU times: user 5.39 s, sys: 448 ms, total: 5.84 s\n",
130 |       "Wall time: 5.84 s\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "%%time\n",
136 |     "# Data into format for library\n",
137 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)\n",
138 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
139 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 7,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "CPU times: user 428 ms, sys: 554 ms, total: 982 ms\n",
152 |       "Wall time: 597 ms\n"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "%%time\n",
158 |     "# Load symbol\n",
159 |     "# CuDNN RNNs are only available with the TensorFlow backend.\n",
160 |     "sym = create_symbol(CUDNN=False)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 8,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "CPU times: user 22.7 ms, sys: 61.6 ms, total: 84.3 ms\n",
173 |       "Wall time: 7.05 ms\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "%%time\n",
179 |     "# Initialise model\n",
180 |     "model = init_model(sym)"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 9,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "name": "stdout",
190 |      "output_type": "stream",
191 |      "text": [
192 |       "_________________________________________________________________\n",
193 |       "Layer (type)                 Output Shape              Param #   \n",
194 |       "=================================================================\n",
195 |       "embedding_1 (Embedding)      (None, 150, 125)          3750000   \n",
196 |       "_________________________________________________________________\n",
197 |       "gru_1 (GRU)                  (None, 100)               67800     \n",
198 |       "_________________________________________________________________\n",
199 |       "dense_1 (Dense)              (None, 2)                 202       \n",
200 |       "=================================================================\n",
201 |       "Total params: 3,818,002\n",
202 |       "Trainable params: 3,818,002\n",
203 |       "Non-trainable params: 0\n",
204 |       "_________________________________________________________________\n"
205 |      ]
206 |     }
207 |    ],
208 |    "source": [
209 |     "model.summary()"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 10,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "name": "stdout",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "Epoch 1/3\n",
222 |       "25000/25000 [==============================] - 18s 719us/step - loss: 0.5204 - acc: 0.7269\n",
223 |       "Epoch 2/3\n",
224 |       "25000/25000 [==============================] - 18s 709us/step - loss: 0.2381 - acc: 0.9092\n",
225 |       "Epoch 3/3\n",
226 |       "25000/25000 [==============================] - 18s 706us/step - loss: 0.1323 - acc: 0.9545\n",
227 |       "CPU times: user 53.2 s, sys: 1.14 s, total: 54.3 s\n",
228 |       "Wall time: 53.4 s\n"
229 |      ]
230 |     },
231 |     {
232 |      "data": {
233 |       "text/plain": [
234 |        "<keras.callbacks.History at 0x7f0b1e248940>"
235 |       ]
236 |      },
237 |      "execution_count": 10,
238 |      "metadata": {},
239 |      "output_type": "execute_result"
240 |     }
241 |    ],
242 |    "source": [
243 |     "%%time\n",
244 |     "# Main training loop: 53s\n",
245 |     "model.fit(x_train,\n",
246 |     "          y_train,\n",
247 |     "          batch_size=BATCHSIZE,\n",
248 |     "          epochs=EPOCHS,\n",
249 |     "          verbose=1)"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "code",
254 |    "execution_count": 11,
255 |    "metadata": {},
256 |    "outputs": [
257 |     {
258 |      "name": "stdout",
259 |      "output_type": "stream",
260 |      "text": [
261 |       "CPU times: user 7.35 s, sys: 52.2 ms, total: 7.4 s\n",
262 |       "Wall time: 7.4 s\n"
263 |      ]
264 |     }
265 |    ],
266 |    "source": [
267 |     "%%time\n",
268 |     "# Main evaluation loop: 7s\n",
269 |     "y_guess = model.predict(x_test, batch_size=BATCHSIZE)\n",
270 |     "y_guess = np.argmax(y_guess, axis=-1)\n",
271 |     "y_truth = np.argmax(y_test, axis=-1)"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 12,
277 |    "metadata": {},
278 |    "outputs": [
279 |     {
280 |      "name": "stdout",
281 |      "output_type": "stream",
282 |      "text": [
283 |       "Accuracy:  0.86076\n"
284 |      ]
285 |     }
286 |    ],
287 |    "source": [
288 |     "print(\"Accuracy: \", np.mean(y_guess == y_truth))  # vectorized fraction of matching labels"
289 |    ]
290 |   }
291 |  ],
292 |  "metadata": {
293 |   "anaconda-cloud": {},
294 |   "kernelspec": {
295 |    "display_name": "Python 3",
296 |    "language": "python",
297 |    "name": "python3"
298 |   },
299 |   "language_info": {
300 |    "codemirror_mode": {
301 |     "name": "ipython",
302 |     "version": 3
303 |    },
304 |    "file_extension": ".py",
305 |    "mimetype": "text/x-python",
306 |    "name": "python",
307 |    "nbconvert_exporter": "python",
308 |    "pygments_lexer": "ipython3",
309 |    "version": "3.5.2"
310 |   }
311 |  },
312 |  "nbformat": 4,
313 |  "nbformat_minor": 2
314 | }
315 | 


--------------------------------------------------------------------------------
/notebooks/Keras_TF_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stderr",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Using TensorFlow backend.\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "import os\n",
 18 |     "os.environ['KERAS_BACKEND'] = \"tensorflow\"\n",
 19 |     "import sys\n",
 20 |     "import numpy as np\n",
 21 |     "import keras as K\n",
 22 |     "import tensorflow as tf\n",
 23 |     "from keras.applications.resnet50 import ResNet50\n",
 24 |     "from common.params_inf import *\n",
 25 |     "from common.utils import *"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "# Force one-gpu\n",
 35 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
 36 |     "# Faster with channels-last, maybe because model expects that?\n",
 37 |     "K.backend.set_image_data_format('channels_last')"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [
 45 |     {
 46 |      "name": "stdout",
 47 |      "output_type": "stream",
 48 |      "text": [
 49 |       "OS:  linux\n",
 50 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 51 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 52 |       "Numpy:  1.14.1\n",
 53 |       "Keras:  2.1.4\n",
 54 |       "Tensorflow:  1.4.0\n",
 55 |       "Keras using tensorflow\n",
 56 |       "Keras channel ordering is channels_last\n",
 57 |       "CUDA Version 8.0.61\n",
 58 |       "CuDNN Version  6.0.21\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "print(\"OS: \", sys.platform)\n",
 64 |     "print(\"Python: \", sys.version)\n",
 65 |     "print(\"Numpy: \", np.__version__)\n",
 66 |     "print(\"Keras: \", K.__version__)\n",
 67 |     "print(\"Tensorflow: \", tf.__version__)\n",
 68 |     "print(\"Keras using {}\".format(K.backend.backend()))\n",
 69 |     "print(\"Keras channel ordering is {}\".format(K.backend.image_data_format()))\n",
 70 |     "print(get_cuda_version())\n",
 71 |     "print(\"CuDNN Version \", get_cudnn_version())"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "name": "stdout",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "# Create batches of fake data\n",
 89 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 90 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 5,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "def predict_fn(classifier, data, batchsize):\n",
100 |     "    \"\"\" Return features from classifier \"\"\"\n",
101 |     "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
102 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
103 |     "        out[idx*batchsize:(idx+1)*batchsize] = classifier.predict_on_batch(dta).squeeze()\n",
104 |     "    return out"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 6,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "# Download Resnet weights\n",
114 |     "model = ResNet50(include_top=False, input_shape=(224,224,3))"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 7,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "fake_input_data_cl = np.ascontiguousarray(fake_input_data_cl)\n",
124 |     "cold_start = predict_fn(model, fake_input_data_cl, BATCH_SIZE)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 8,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "name": "stdout",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "CPU times: user 3.18 s, sys: 835 ms, total: 4.01 s\n",
137 |       "Wall time: 3.66 s\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "%%time\n",
143 |     "features = predict_fn(model, fake_input_data_cl, BATCH_SIZE)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 10,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "Images per second 349.72677595628414\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/3.66))"
161 |    ]
162 |   }
163 |  ],
164 |  "metadata": {
165 |   "kernelspec": {
166 |    "display_name": "Python 3",
167 |    "language": "python",
168 |    "name": "python3"
169 |   },
170 |   "language_info": {
171 |    "codemirror_mode": {
172 |     "name": "ipython",
173 |     "version": 3
174 |    },
175 |    "file_extension": ".py",
176 |    "mimetype": "text/x-python",
177 |    "name": "python",
178 |    "nbconvert_exporter": "python",
179 |    "pygments_lexer": "ipython3",
180 |    "version": "3.5.2"
181 |   }
182 |  },
183 |  "nbformat": 4,
184 |  "nbformat_minor": 2
185 | }
186 | 


--------------------------------------------------------------------------------
/notebooks/Keras_TF_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN Keras (TF) Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stderr",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Using TensorFlow backend.\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "import os\n",
 25 |     "import sys\n",
 26 |     "import numpy as np\n",
 27 |     "os.environ['KERAS_BACKEND'] = \"tensorflow\"\n",
 28 |     "import keras as K\n",
 29 |     "import tensorflow as tf\n",
 30 |     "from keras.models import Sequential\n",
 31 |     "from keras.layers import Dense, Embedding, GRU, CuDNNGRU\n",
 32 |     "from common.params_lstm import *\n",
 33 |     "from common.utils import *"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 2,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Force one-gpu\n",
 43 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 3,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "name": "stdout",
 53 |      "output_type": "stream",
 54 |      "text": [
 55 |       "OS:  linux\n",
 56 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 57 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 58 |       "Keras:  2.1.4\n",
 59 |       "Numpy:  1.14.1\n",
 60 |       "Tensorflow:  1.4.0\n",
 61 |       "tensorflow\n",
 62 |       "channels_last\n",
 63 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 64 |       "CUDA Version 8.0.61\n",
 65 |       "CuDNN Version  6.0.21\n"
 66 |      ]
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "print(\"OS: \", sys.platform)\n",
 71 |     "print(\"Python: \", sys.version)\n",
 72 |     "print(\"Keras: \", K.__version__)\n",
 73 |     "print(\"Numpy: \", np.__version__)\n",
 74 |     "print(\"Tensorflow: \", tf.__version__)\n",
 75 |     "print(K.backend.backend())\n",
 76 |     "print(K.backend.image_data_format())\n",
 77 |     "print(\"GPU: \", get_gpu_name())\n",
 78 |     "print(get_cuda_version())\n",
 79 |     "print(\"CuDNN Version \", get_cudnn_version())"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 4,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "def create_symbol(CUDNN=True, maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n",
 89 |     "    \"\"\" Stack an Embedding, one GRU layer and a 2-way softmax Dense layer \"\"\"\n",
 90 |     "    # Choose the recurrent layer class up front instead of branching around add()\n",
 91 |     "    rnn_cls = CuDNNGRU if CUDNN else GRU\n",
 92 |     "    net = Sequential()\n",
 93 |     "    net.add(Embedding(maxf, edim, input_length=maxl))\n",
 94 |     "    # Only the last output is returned; the final state is not exposed\n",
 95 |     "    net.add(rnn_cls(nhid, return_sequences=False, return_state=False))\n",
 96 |     "    net.add(Dense(2, activation='softmax'))\n",
 97 |     "    return net"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 5,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
107 |     "    \"\"\" Compile m in place with Adam and cross-entropy loss, then return it \"\"\"\n",
108 |     "    adam = K.optimizers.Adam(lr, beta_1=b1, beta_2=b2, epsilon=eps)\n",
109 |     "    m.compile(optimizer=adam, loss=\"categorical_crossentropy\",\n",
110 |     "              metrics=['accuracy'])\n",
111 |     "    return m"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "name": "stdout",
121 |      "output_type": "stream",
122 |      "text": [
123 |       "Preparing train set...\n",
124 |       "Preparing test set...\n",
125 |       "Trimming to 30000 max-features\n",
126 |       "Padding to length 150\n",
127 |       "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n",
128 |       "int32 int32 int32 int32\n",
129 |       "CPU times: user 5.94 s, sys: 401 ms, total: 6.35 s\n",
130 |       "Wall time: 6.35 s\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "%%time\n",
136 |     "# Data into format for library\n",
137 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)\n",
138 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
139 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 7,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "CPU times: user 1.04 s, sys: 699 ms, total: 1.74 s\n",
152 |       "Wall time: 916 ms\n"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "%%time\n",
158 |     "# Load symbol\n",
159 |     "sym = create_symbol()"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 8,
165 |    "metadata": {},
166 |    "outputs": [
167 |     {
168 |      "name": "stdout",
169 |      "output_type": "stream",
170 |      "text": [
171 |       "CPU times: user 99.9 ms, sys: 227 ms, total: 326 ms\n",
172 |       "Wall time: 27.5 ms\n"
173 |      ]
174 |     }
175 |    ],
176 |    "source": [
177 |     "%%time\n",
178 |     "# Initialise model\n",
179 |     "model = init_model(sym)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 9,
185 |    "metadata": {},
186 |    "outputs": [
187 |     {
188 |      "name": "stdout",
189 |      "output_type": "stream",
190 |      "text": [
191 |       "_________________________________________________________________\n",
192 |       "Layer (type)                 Output Shape              Param #   \n",
193 |       "=================================================================\n",
194 |       "embedding_1 (Embedding)      (None, 150, 125)          3750000   \n",
195 |       "_________________________________________________________________\n",
196 |       "cu_dnngru_1 (CuDNNGRU)       (None, 100)               68100     \n",
197 |       "_________________________________________________________________\n",
198 |       "dense_1 (Dense)              (None, 2)                 202       \n",
199 |       "=================================================================\n",
200 |       "Total params: 3,818,302\n",
201 |       "Trainable params: 3,818,302\n",
202 |       "Non-trainable params: 0\n",
203 |       "_________________________________________________________________\n"
204 |      ]
205 |     }
206 |    ],
207 |    "source": [
208 |     "model.summary()"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": 10,
214 |    "metadata": {},
215 |    "outputs": [
216 |     {
217 |      "name": "stdout",
218 |      "output_type": "stream",
219 |      "text": [
220 |       "Epoch 1/3\n",
221 |       "25000/25000 [==============================] - 10s 386us/step - loss: 0.5025 - acc: 0.7336\n",
222 |       "Epoch 2/3\n",
223 |       "25000/25000 [==============================] - 8s 312us/step - loss: 0.2273 - acc: 0.9138\n",
224 |       "Epoch 3/3\n",
225 |       "25000/25000 [==============================] - 8s 313us/step - loss: 0.1246 - acc: 0.9572\n",
226 |       "CPU times: user 23 s, sys: 4.06 s, total: 27.1 s\n",
227 |       "Wall time: 25.6 s\n"
228 |      ]
229 |     },
230 |     {
231 |      "data": {
232 |       "text/plain": [
233 |        "<keras.callbacks.History at 0x7fa259f59fd0>"
234 |       ]
235 |      },
236 |      "execution_count": 10,
237 |      "metadata": {},
238 |      "output_type": "execute_result"
239 |     }
240 |    ],
241 |    "source": [
242 |     "%%time\n",
243 |     "# Main training loop: 26s\n",
244 |     "model.fit(x_train,\n",
245 |     "          y_train,\n",
246 |     "          batch_size=BATCHSIZE,\n",
247 |     "          epochs=EPOCHS,\n",
248 |     "          verbose=1)"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 11,
254 |    "metadata": {},
255 |    "outputs": [
256 |     {
257 |      "name": "stdout",
258 |      "output_type": "stream",
259 |      "text": [
260 |       "CPU times: user 2.59 s, sys: 386 ms, total: 2.97 s\n",
261 |       "Wall time: 2.72 s\n"
262 |      ]
263 |     }
264 |    ],
265 |    "source": [
266 |     "%%time\n",
267 |     "# Main evaluation loop: 3s\n",
268 |     "y_guess = model.predict(x_test, batch_size=BATCHSIZE)\n",
269 |     "y_guess = np.argmax(y_guess, axis=-1)\n",
270 |     "y_truth = np.argmax(y_test, axis=-1)"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 12,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "name": "stdout",
280 |      "output_type": "stream",
281 |      "text": [
282 |       "Accuracy:  0.85496\n"
283 |      ]
284 |     }
285 |    ],
286 |    "source": [
287 |     "print(\"Accuracy: \", np.mean(y_guess == y_truth))  # vectorized fraction of matching labels"
288 |    ]
289 |   }
290 |  ],
291 |  "metadata": {
292 |   "anaconda-cloud": {},
293 |   "kernelspec": {
294 |    "display_name": "Python 3",
295 |    "language": "python",
296 |    "name": "python3"
297 |   },
298 |   "language_info": {
299 |    "codemirror_mode": {
300 |     "name": "ipython",
301 |     "version": 3
302 |    },
303 |    "file_extension": ".py",
304 |    "mimetype": "text/x-python",
305 |    "name": "python",
306 |    "nbconvert_exporter": "python",
307 |    "pygments_lexer": "ipython3",
308 |    "version": "3.5.2"
309 |   }
310 |  },
311 |  "nbformat": 4,
312 |  "nbformat_minor": 2
313 | }
314 | 


--------------------------------------------------------------------------------
/notebooks/Knet_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Knet CNN Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "# After installing and starting Julia run the following to install the required packages:\n",
 17 |     "# Pkg.init(); Pkg.update()\n",
 18 |     "# for p in (\"CUDAdrv\",\"IJulia\",\"Knet\"); Pkg.add(p); end\n",
 19 |     "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n",
 20 |     "# Pkg.build(\"Knet\")"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 2,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "using Knet\n",
 30 |     "True=true # so we can read the python params\n",
 31 |     "include(\"common/params.py\");"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "OS: Linux\n",
 44 |       "Julia: 0.6.1\n",
 45 |       "Knet: 0.8.5+\n",
 46 |       "GPU: Tesla K80\n",
 47 |       "\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "println(\"OS: \", Sys.KERNEL)\n",
 53 |     "println(\"Julia: \", VERSION)\n",
 54 |     "println(\"Knet: \", Pkg.installed(\"Knet\"))\n",
 55 |     "println(\"GPU: \", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {
 62 |     "collapsed": true
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# model parameters as a flat list of (weight, bias) pairs, in the order predict indexes them\n",
 67 |     "function initmodel(; atype=KnetArray, dtype=Float32, winit=xavier, binit=zeros)\n",
 68 |     "    mkweight(dims...) = atype(winit(dtype, dims...))\n",
 69 |     "    mkbias(dims...) = atype(binit(dtype, dims...))\n",
 70 |     "    Any[\n",
 71 |     "        mkweight(3,3,3,50),     mkbias(1,1,50,1),   # w[1],  w[2]\n",
 72 |     "        mkweight(3,3,50,50),    mkbias(1,1,50,1),   # w[3],  w[4]\n",
 73 |     "        mkweight(3,3,50,100),   mkbias(1,1,100,1),  # w[5],  w[6]\n",
 74 |     "        mkweight(3,3,100,100),  mkbias(1,1,100,1),  # w[7],  w[8]\n",
 75 |     "        mkweight(512,6400),     mkbias(512,1),      # w[9],  w[10]\n",
 76 |     "        mkweight(10,512),       mkbias(10,1)        # w[11], w[12]\n",
 77 |     "    ]\n",
 78 |     "end;"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 5,
 84 |    "metadata": {
 85 |     "collapsed": true
 86 |    },
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "# forward pass, loss and the gradient of the loss\n",
 90 |     "function predict(w,x)\n",
 91 |     "    convlayer(inp,kern,bias) = conv4(kern,inp;padding=1) .+ bias\n",
 92 |     "    dense(inp,wt,bias) = wt * mat(inp) .+ bias\n",
 93 |     "    h = relu.(convlayer(x,w[1],w[2]))\n",
 94 |     "    h = relu.(pool(convlayer(h,w[3],w[4])))\n",
 95 |     "    h = dropout(h,0.25)\n",
 96 |     "    h = relu.(convlayer(h,w[5],w[6]))\n",
 97 |     "    h = relu.(pool(convlayer(h,w[7],w[8])))\n",
 98 |     "    h = dropout(h,0.25)\n",
 99 |     "    h = relu.(dense(h,w[9],w[10]))\n",
100 |     "    h = dropout(h,0.5)\n",
101 |     "    return dense(h,w[11],w[12])\n",
102 |     "end\n",
103 |     "\n",
104 |     "loss(w,x,y) = nll(predict(w,x), y) # nll: negative log likelihood\n",
105 |     "lossgradient = grad(loss);"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 6,
111 |    "metadata": {},
112 |    "outputs": [
113 |     {
114 |      "name": "stderr",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mReading cifar-10-binary.tar.gz...\n",
118 |       "\u001b[39m"
119 |      ]
120 |     },
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "  3.525842 seconds (1.27 M allocations: 1.783 GiB, 18.79% gc time)\n",
126 |       "32×32×3×50000 Array{Float32,4}\n",
127 |       "50000-element Array{UInt8,1}\n",
128 |       "32×32×3×10000 Array{Float32,4}\n",
129 |       "10000-element Array{UInt8,1}\n"
130 |      ]
131 |     }
132 |    ],
133 |    "source": [
134 |     "# load data\n",
135 |     "include(Knet.dir(\"data\",\"cifar.jl\"))\n",
136 |     "@time (xtrn,ytrn,xtst,ytst,lbls)=cifar10()\n",
137 |     "for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 7,
143 |    "metadata": {
144 |     "collapsed": true
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "# prepare for training\n",
149 |     "model = optim = nothing; knetgc() # Clear memory from last run\n",
150 |     "model = initmodel()\n",
151 |     "optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 8,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       " 25.437272 seconds (4.03 M allocations: 784.659 MiB, 11.86% gc time)\n"
164 |      ]
165 |     }
166 |    ],
167 |    "source": [
168 |     "# cold start\n",
169 |     "@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)\n",
170 |     "    grads = lossgradient(model, x, y)\n",
171 |     "    update!(model, grads, optim)\n",
172 |     "end"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 9,
178 |    "metadata": {
179 |     "collapsed": true
180 |    },
181 |    "outputs": [],
182 |    "source": [
183 |     "# prepare for training\n",
184 |     "model = optim = nothing; knetgc() # Clear memory from last run\n",
185 |     "model = initmodel()\n",
186 |     "optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 10,
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "name": "stderr",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTraining...\n",
199 |       "\u001b[39m"
200 |      ]
201 |     },
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       " 15.909965 seconds (1.88 M allocations: 670.408 MiB, 0.41% gc time)\n",
207 |       " 15.772903 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time)\n",
208 |       " 15.829510 seconds (1.88 M allocations: 670.285 MiB, 0.44% gc time)\n",
209 |       " 15.961176 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time)\n",
210 |       " 15.869710 seconds (1.88 M allocations: 670.285 MiB, 0.46% gc time)\n",
211 |       " 15.872871 seconds (1.88 M allocations: 670.353 MiB, 0.46% gc time)\n",
212 |       " 15.839494 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time)\n",
213 |       " 16.007868 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time)\n",
214 |       " 15.859198 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time)\n",
215 |       " 15.870192 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time)\n",
216 |       "158.797837 seconds (18.77 M allocations: 6.547 GiB, 0.44% gc time)\n"
217 |      ]
218 |     }
219 |    ],
220 |    "source": [
221 |     "# 159s\n",
222 |     "info(\"Training...\")\n",
223 |     "@time for epoch in 1:EPOCHS\n",
224 |     "    @time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)\n",
225 |     "        grads = lossgradient(model, x, y)\n",
226 |     "        update!(model, grads, optim)\n",
227 |     "    end\n",
228 |     "end"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 11,
234 |    "metadata": {},
235 |    "outputs": [
236 |     {
237 |      "name": "stdout",
238 |      "output_type": "stream",
239 |      "text": [
240 |       "  2.123045 seconds (559.28 k allocations: 145.928 MiB, 1.10% gc time)\n"
241 |      ]
242 |     },
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "0.7754407051282052"
247 |       ]
248 |      },
249 |      "execution_count": 11,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "# test accuracy 77.54\n",
256 |     "testdata = minibatch(xtst,ytst,BATCHSIZE;xtype=KnetArray)\n",
257 |     "@time accuracy(model,testdata,predict)"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": null,
263 |    "metadata": {
264 |     "collapsed": true
265 |    },
266 |    "outputs": [],
267 |    "source": []
268 |   }
269 |  ],
270 |  "metadata": {
271 |   "kernelspec": {
272 |    "display_name": "Julia 0.6.1",
273 |    "language": "julia",
274 |    "name": "julia-0.6"
275 |   },
276 |   "language_info": {
277 |    "file_extension": ".jl",
278 |    "mimetype": "application/julia",
279 |    "name": "julia",
280 |    "version": "0.6.1"
281 |   }
282 |  },
283 |  "nbformat": 4,
284 |  "nbformat_minor": 2
285 | }
286 | 


--------------------------------------------------------------------------------
/notebooks/Knet_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# GPU: 32*40 in 8.00s = 160/s\n",
 12 |     "# CPU: 32*8 in 115.0s = 2/s"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "# After installing and starting Julia run the following to install the required packages:\n",
 22 |     "# Pkg.init(); Pkg.update()\n",
 23 |     "# for p in (\"CUDAapi\",\"CUDAdrv\",\"MAT\",\"Images\",\"IJulia\",\"Knet\"); Pkg.add(p); end\n",
 24 |     "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n",
 25 |     "# Pkg.build(\"Knet\")"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {},
 32 |    "outputs": [
 33 |     {
 34 |      "name": "stdout",
 35 |      "output_type": "stream",
 36 |      "text": [
 37 |       "OS: Linux\n",
 38 |       "Julia: 0.6.1\n",
 39 |       "Knet: 0.8.5+\n"
 40 |      ]
 41 |     }
 42 |    ],
 43 |    "source": [
 44 |     "using Knet\n",
 45 |     "include(Knet.dir(\"examples\",\"resnet\", \"resnetlib.jl\"))\n",
 46 |     "using ResNetLib: resnet50init, resnet50\n",
 47 |     "println(\"OS: \", Sys.KERNEL)\n",
 48 |     "println(\"Julia: \", VERSION)\n",
 49 |     "println(\"Knet: \", Pkg.installed(\"Knet\"))"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 4,
 55 |    "metadata": {},
 56 |    "outputs": [
 57 |     {
 58 |      "name": "stdout",
 59 |      "output_type": "stream",
 60 |      "text": [
 61 |       "6\n"
 62 |      ]
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     ";cat /proc/cpuinfo '|' grep processor '|' wc -l"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 5,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "name": "stdout",
 76 |      "output_type": "stream",
 77 |      "text": [
 78 |       "name\n",
 79 |       "Tesla K80\n"
 80 |      ]
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     ";nvidia-smi --query-gpu=gpu_name --format=csv"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 6,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "data": {
 94 |       "text/plain": [
 95 |        "8"
 96 |       ]
 97 |      },
 98 |      "execution_count": 6,
 99 |      "metadata": {},
100 |      "output_type": "execute_result"
101 |     }
102 |    ],
103 |    "source": [
104 |     "const BATCH_SIZE = 32\n",
105 |     "const RESNET_FEATURES = 2048\n",
106 |     "const BATCHES_GPU = 40\n",
107 |     "const BATCHES_CPU = 8"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 7,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "fakedata (generic function with 1 method)"
119 |       ]
120 |      },
121 |      "execution_count": 7,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "# Create batches of fake data\n",
128 |     "function fakedata(batches; atype=KnetArray)\n",
129 |     "    x = rand(Float32, 224, 224, 3, BATCH_SIZE * batches)\n",
130 |     "    minibatch(x, BATCH_SIZE, xtype=atype)\n",
131 |     "end"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 8,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "data": {
141 |       "text/plain": [
142 |        "predictfn (generic function with 1 method)"
143 |       ]
144 |      },
145 |      "execution_count": 8,
146 |      "metadata": {},
147 |      "output_type": "execute_result"
148 |     }
149 |    ],
150 |    "source": [
151 |     "# Return features from classifier\n",
152 |     "function predictfn(weights, moments, data)\n",
153 |     "    out = []\n",
154 |     "    for x in data\n",
155 |     "        pred = resnet50(weights, moments, x; stage=5)\n",
156 |     "        push!(out, mat(pred))\n",
157 |     "    end\n",
158 |     "    return Array(hcat(out...))\n",
159 |     "end"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "## 1. GPU"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 9,
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "name": "stderr",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n",
179 |       "\u001b[39m\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading imagenet-resnet-50-dag.mat...\n",
180 |       "\u001b[39m"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "# Initialize resnet weights\n",
186 |     "gpuweights = gpumoments = nothing; knetgc() # clear memory from previous run\n",
187 |     "gpuweights, gpumoments = resnet50init(;stage=5, trained=true, atype=KnetArray);"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 10,
193 |    "metadata": {},
194 |    "outputs": [
195 |     {
196 |      "name": "stderr",
197 |      "output_type": "stream",
198 |      "text": [
199 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n",
200 |       "\u001b[39m"
201 |      ]
202 |     },
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       " 21.220333 seconds (1.93 M allocations: 842.832 MiB, 35.05% gc time)\n"
208 |      ]
209 |     }
210 |    ],
211 |    "source": [
212 |     "info(\"Cold start\")\n",
213 |     "gpudata1 = fakedata(BATCHES_GPU, atype=KnetArray)\n",
214 |     "@time predictfn(gpuweights, gpumoments, gpudata1);"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 11,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "name": "stderr",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n",
227 |       "\u001b[39m"
228 |      ]
229 |     },
230 |     {
231 |      "name": "stdout",
232 |      "output_type": "stream",
233 |      "text": [
234 |       "  8.002292 seconds (360.61 k allocations: 760.376 MiB, 3.82% gc time)\n"
235 |      ]
236 |     }
237 |    ],
238 |    "source": [
239 |     "info(\"Benchmarking\")\n",
240 |     "gpudata = fakedata(BATCHES_GPU, atype=KnetArray)\n",
241 |     "@time predictfn(gpuweights, gpumoments, gpudata);"
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "markdown",
246 |    "metadata": {},
247 |    "source": [
248 |     "## 2. CPU"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 12,
254 |    "metadata": {},
255 |    "outputs": [
256 |     {
257 |      "name": "stderr",
258 |      "output_type": "stream",
259 |      "text": [
260 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n",
261 |       "\u001b[39m"
262 |      ]
263 |     }
264 |    ],
265 |    "source": [
266 |     "# Initialize resnet weights\n",
267 |     "cpuweights, cpumoments = resnet50init(;stage=5, trained=true, atype=Array);"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 13,
273 |    "metadata": {},
274 |    "outputs": [
275 |     {
276 |      "name": "stderr",
277 |      "output_type": "stream",
278 |      "text": [
279 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n",
280 |       "\u001b[39m"
281 |      ]
282 |     },
283 |     {
284 |      "name": "stdout",
285 |      "output_type": "stream",
286 |      "text": [
287 |       " 25.160136 seconds (14.20 M allocations: 4.351 GiB, 10.91% gc time)\n"
288 |      ]
289 |     }
290 |    ],
291 |    "source": [
292 |     "info(\"Cold start\")\n",
293 |     "cpudata1 = fakedata(1, atype=Array);\n",
294 |     "@time predictfn(cpuweights, cpumoments, cpudata1);"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": 14,
300 |    "metadata": {},
301 |    "outputs": [
302 |     {
303 |      "name": "stderr",
304 |      "output_type": "stream",
305 |      "text": [
306 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n",
307 |       "\u001b[39m"
308 |      ]
309 |     },
310 |     {
311 |      "name": "stdout",
312 |      "output_type": "stream",
313 |      "text": [
314 |       "115.024997 seconds (174.89 k allocations: 30.150 GiB, 15.85% gc time)\n"
315 |      ]
316 |     }
317 |    ],
318 |    "source": [
319 |     "info(\"Benchmarking\")\n",
320 |     "cpudata = fakedata(BATCHES_CPU, atype=Array);\n",
321 |     "@time predictfn(cpuweights, cpumoments, cpudata);"
322 |    ]
323 |   }
324 |  ],
325 |  "metadata": {
326 |   "kernelspec": {
327 |    "display_name": "Julia 0.6.1",
328 |    "language": "julia",
329 |    "name": "julia-0.6"
330 |   },
331 |   "language_info": {
332 |    "file_extension": ".jl",
333 |    "mimetype": "application/julia",
334 |    "name": "julia",
335 |    "version": "0.6.1"
336 |   }
337 |  },
338 |  "nbformat": 4,
339 |  "nbformat_minor": 2
340 | }
341 | 


--------------------------------------------------------------------------------
/notebooks/Knet_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Knet RNN example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# After installing and starting Julia run the following to install the required packages:\n",
 19 |     "# Pkg.init(); Pkg.update()\n",
 20 |     "# for p in (\"CUDAdrv\",\"IJulia\",\"PyCall\",\"JLD2\",\"Knet\"); Pkg.add(p); end\n",
 21 |     "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n",
 22 |     "# Pkg.build(\"Knet\")"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "using Knet\n",
 32 |     "True=true # so we can read the python params\n",
 33 |     "include(\"common/params_lstm.py\");"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 3,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "OS: Linux\n",
 46 |       "Julia: 0.6.1\n",
 47 |       "Knet: 0.8.5+\n",
 48 |       "GPU: Tesla K80\n",
 49 |       "\n"
 50 |      ]
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "println(\"OS: \", Sys.KERNEL)\n",
 55 |     "println(\"Julia: \", VERSION)\n",
 56 |     "println(\"Knet: \", Pkg.installed(\"Knet\"))\n",
 57 |     "println(\"GPU: \", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 4,
 63 |    "metadata": {
 64 |     "collapsed": true
 65 |    },
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "# define model\n",
 69 |     "function initmodel()\n",
 70 |     "    rnnSpec,rnnWeights = rnninit(EMBEDSIZE,NUMHIDDEN; rnnType=:gru)\n",
 71 |     "    inputMatrix = KnetArray(xavier(Float32,EMBEDSIZE,MAXFEATURES))\n",
 72 |     "    outputMatrix = KnetArray(xavier(Float32,2,NUMHIDDEN))\n",
 73 |     "    return rnnSpec,(rnnWeights,inputMatrix,outputMatrix)\n",
 74 |     "end;"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 5,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "# define loss and its gradient\n",
 86 |     "function predict(weights, inputs, rnnSpec)\n",
 87 |     "    rnnWeights, inputMatrix, outputMatrix = weights # (1,1,W), (X,V), (2,H)\n",
 88 |     "    indices = hcat(inputs...)' # (B,T)\n",
 89 |     "    rnnInput = inputMatrix[:,indices] # (X,B,T)\n",
 90 |     "    rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1] # (H,B,T)\n",
 91 |     "    return outputMatrix * rnnOutput[:,:,end] # (2,H) * (H,B) = (2,B)\n",
 92 |     "end\n",
 93 |     "\n",
 94 |     "loss(w,x,y,r)=nll(predict(w,x,r),y)\n",
 95 |     "lossgradient = grad(loss);"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 6,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stderr",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading IMDB...\n",
108 |       "\u001b[39m"
109 |      ]
110 |     },
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       " 10.266185 seconds (15.94 M allocations: 835.780 MiB, 3.98% gc time)\n",
116 |       "25000-element Array{Array{Int32,1},1}\n",
117 |       "25000-element Array{Int8,1}\n",
118 |       "25000-element Array{Array{Int32,1},1}\n",
119 |       "25000-element Array{Int8,1}\n"
120 |      ]
121 |     }
122 |    ],
123 |    "source": [
124 |     "# load data\n",
125 |     "include(Knet.dir(\"data\",\"imdb.jl\"))\n",
126 |     "@time (xtrn,ytrn,xtst,ytst,imdbdict)=imdb(maxlen=MAXLEN,maxval=MAXFEATURES)\n",
127 |     "for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 7,
133 |    "metadata": {
134 |     "collapsed": true
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "# prepare for training\n",
139 |     "weights = nothing; knetgc(); # Reclaim memory from previous run\n",
140 |     "rnnSpec,weights = initmodel()\n",
141 |     "optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 8,
147 |    "metadata": {},
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       " 14.319533 seconds (2.08 M allocations: 138.579 MiB, 3.58% gc time)\n"
154 |      ]
155 |     }
156 |    ],
157 |    "source": [
158 |     "# cold start\n",
159 |     "@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true)\n",
160 |     "    grads = lossgradient(weights,x,y,rnnSpec)\n",
161 |     "    update!(weights, grads, optim)\n",
162 |     "end"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 9,
168 |    "metadata": {
169 |     "collapsed": true
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "# prepare for training\n",
174 |     "weights = nothing; knetgc(); # Reclaim memory from previous run\n",
175 |     "rnnSpec,weights = initmodel()\n",
176 |     "optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 10,
182 |    "metadata": {},
183 |    "outputs": [
184 |     {
185 |      "name": "stderr",
186 |      "output_type": "stream",
187 |      "text": [
188 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTraining...\n",
189 |       "\u001b[39m"
190 |      ]
191 |     },
192 |     {
193 |      "name": "stdout",
194 |      "output_type": "stream",
195 |      "text": [
196 |       "  9.776101 seconds (356.68 k allocations: 45.007 MiB, 4.79% gc time)\n",
197 |       "  9.786896 seconds (352.22 k allocations: 44.658 MiB, 5.91% gc time)\n",
198 |       "  9.732747 seconds (352.94 k allocations: 44.669 MiB, 5.92% gc time)\n",
199 |       " 29.298876 seconds (1.07 M allocations: 134.572 MiB, 5.54% gc time)\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "# 29s\n",
205 |     "info(\"Training...\")\n",
206 |     "@time for epoch in 1:EPOCHS\n",
207 |     "    @time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true)\n",
208 |     "        grads = lossgradient(weights,x,y,rnnSpec)\n",
209 |     "        update!(weights, grads, optim)\n",
210 |     "    end\n",
211 |     "end"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 14,
217 |    "metadata": {},
218 |    "outputs": [
219 |     {
220 |      "name": "stderr",
221 |      "output_type": "stream",
222 |      "text": [
223 |       "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTesting...\n",
224 |       "\u001b[39m"
225 |      ]
226 |     },
227 |     {
228 |      "name": "stdout",
229 |      "output_type": "stream",
230 |      "text": [
231 |       "  2.999301 seconds (70.50 k allocations: 34.680 MiB, 11.61% gc time)\n"
232 |      ]
233 |     },
234 |     {
235 |      "data": {
236 |       "text/plain": [
237 |        "0.844511217948718"
238 |       ]
239 |      },
240 |      "execution_count": 14,
241 |      "metadata": {},
242 |      "output_type": "execute_result"
243 |     }
244 |    ],
245 |    "source": [
246 |     "info(\"Testing...\")\n",
247 |     "@time accuracy(weights, minibatch(xtst,ytst,BATCHSIZE), (w,x)->predict(w,x,rnnSpec))"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": null,
253 |    "metadata": {
254 |     "collapsed": true
255 |    },
256 |    "outputs": [],
257 |    "source": []
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "kernelspec": {
262 |    "display_name": "Julia 0.6.1",
263 |    "language": "julia",
264 |    "name": "julia-0.6"
265 |   },
266 |   "language_info": {
267 |    "file_extension": ".jl",
268 |    "mimetype": "application/julia",
269 |    "name": "julia",
270 |    "version": "0.6.1"
271 |   }
272 |  },
273 |  "nbformat": 4,
274 |  "nbformat_minor": 2
275 | }
276 | 


--------------------------------------------------------------------------------
/notebooks/MXNet_CNN_highAPI.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level MXNet Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import mxnet as mx\n",
 20 |     "import logging\n",
 21 |     "from common.params import *\n",
 22 |     "from common.utils import *"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# logging\n",
 32 |     "logger = logging.getLogger();\n",
 33 |     "logger.setLevel(logging.DEBUG);\n",
 34 |     "formatter = logging.Formatter('%(message)s');\n",
 35 |     "h2 = logging.StreamHandler(sys.stdout)\n",
 36 |     "h2.setFormatter(formatter);\n",
 37 |     "logger.addHandler(h2)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# Force one-gpu\n",
 47 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 4,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "OS:  linux\n",
 60 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 61 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 62 |       "Numpy:  1.14.1\n",
 63 |       "MXNet:  0.12.0\n",
 64 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 65 |       "CUDA Version 8.0.61\n",
 66 |       "CuDNN Version  6.0.21\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "print(\"OS: \", sys.platform)\n",
 72 |     "print(\"Python: \", sys.version)\n",
 73 |     "print(\"Numpy: \", np.__version__)\n",
 74 |     "print(\"MXNet: \", mx.__version__)\n",
 75 |     "print(\"GPU: \", get_gpu_name())\n",
 76 |     "print(get_cuda_version())\n",
 77 |     "print(\"CuDNN Version \", get_cudnn_version())"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 5,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "def create_symbol(n_classes=N_CLASSES):\n",
 87 |     "    data = mx.symbol.Variable('data')\n",
 88 |     "    # size = [(old-size - kernel + 2*padding)/stride]+1\n",
 89 |     "    # if kernel = 3, pad with 1 either side\n",
 90 |     "    conv1 = mx.symbol.Convolution(data=data, num_filter=50, pad=(1,1), kernel=(3,3))\n",
 91 |     "    relu1 = mx.symbol.Activation(data=conv1, act_type=\"relu\")\n",
 92 |     "    conv2 = mx.symbol.Convolution(data=relu1, num_filter=50, pad=(1,1), kernel=(3,3))\n",
 93 |     "    pool1 = mx.symbol.Pooling(data=conv2, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n",
 94 |     "    relu2 = mx.symbol.Activation(data=pool1, act_type=\"relu\")\n",
 95 |     "    drop1 = mx.symbol.Dropout(data=relu2, p=0.25)\n",
 96 |     "    \n",
 97 |     "    conv3 = mx.symbol.Convolution(data=drop1, num_filter=100, pad=(1,1), kernel=(3,3))\n",
 98 |     "    relu3 = mx.symbol.Activation(data=conv3, act_type=\"relu\")\n",
 99 |     "    conv4 = mx.symbol.Convolution(data=relu3, num_filter=100, pad=(1,1), kernel=(3,3))\n",
100 |     "    pool2 = mx.symbol.Pooling(data=conv4, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n",
101 |     "    relu4 = mx.symbol.Activation(data=pool2, act_type=\"relu\")\n",
102 |     "    drop2 = mx.symbol.Dropout(data=relu4, p=0.25)\n",
103 |     "           \n",
104 |     "    flat1 = mx.symbol.Flatten(data=drop2)\n",
105 |     "    fc1 = mx.symbol.FullyConnected(data=flat1, num_hidden=512)\n",
106 |     "    relu7 = mx.symbol.Activation(data=fc1, act_type=\"relu\")\n",
107 |     "    drop4 = mx.symbol.Dropout(data=relu7, p=0.5)\n",
108 |     "    fc2 = mx.symbol.FullyConnected(data=drop4, num_hidden=n_classes) \n",
109 |     "    \n",
110 |     "    input_y = mx.symbol.Variable('softmax_label')  \n",
111 |     "    m = mx.symbol.SoftmaxOutput(data=fc2, label=input_y, name=\"softmax\")\n",
112 |     "    return m"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 6,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "def init_model(m, batchsize=BATCHSIZE, lr=LR, momentum=MOMENTUM):\n",
122 |     "    ctx = [mx.gpu(0)]\n",
123 |     "    mod = mx.mod.Module(context=ctx, symbol=m)\n",
124 |     "    mod.bind(data_shapes=[('data', (batchsize, 3, 32, 32))],\n",
125 |     "             label_shapes=[('softmax_label', (batchsize,))])\n",
126 |     "    return mod"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 7,
132 |    "metadata": {},
133 |    "outputs": [
134 |     {
135 |      "name": "stdout",
136 |      "output_type": "stream",
137 |      "text": [
138 |       "Preparing train set...\n",
139 |       "Preparing test set...\n",
140 |       "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
141 |       "float32 float32 int32 int32\n",
142 |       "CPU times: user 1.07 s, sys: 1.09 s, total: 2.16 s\n",
143 |       "Wall time: 2.17 s\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "%%time\n",
149 |     "# Data into format for library\n",
150 |     "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
151 |     "# Load data-iterator\n",
152 |     "train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)\n",
153 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
154 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 8,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "CPU times: user 4.49 ms, sys: 0 ns, total: 4.49 ms\n",
167 |       "Wall time: 3.58 ms\n"
168 |      ]
169 |     }
170 |    ],
171 |    "source": [
172 |     "%%time\n",
173 |     "# Load symbol\n",
174 |     "sym = create_symbol()"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 9,
180 |    "metadata": {
181 |     "scrolled": true
182 |    },
183 |    "outputs": [
184 |     {
185 |      "name": "stdout",
186 |      "output_type": "stream",
187 |      "text": [
188 |       "CPU times: user 1 s, sys: 714 ms, total: 1.71 s\n",
189 |       "Wall time: 1.99 s\n"
190 |      ]
191 |     }
192 |    ],
193 |    "source": [
194 |     "%%time\n",
195 |     "# Initialise model\n",
196 |     "model = init_model(sym)"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 10,
202 |    "metadata": {},
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "Already bound, ignoring bind()\n",
209 |       "Epoch[0] Train-accuracy=0.337976\n",
210 |       "Epoch[0] Time cost=4.913\n",
211 |       "Epoch[1] Train-accuracy=0.498601\n",
212 |       "Epoch[1] Time cost=4.840\n",
213 |       "Epoch[2] Train-accuracy=0.580802\n",
214 |       "Epoch[2] Time cost=4.886\n",
215 |       "Epoch[3] Train-accuracy=0.642144\n",
216 |       "Epoch[3] Time cost=4.821\n",
217 |       "Epoch[4] Train-accuracy=0.686161\n",
218 |       "Epoch[4] Time cost=4.836\n",
219 |       "Epoch[5] Train-accuracy=0.718570\n",
220 |       "Epoch[5] Time cost=4.835\n",
221 |       "Epoch[6] Train-accuracy=0.744246\n",
222 |       "Epoch[6] Time cost=4.849\n",
223 |       "Epoch[7] Train-accuracy=0.767823\n",
224 |       "Epoch[7] Time cost=4.830\n",
225 |       "Epoch[8] Train-accuracy=0.784867\n",
226 |       "Epoch[8] Time cost=4.836\n",
227 |       "Epoch[9] Train-accuracy=0.802130\n",
228 |       "Epoch[9] Time cost=4.828\n",
229 |       "CPU times: user 44.5 s, sys: 17.3 s, total: 1min 1s\n",
230 |       "Wall time: 48.6 s\n"
231 |      ]
232 |     }
233 |    ],
234 |    "source": [
235 |     "%%time\n",
236 |     "# Main training loop: 49s\n",
237 |     "model.fit(train_data=train_iter, \n",
238 |     "          num_epoch=EPOCHS,\n",
239 |     "          initializer=mx.init.Xavier(rnd_type='uniform'),\n",
240 |     "          optimizer='sgd',\n",
241 |     "          optimizer_params=(('learning_rate', LR), ('momentum', MOMENTUM)),\n",
242 |     "          eval_metric=mx.metric.create('acc'))"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 11,
248 |    "metadata": {},
249 |    "outputs": [
250 |     {
251 |      "name": "stdout",
252 |      "output_type": "stream",
253 |      "text": [
254 |       "CPU times: user 370 ms, sys: 142 ms, total: 512 ms\n",
255 |       "Wall time: 316 ms\n"
256 |      ]
257 |     }
258 |    ],
259 |    "source": [
260 |     "%%time\n",
261 |     "# Main evaluation loop: 316ms\n",
262 |     "y_guess = model.predict(mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False))\n",
263 |     "y_guess = np.argmax(y_guess.asnumpy(), axis=-1)"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": 12,
269 |    "metadata": {},
270 |    "outputs": [
271 |     {
272 |      "name": "stdout",
273 |      "output_type": "stream",
274 |      "text": [
275 |       "Accuracy:  0.7707\n"
276 |      ]
277 |     }
278 |    ],
279 |    "source": [
280 |     "print(\"Accuracy: \", sum(y_guess == y_test)/len(y_guess))"
281 |    ]
282 |   }
283 |  ],
284 |  "metadata": {
285 |   "anaconda-cloud": {},
286 |   "kernelspec": {
287 |    "display_name": "Python [default]",
288 |    "language": "python",
289 |    "name": "python3"
290 |   },
291 |   "language_info": {
292 |    "codemirror_mode": {
293 |     "name": "ipython",
294 |     "version": 3
295 |    },
296 |    "file_extension": ".py",
297 |    "mimetype": "text/x-python",
298 |    "name": "python",
299 |    "nbconvert_exporter": "python",
300 |    "pygments_lexer": "ipython3",
301 |    "version": "3.5.2"
302 |   }
303 |  },
304 |  "nbformat": 4,
305 |  "nbformat_minor": 2
306 | }
307 | 


--------------------------------------------------------------------------------
/notebooks/MXNet_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import sys\n",
 11 |     "import numpy as np\n",
 12 |     "import mxnet as mx\n",
 13 |     "from mxnet import gluon, nd\n",
 14 |     "from collections import namedtuple\n",
 15 |     "from common.params_inf import *\n",
 16 |     "from common.utils import *"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "# Force one-gpu\n",
 26 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "OS:  linux\n",
 39 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 40 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 41 |       "Numpy:  1.14.1\n",
 42 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 43 |       "CUDA Version 8.0.61\n",
 44 |       "CuDNN Version  6.0.21\n"
 45 |      ]
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "print(\"OS: \", sys.platform)\n",
 50 |     "print(\"Python: \", sys.version)\n",
 51 |     "print(\"Numpy: \", np.__version__)\n",
 52 |     "print(\"GPU: \", get_gpu_name())\n",
 53 |     "print(get_cuda_version())\n",
 54 |     "print(\"CuDNN Version \", get_cudnn_version())"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 4,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "Batch = namedtuple('Batch', ['data'])\n",
 64 |     "ctx = mx.gpu(0)"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 5,
 70 |    "metadata": {},
 71 |    "outputs": [
 72 |     {
 73 |      "name": "stdout",
 74 |      "output_type": "stream",
 75 |      "text": [
 76 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 77 |      ]
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "# Create batches of fake data\n",
 82 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 83 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 6,
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "Downloaded\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "# Download Resnet weights\n",
101 |     "path='http://data.mxnet.io/models/imagenet/'\n",
102 |     "mx.test_utils.download(path+'resnet/50-layers/resnet-50-symbol.json')\n",
103 |     "mx.test_utils.download(path+'resnet/50-layers/resnet-50-0000.params')\n",
104 |     "print(\"Downloaded\")"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 7,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "# Load model\n",
114 |     "sym, arg_params, aux_params = mx.model.load_checkpoint('resnet-50', 0)\n",
115 |     "# Get all internal layers (symbols) of the network\n",
116 |     "all_layers = sym.get_internals()"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 8,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "# Get the output of the flatten layer ('flatten0_output')\n",
126 |     "flatten_layer = all_layers['flatten0_output']"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 9,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "def predict_fn(classifier, data, batchsize, ctx):\n",
136 |     "    \"\"\" Return features from classifier \"\"\"\n",
137 |     "    out = nd.zeros((len(data), RESNET_FEATURES), dtype=np.float32, ctx=ctx)    \n",
138 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
139 |     "        classifier.forward(Batch(data=[mx.nd.array(dta)]))\n",
140 |     "        out[idx*batchsize:(idx+1)*batchsize] = classifier.get_outputs()[0]\n",
141 |     "    nd.waitall()\n",
142 |     "    return out"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 10,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "# Use the flatten layer output as the feature-extractor symbol\n",
152 |     "fe_sym = all_layers['flatten0_output']\n",
153 |     "# Initialise GPU\n",
154 |     "fe_mod = mx.mod.Module(symbol=fe_sym, context=ctx, label_names=None)\n",
155 |     "fe_mod.bind(for_training=False, inputs_need_grad=False,\n",
156 |     "            data_shapes=[('data', (BATCH_SIZE,3,224,224))])\n",
157 |     "fe_mod.set_params(arg_params, aux_params)"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 11,
163 |    "metadata": {},
164 |    "outputs": [],
165 |    "source": [
166 |     "cold_start = predict_fn(fe_mod, fake_input_data_cf, BATCH_SIZE, ctx)"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 12,
172 |    "metadata": {},
173 |    "outputs": [
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "CPU times: user 2 s, sys: 631 ms, total: 2.63 s\n",
179 |       "Wall time: 2.1 s\n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "%%time\n",
185 |     "features = predict_fn(fe_mod, fake_input_data_cf, BATCH_SIZE, ctx)"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 14,
191 |    "metadata": {},
192 |    "outputs": [
193 |     {
194 |      "name": "stdout",
195 |      "output_type": "stream",
196 |      "text": [
197 |       "Images per second 609.5238095238095\n"
198 |      ]
199 |     }
200 |    ],
201 |    "source": [
202 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.1))"
203 |    ]
204 |   }
205 |  ],
206 |  "metadata": {
207 |   "kernelspec": {
208 |    "display_name": "Python [default]",
209 |    "language": "python",
210 |    "name": "python3"
211 |   },
212 |   "language_info": {
213 |    "codemirror_mode": {
214 |     "name": "ipython",
215 |     "version": 3
216 |    },
217 |    "file_extension": ".py",
218 |    "mimetype": "text/x-python",
219 |    "name": "python",
220 |    "nbconvert_exporter": "python",
221 |    "pygments_lexer": "ipython3",
222 |    "version": "3.5.2"
223 |   }
224 |  },
225 |  "nbformat": 4,
226 |  "nbformat_minor": 2
227 | }
228 | 


--------------------------------------------------------------------------------
/notebooks/MXNet_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN MXNet Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import mxnet as mx\n",
 20 |     "from mxnet.io import DataDesc\n",
 21 |     "from common.params_lstm import *\n",
 22 |     "from common.utils import *"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# Force one-gpu\n",
 32 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 3,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "OS:  linux\n",
 45 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 46 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 47 |       "Numpy:  1.14.1\n",
 48 |       "MXNet:  0.12.0\n",
 49 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 50 |       "CUDA Version 8.0.61\n",
 51 |       "CuDNN Version  6.0.21\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "print(\"OS: \", sys.platform)\n",
 57 |     "print(\"Python: \", sys.version)\n",
 58 |     "print(\"Numpy: \", np.__version__)\n",
 59 |     "print(\"MXNet: \", mx.__version__)\n",
 60 |     "print(\"GPU: \", get_gpu_name())\n",
 61 |     "print(get_cuda_version())\n",
 62 |     "print(\"CuDNN Version \", get_cudnn_version())"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def create_symbol(CUDNN=True,\n",
 72 |     "                  maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n",
 73 |     "    # https://mxnet.incubator.apache.org/api/python/rnn.html\n",
 74 |     "    data = mx.symbol.Variable('data')\n",
 75 |     "    embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)\n",
 76 |     "    \n",
 77 |     "    # Fusing RNN layers across time step into one kernel\n",
 78 |     "    # Improves speed but is less flexible\n",
 79 |     "    # Currently only supported if using cuDNN on GPU\n",
 80 |     "    if not CUDNN:\n",
 81 |     "        gru_cell = mx.rnn.GRUCell(num_hidden=nhid)\n",
 82 |     "    else:\n",
 83 |     "        gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')\n",
 84 |     "    \n",
 85 |     "    begin_state = gru_cell.begin_state()\n",
 86 |     "    # Unroll the cell over maxl time steps to get the per-step outputs for a batch.\n",
 87 |     "    # TODO: TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN\n",
 88 |     "    outputs, states = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False)\n",
 89 |     "    \n",
 90 |     "    fc1 = mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2) \n",
 91 |     "    input_y = mx.symbol.Variable('softmax_label')  \n",
 92 |     "    m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name=\"softmax\")\n",
 93 |     "    return m"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 5,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
103 |     "    ctx = [mx.gpu(0)]\n",
104 |     "    mod = mx.mod.Module(context=ctx, symbol=m)\n",
105 |     "    mod.bind(data_shapes=[DataDesc(name='data', shape=(batchs, maxl))],\n",
106 |     "             label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])\n",
107 |     "    # Glorot-uniform initializer\n",
108 |     "    mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))\n",
109 |     "    mod.init_optimizer(optimizer='Adam', \n",
110 |     "                       optimizer_params=(('learning_rate', lr),\n",
111 |     "                                         ('beta1', b1),\n",
112 |     "                                         ('beta2', b2),\n",
113 |     "                                         ('epsilon', eps)))\n",
114 |     "    return mod"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 6,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "Data does not exist. Downloading https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/imdb.npz\n",
127 |       "Preparing train set...\n",
128 |       "Preparing test set...\n",
129 |       "Trimming to 30000 max-features\n",
130 |       "Padding to length 150\n",
131 |       "(25000, 150) (25000, 150) (25000,) (25000,)\n",
132 |       "int32 int32 int32 int32\n",
133 |       "CPU times: user 6.05 s, sys: 512 ms, total: 6.56 s\n",
134 |       "Wall time: 8.13 s\n"
135 |      ]
136 |     }
137 |    ],
138 |    "source": [
139 |     "%%time\n",
140 |     "# Data into format for library\n",
141 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
142 |     "# TNC layout faster for RNN\n",
143 |     "# Train iterator\n",
144 |     "train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)\n",
145 |     "\n",
146 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
147 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 7,
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "name": "stdout",
157 |      "output_type": "stream",
158 |      "text": [
159 |       "CPU times: user 44 ms, sys: 709 µs, total: 44.7 ms\n",
160 |       "Wall time: 45.6 ms\n"
161 |      ]
162 |     },
163 |     {
164 |      "name": "stderr",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "/anaconda/envs/py35/lib/python3.5/site-packages/mxnet-0.12.0-py3.5.egg/mxnet/rnn/rnn_cell.py:675: UserWarning: NTC layout detected. Consider using TNC for FusedRNNCell for faster speed\n",
168 |       "  warnings.warn(\"NTC layout detected. Consider using \"\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "%%time\n",
174 |     "# Load symbol\n",
175 |     "# See Notebook \"MXNet_RNN_TNC.ipynb\" for example with TNC layout\n",
176 |     "sym = create_symbol()"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": 8,
182 |    "metadata": {
183 |     "scrolled": true
184 |    },
185 |    "outputs": [
186 |     {
187 |      "name": "stdout",
188 |      "output_type": "stream",
189 |      "text": [
190 |       "CPU times: user 975 ms, sys: 572 ms, total: 1.55 s\n",
191 |       "Wall time: 1.55 s\n"
192 |      ]
193 |     }
194 |    ],
195 |    "source": [
196 |     "%%time\n",
197 |     "# Initialise model\n",
198 |     "model = init_model(sym)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 9,
204 |    "metadata": {},
205 |    "outputs": [
206 |     {
207 |      "name": "stdout",
208 |      "output_type": "stream",
209 |      "text": [
210 |       "Epoch 0, Training ('accuracy', 0.7748960997442456)\n",
211 |       "Epoch 1, Training ('accuracy', 0.9239130434782609)\n",
212 |       "Epoch 2, Training ('accuracy', 0.9643941815856778)\n",
213 |       "CPU times: user 19.9 s, sys: 5.64 s, total: 25.6 s\n",
214 |       "Wall time: 24.1 s\n"
215 |      ]
216 |     }
217 |    ],
218 |    "source": [
219 |     "%%time\n",
220 |     "# Main training loop: 24.1s\n",
221 |     "metric = mx.metric.create('acc')\n",
222 |     "for j in range(EPOCHS):\n",
223 |     "    train_iter.reset()\n",
224 |     "    metric.reset()\n",
225 |     "    for batch in train_iter:\n",
226 |     "        model.forward(batch, is_train=True) \n",
227 |     "        model.update_metric(metric, batch.label)\n",
228 |     "        model.backward()              \n",
229 |     "        model.update()\n",
230 |     "    print('Epoch %d, Training %s' % (j, metric.get()))"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": 10,
236 |    "metadata": {},
237 |    "outputs": [
238 |     {
239 |      "name": "stdout",
240 |      "output_type": "stream",
241 |      "text": [
242 |       "CPU times: user 2.36 s, sys: 351 ms, total: 2.71 s\n",
243 |       "Wall time: 2.54 s\n"
244 |      ]
245 |     }
246 |    ],
247 |    "source": [
248 |     "%%time\n",
249 |     "# Main evaluation loop: 2.54s\n",
250 |     "y_guess = model.predict(mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False))\n",
251 |     "y_guess = np.argmax(y_guess.asnumpy(), axis=-1)"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 11,
257 |    "metadata": {},
258 |    "outputs": [
259 |     {
260 |      "name": "stdout",
261 |      "output_type": "stream",
262 |      "text": [
263 |       "Accuracy:  0.85864\n"
264 |      ]
265 |     }
266 |    ],
267 |    "source": [
268 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_test)/len(y_guess))"
269 |    ]
270 |   }
271 |  ],
272 |  "metadata": {
273 |   "anaconda-cloud": {},
274 |   "kernelspec": {
275 |    "display_name": "Python [default]",
276 |    "language": "python",
277 |    "name": "python3"
278 |   },
279 |   "language_info": {
280 |    "codemirror_mode": {
281 |     "name": "ipython",
282 |     "version": 3
283 |    },
284 |    "file_extension": ".py",
285 |    "mimetype": "text/x-python",
286 |    "name": "python",
287 |    "nbconvert_exporter": "python",
288 |    "pygments_lexer": "ipython3",
289 |    "version": "3.5.2"
290 |   }
291 |  },
292 |  "nbformat": 4,
293 |  "nbformat_minor": 1
294 | }
295 | 


--------------------------------------------------------------------------------
/notebooks/MXNet_RNN_TNC.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN MXNet Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 2,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import mxnet as mx\n",
 20 |     "from mxnet.io import DataDesc\n",
 21 |     "from common.params_lstm import *\n",
 22 |     "from common.utils import *"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 3,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# Force one-gpu\n",
 32 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 4,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "OS:  linux\n",
 45 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 46 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 47 |       "Numpy:  1.14.1\n",
 48 |       "MXNet:  0.12.0\n",
 49 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 50 |       "CUDA Version 8.0.61\n",
 51 |       "CuDNN Version  6.0.21\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "print(\"OS: \", sys.platform)\n",
 57 |     "print(\"Python: \", sys.version)\n",
 58 |     "print(\"Numpy: \", np.__version__)\n",
 59 |     "print(\"MXNet: \", mx.__version__)\n",
 60 |     "print(\"GPU: \", get_gpu_name())\n",
 61 |     "print(get_cuda_version())\n",
 62 |     "print(\"CuDNN Version \", get_cudnn_version())"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 5,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def create_symbol(CUDNN=True,\n",
 72 |     "                  maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n",
 73 |     "    # https://mxnet.incubator.apache.org/api/python/rnn.html\n",
 74 |     "    data = mx.symbol.Variable('data')\n",
 75 |     "    embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)\n",
 76 |     "    \n",
 77 |     "    # Fusing RNN layers across time step into one kernel\n",
 78 |     "    # Improves speed but is less flexible\n",
 79 |     "    # Currently only supported if using cuDNN on GPU\n",
 80 |     "    if not CUDNN:\n",
 81 |     "        gru_cell = mx.rnn.GRUCell(num_hidden=nhid)\n",
 82 |     "    else:\n",
 83 |     "        gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')\n",
 84 |     "    \n",
 85 |     "    begin_state = gru_cell.begin_state()\n",
 86 |     "    # Unroll the cell over maxl time steps to get the per-step outputs for a batch.\n",
 87 |     "    # TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN\n",
 88 |     "    outputs, states = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False, layout='TNC')\n",
 89 |     "    \n",
 90 |     "    fc1 = mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2) \n",
 91 |     "    input_y = mx.symbol.Variable('softmax_label')  \n",
 92 |     "    m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name=\"softmax\")\n",
 93 |     "    return m"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 6,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
103 |     "    ctx = [mx.gpu(0)]\n",
104 |     "    mod = mx.mod.Module(context=ctx, symbol=m)\n",
105 |     "    mod.bind(data_shapes=[DataDesc(name='data', shape=(maxl, batchs), layout='TNC')],\n",
106 |     "             label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])\n",
107 |     "    # Glorot-uniform initializer\n",
108 |     "    mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))\n",
109 |     "    mod.init_optimizer(optimizer='Adam', \n",
110 |     "                       optimizer_params=(('learning_rate', lr),\n",
111 |     "                                         ('beta1', b1),\n",
112 |     "                                         ('beta2', b2),\n",
113 |     "                                         ('epsilon', eps)))\n",
114 |     "    return mod"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 7,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "Preparing train set...\n",
127 |       "Preparing test set...\n",
128 |       "Trimming to 30000 max-features\n",
129 |       "Padding to length 150\n",
130 |       "(25000, 150) (25000, 150) (25000,) (25000,)\n",
131 |       "int32 int32 int32 int32\n",
132 |       "CPU times: user 5.59 s, sys: 391 ms, total: 5.98 s\n",
133 |       "Wall time: 5.98 s\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "%%time\n",
139 |     "# Data into format for library\n",
140 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
141 |     "wrapper_db = lambda args: mx.io.DataBatch(data=[mx.nd.array(args[0])], label=[mx.nd.array(args[1])])\n",
142 |     "\n",
143 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
144 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 8,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "name": "stdout",
154 |      "output_type": "stream",
155 |      "text": [
156 |       "CPU times: user 43.3 ms, sys: 0 ns, total: 43.3 ms\n",
157 |       "Wall time: 42.6 ms\n"
158 |      ]
159 |     }
160 |    ],
161 |    "source": [
162 |     "%%time\n",
163 |     "# Load symbol\n",
164 |     "sym = create_symbol()"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 9,
170 |    "metadata": {
171 |     "scrolled": true
172 |    },
173 |    "outputs": [
174 |     {
175 |      "name": "stdout",
176 |      "output_type": "stream",
177 |      "text": [
178 |       "CPU times: user 901 ms, sys: 521 ms, total: 1.42 s\n",
179 |       "Wall time: 1.43 s\n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "%%time\n",
185 |     "# Initialise model\n",
186 |     "model = init_model(sym)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": 10,
192 |    "metadata": {},
193 |    "outputs": [
194 |     {
195 |      "name": "stdout",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "Epoch 0, Training ('accuracy', 0.7873397435897436)\n",
199 |       "Epoch 1, Training ('accuracy', 0.9302083333333333)\n",
200 |       "Epoch 2, Training ('accuracy', 0.9705128205128205)\n",
201 |       "CPU times: user 21 s, sys: 4.39 s, total: 25.4 s\n",
202 |       "Wall time: 23.7 s\n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "%%time\n",
208 |     "# Main training loop: 23.7s\n",
209 |     "metric = mx.metric.create('acc')\n",
210 |     "for j in range(EPOCHS):\n",
211 |     "    metric.reset()\n",
212 |     "    for batch in map(wrapper_db, yield_mb_tn(x_train, y_train, BATCHSIZE, shuffle=True)):\n",
213 |     "        model.forward(batch) \n",
214 |     "        model.update_metric(metric, batch.label)\n",
215 |     "        model.backward()              \n",
216 |     "        model.update()\n",
217 |     "    print('Epoch %d, Training %s' % (j, metric.get()))"
218 |    ]
219 |   }
220 |  ],
221 |  "metadata": {
222 |   "anaconda-cloud": {},
223 |   "kernelspec": {
224 |    "display_name": "Python 3",
225 |    "language": "python",
226 |    "name": "python3"
227 |   }
228 |  },
229 |  "nbformat": 4,
230 |  "nbformat_minor": 1
231 | }
232 | 


--------------------------------------------------------------------------------
/notebooks/PyTorch_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level PyTorch Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import math\n",
 20 |     "import torch\n",
 21 |     "import torch.nn as nn\n",
 22 |     "import torch.nn.functional as F\n",
 23 |     "import torch.optim as optim\n",
 24 |     "import torch.utils.data as data_utils\n",
 25 |     "import torch.nn.init as init\n",
 26 |     "from torch.autograd import Variable\n",
 27 |     "from common.params import *\n",
 28 |     "from common.utils import *"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "# Force one-gpu\n",
 38 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Performance Improvement\n",
 48 |     "# 1. Auto-tune\n",
 49 |     "torch.backends.cudnn.benchmark=True"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 4,
 55 |    "metadata": {},
 56 |    "outputs": [
 57 |     {
 58 |      "name": "stdout",
 59 |      "output_type": "stream",
 60 |      "text": [
 61 |       "OS:  linux\n",
 62 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 63 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 64 |       "PyTorch:  0.3.1\n",
 65 |       "Numpy:  1.14.1\n",
 66 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 67 |       "CUDA Version 8.0.61\n",
 68 |       "CuDNN Version  6.0.21\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "print(\"OS: \", sys.platform)\n",
 74 |     "print(\"Python: \", sys.version)\n",
 75 |     "print(\"PyTorch: \", torch.__version__)\n",
 76 |     "print(\"Numpy: \", np.__version__)\n",
 77 |     "print(\"GPU: \", get_gpu_name())\n",
 78 |     "print(get_cuda_version())\n",
 79 |     "print(\"CuDNN Version \", get_cudnn_version())"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 5,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "class SymbolModule(nn.Module):\n",
 89 |     "    def __init__(self, n_classes=N_CLASSES):\n",
 90 |     "        super(SymbolModule, self).__init__()\n",
 91 |     "        self.conv1 = nn.Conv2d(3, 50, kernel_size=3, padding=1)\n",
 92 |     "        self.conv2 = nn.Conv2d(50, 50, kernel_size=3, padding=1)\n",
 93 |     "        self.conv3 = nn.Conv2d(50, 100, kernel_size=3, padding=1)\n",
 94 |     "        self.conv4 = nn.Conv2d(100, 100, kernel_size=3, padding=1)\n",
 95 |     "        # feature map size is 8*8 by pooling\n",
 96 |     "        self.fc1 = nn.Linear(100*8*8, 512)\n",
 97 |     "        self.fc2 = nn.Linear(512, n_classes)\n",
 98 |     "\n",
 99 |     "    def forward(self, x):\n",
100 |     "        # PyTorch requires a flag for training in dropout\n",
101 |     "        x = self.conv2(F.relu(self.conv1(x)))\n",
102 |     "        x = F.relu(F.max_pool2d(x, kernel_size=2, stride=2))\n",
103 |     "        x = F.dropout(x, 0.25, training=self.training)\n",
104 |     "\n",
105 |     "        x = self.conv4(F.relu(self.conv3(x)))\n",
106 |     "        x = F.relu(F.max_pool2d(x, kernel_size=2, stride=2))\n",
107 |     "        x = F.dropout(x, 0.25, training=self.training)\n",
108 |     "\n",
109 |     "        x = x.view(-1, 100*8*8)   # reshape Variable\n",
110 |     "        x = F.dropout(F.relu(self.fc1(x)), 0.5, training=self.training)\n",
111 |     "        return self.fc2(x)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [],
119 |    "source": [
120 |     "def init_model(m, lr=LR, momentum=MOMENTUM):\n",
121 |     "    # This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class\n",
122 |     "    opt = optim.SGD(m.parameters(), lr, momentum)\n",
123 |     "    criterion = nn.CrossEntropyLoss()\n",
124 |     "    return opt, criterion"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 7,
130 |    "metadata": {
131 |     "scrolled": true
132 |    },
133 |    "outputs": [
134 |     {
135 |      "name": "stdout",
136 |      "output_type": "stream",
137 |      "text": [
138 |       "Preparing train set...\n",
139 |       "Preparing test set...\n",
140 |       "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
141 |       "float32 float32 int32 int64\n",
142 |       "CPU times: user 709 ms, sys: 601 ms, total: 1.31 s\n",
143 |       "Wall time: 3.54 s\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "%%time\n",
149 |     "# Data into format for library\n",
150 |     "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
151 |     "# Torch-specific\n",
152 |     "y_train = y_train.astype(np.int32)\n",
153 |     "y_test = y_test.astype(np.int64)\n",
154 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
155 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 8,
161 |    "metadata": {},
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "CPU times: user 1.85 s, sys: 726 ms, total: 2.58 s\n",
168 |       "Wall time: 3.73 s\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "%%time\n",
174 |     "sym = SymbolModule()\n",
175 |     "sym.cuda() # CUDA!"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 9,
181 |    "metadata": {},
182 |    "outputs": [
183 |     {
184 |      "name": "stdout",
185 |      "output_type": "stream",
186 |      "text": [
187 |       "CPU times: user 131 µs, sys: 76 µs, total: 207 µs\n",
188 |       "Wall time: 212 µs\n"
189 |      ]
190 |     }
191 |    ],
192 |    "source": [
193 |     "%%time\n",
194 |     "optimizer, criterion = init_model(sym)"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 10,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       "0\n",
207 |       "1\n",
208 |       "2\n",
209 |       "3\n",
210 |       "4\n",
211 |       "5\n",
212 |       "6\n",
213 |       "7\n",
214 |       "8\n",
215 |       "9\n",
216 |       "CPU times: user 38.3 s, sys: 10.9 s, total: 49.1 s\n",
217 |       "Wall time: 51.2 s\n"
218 |      ]
219 |     }
220 |    ],
221 |    "source": [
222 |     "%%time\n",
223 |     "# Main training loop: 51s\n",
224 |     "sym.train() # Sets training = True  \n",
225 |     "for j in range(EPOCHS):\n",
226 |     "    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
227 |     "        # Get samples\n",
228 |     "        data = Variable(torch.FloatTensor(data).cuda())\n",
229 |     "        target = Variable(torch.LongTensor(target).cuda())\n",
230 |     "        # Init\n",
231 |     "        optimizer.zero_grad()\n",
232 |     "        # Forwards\n",
233 |     "        output = sym(data)\n",
234 |     "        # Loss\n",
235 |     "        loss = criterion(output, target)\n",
236 |     "        # Back-prop\n",
237 |     "        loss.backward()\n",
238 |     "        optimizer.step()\n",
239 |     "    # Log\n",
240 |     "    print(j)"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 13,
246 |    "metadata": {},
247 |    "outputs": [
248 |     {
249 |      "name": "stdout",
250 |      "output_type": "stream",
251 |      "text": [
252 |       "CPU times: user 262 ms, sys: 46.2 ms, total: 309 ms\n",
253 |       "Wall time: 308 ms\n"
254 |      ]
255 |     }
256 |    ],
257 |    "source": [
258 |     "%%time\n",
259 |     "# Main evaluation loop: 308ms\n",
260 |     "sym.eval() # Sets training = False\n",
261 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
262 |     "y_guess = np.zeros(n_samples, dtype=np.int)\n",
263 |     "y_truth = y_test[:n_samples]\n",
264 |     "c = 0\n",
265 |     "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
266 |     "    # Get samples\n",
267 |     "    data = Variable(torch.FloatTensor(data).cuda())\n",
268 |     "    # Forwards\n",
269 |     "    output = sym(data)\n",
270 |     "    pred = output.data.max(1)[1].cpu().numpy().squeeze()\n",
271 |     "    # Collect results\n",
272 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
273 |     "    c += 1"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 14,
279 |    "metadata": {},
280 |    "outputs": [
281 |     {
282 |      "name": "stdout",
283 |      "output_type": "stream",
284 |      "text": [
285 |       "Accuracy:  0.7745392628205128\n"
286 |      ]
287 |     }
288 |    ],
289 |    "source": [
290 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
291 |    ]
292 |   }
293 |  ],
294 |  "metadata": {
295 |   "anaconda-cloud": {},
296 |   "kernelspec": {
297 |    "display_name": "Python 3",
298 |    "language": "python",
299 |    "name": "python3"
300 |   },
301 |   "language_info": {
302 |    "codemirror_mode": {
303 |     "name": "ipython",
304 |     "version": 3
305 |    },
306 |    "file_extension": ".py",
307 |    "mimetype": "text/x-python",
308 |    "name": "python",
309 |    "nbconvert_exporter": "python",
310 |    "pygments_lexer": "ipython3",
311 |    "version": "3.5.2"
312 |   }
313 |  },
314 |  "nbformat": 4,
315 |  "nbformat_minor": 2
316 | }
317 | 


--------------------------------------------------------------------------------
/notebooks/PyTorch_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import sys\n",
 11 |     "import numpy as np\n",
 12 |     "import torch\n",
 13 |     "import torchvision.models as models\n",
 14 |     "from torch.autograd import Variable\n",
 15 |     "from common.params_inf import *\n",
 16 |     "from common.utils import *"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "# Force one-gpu\n",
 26 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "OS:  linux\n",
 39 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 40 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 41 |       "Numpy:  1.14.1\n",
 42 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 43 |       "CUDA Version 8.0.61\n",
 44 |       "CuDNN Version  6.0.21\n"
 45 |      ]
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "print(\"OS: \", sys.platform)\n",
 50 |     "print(\"Python: \", sys.version)\n",
 51 |     "print(\"Numpy: \", np.__version__)\n",
 52 |     "print(\"GPU: \", get_gpu_name())\n",
 53 |     "print(get_cuda_version())\n",
 54 |     "print(\"CuDNN Version \", get_cudnn_version())"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 4,
 60 |    "metadata": {},
 61 |    "outputs": [
 62 |     {
 63 |      "name": "stdout",
 64 |      "output_type": "stream",
 65 |      "text": [
 66 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "# Create batches of fake data\n",
 72 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
 73 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 5,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "name": "stdout",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "Linear(in_features=2048, out_features=1000, bias=True)\n",
 86 |       "AvgPool2d(kernel_size=7, stride=7, padding=0, ceil_mode=False, count_include_pad=True)\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "# Download ResNet\n",
 92 |     "resnet50 = models.resnet50(pretrained=True)\n",
 93 |     "# Chop-off last FC layer\n",
 94 |     "print(list(resnet50.children())[-1])\n",
 95 |     "chopped_resnet50 = torch.nn.Sequential(*list(resnet50.children())[:-1])\n",
 96 |     "# CUDA\n",
 97 |     "chopped_resnet50.cuda()\n",
 98 |     "# Last layer is now avgpool2d\n",
 99 |     "print(list(chopped_resnet50.children())[-1])"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 6,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "def predict_fn(classifier, data, batchsize):\n",
109 |     "    \"\"\" Return features from classifier \"\"\"\n",
110 |     "    classifier.eval()\n",
111 |     "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
112 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
113 |     "        pred = classifier(Variable(torch.FloatTensor(dta).cuda()))\n",
114 |     "        out[idx*batchsize:(idx+1)*batchsize] = pred.data.cpu().numpy().squeeze()\n",
115 |     "    return out"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 7,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "cold_start = predict_fn(chopped_resnet50, fake_input_data_cf, BATCH_SIZE)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 8,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "name": "stdout",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "CPU times: user 2.05 s, sys: 397 ms, total: 2.44 s\n",
137 |       "Wall time: 2.44 s\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "%%time\n",
143 |     "features = predict_fn(chopped_resnet50, fake_input_data_cf, BATCH_SIZE)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 9,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "Images per second 524.5901639344262\n"
156 |      ]
157 |     }
158 |    ],
159 |    "source": [
160 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.44))"
161 |    ]
162 |   }
163 |  ],
164 |  "metadata": {
165 |   "kernelspec": {
166 |    "display_name": "Python 3",
167 |    "language": "python",
168 |    "name": "python3"
169 |   },
170 |   "language_info": {
171 |    "codemirror_mode": {
172 |     "name": "ipython",
173 |     "version": 3
174 |    },
175 |    "file_extension": ".py",
176 |    "mimetype": "text/x-python",
177 |    "name": "python",
178 |    "nbconvert_exporter": "python",
179 |    "pygments_lexer": "ipython3",
180 |    "version": "3.5.2"
181 |   }
182 |  },
183 |  "nbformat": 4,
184 |  "nbformat_minor": 2
185 | }
186 | 


--------------------------------------------------------------------------------
/notebooks/PyTorch_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN PyTorch Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import sys\n",
 18 |     "import numpy as np\n",
 19 |     "import math\n",
 20 |     "import torch\n",
 21 |     "import torch.nn as nn\n",
 22 |     "import torch.nn.functional as F\n",
 23 |     "import torch.optim as optim\n",
 24 |     "import torch.utils.data as data_utils\n",
 25 |     "import torch.nn.init as init\n",
 26 |     "from torch import autograd\n",
 27 |     "from torch.autograd import Variable\n",
 28 |     "from common.params_lstm import *\n",
 29 |     "from common.utils import *"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "# Force one-gpu\n",
 39 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 3,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "name": "stdout",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "OS:  linux\n",
 52 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 53 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 54 |       "PyTorch:  0.3.1\n",
 55 |       "Numpy:  1.14.1\n",
 56 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 57 |       "CUDA Version 8.0.61\n",
 58 |       "CuDNN Version  6.0.21\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "print(\"OS: \", sys.platform)\n",
 64 |     "print(\"Python: \", sys.version)\n",
 65 |     "print(\"PyTorch: \", torch.__version__)\n",
 66 |     "print(\"Numpy: \", np.__version__)\n",
 67 |     "print(\"GPU: \", get_gpu_name())\n",
 68 |     "print(get_cuda_version())\n",
 69 |     "print(\"CuDNN Version \", get_cudnn_version())"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "class SymbolModule(nn.Module):\n",
 79 |     "    def __init__(self, \n",
 80 |     "                 maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN):\n",
 81 |     "        super(SymbolModule, self).__init__()\n",
 82 |     "        self.embedding = nn.Embedding(num_embeddings=maxf,\n",
 83 |     "                                      embedding_dim=edim)\n",
 84 |     "        # With batch_first=True, input and output are\n",
 85 |     "        # provided as (batch, seq, features)\n",
 86 |     "        # cuDNN is used by default if possible\n",
 87 |     "        self.gru = nn.GRU(input_size=edim, \n",
 88 |     "                          hidden_size=nhid, \n",
 89 |     "                          num_layers=1,\n",
 90 |     "                          batch_first=True,\n",
 91 |     "                          bidirectional=False)   \n",
 92 |     "        self.l_out = nn.Linear(in_features=nhid*1,\n",
 93 |     "                               out_features=2)\n",
 94 |     "\n",
 95 |     "    def forward(self, x, nhid=NUMHIDDEN, batchs=BATCHSIZE):\n",
 96 |     "        x = self.embedding(x)\n",
 97 |     "        h0 = Variable(torch.zeros(1, batchs, nhid)).cuda()\n",
 98 |     "        x, h = self.gru(x, h0)  # outputs, states\n",
 99 |     "        # just get the last output state\n",
100 |     "        x = x[:,-1,:].squeeze()\n",
101 |     "        x = self.l_out(x)\n",
102 |     "        return x"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 5,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
112 |     "    opt = optim.Adam(m.parameters(), lr, betas=(b1, b2), eps=eps)\n",
113 |     "    criterion = nn.CrossEntropyLoss()\n",
114 |     "    return opt, criterion"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 6,
120 |    "metadata": {
121 |     "scrolled": true
122 |    },
123 |    "outputs": [
124 |     {
125 |      "name": "stdout",
126 |      "output_type": "stream",
127 |      "text": [
128 |       "Preparing train set...\n",
129 |       "Preparing test set...\n",
130 |       "Trimming to 30000 max-features\n",
131 |       "Padding to length 150\n",
132 |       "(25000, 150) (25000, 150) (25000,) (25000,)\n",
133 |       "int64 int64 int64 int64\n",
134 |       "CPU times: user 5.72 s, sys: 468 ms, total: 6.19 s\n",
135 |       "Wall time: 6.19 s\n"
136 |      ]
137 |     }
138 |    ],
139 |    "source": [
140 |     "%%time\n",
141 |     "# Data into format for library\n",
142 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
143 |     "# Torch-specific\n",
144 |     "x_train = x_train.astype(np.int64)\n",
145 |     "x_test = x_test.astype(np.int64)\n",
146 |     "y_train = y_train.astype(np.int64)\n",
147 |     "y_test = y_test.astype(np.int64)\n",
148 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
149 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 7,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "CPU times: user 2.93 s, sys: 879 ms, total: 3.81 s\n",
162 |       "Wall time: 3.82 s\n"
163 |      ]
164 |     }
165 |    ],
166 |    "source": [
167 |     "%%time\n",
168 |     "sym = SymbolModule()\n",
169 |     "sym.cuda() # CUDA!"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 8,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "name": "stdout",
179 |      "output_type": "stream",
180 |      "text": [
181 |       "CPU times: user 111 µs, sys: 25 µs, total: 136 µs\n",
182 |       "Wall time: 142 µs\n"
183 |      ]
184 |     }
185 |    ],
186 |    "source": [
187 |     "%%time\n",
188 |     "optimizer, criterion = init_model(sym)"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 9,
194 |    "metadata": {
195 |     "scrolled": false
196 |    },
197 |    "outputs": [
198 |     {
199 |      "name": "stdout",
200 |      "output_type": "stream",
201 |      "text": [
202 |       "0\n",
203 |       "1\n",
204 |       "2\n",
205 |       "CPU times: user 11.7 s, sys: 942 ms, total: 12.6 s\n",
206 |       "Wall time: 12.6 s\n"
207 |      ]
208 |     }
209 |    ],
210 |    "source": [
211 |     "%%time\n",
212 |     "# Main training loop: 12.6s\n",
213 |     "sym.train() # Sets training = True   \n",
214 |     "for j in range(EPOCHS):\n",
215 |     "    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
216 |     "        # Get samples\n",
217 |     "        data = Variable(torch.LongTensor(data).cuda())\n",
218 |     "        target = Variable(torch.LongTensor(target).cuda())\n",
219 |     "        # Init\n",
220 |     "        optimizer.zero_grad()\n",
221 |     "        # Forwards\n",
222 |     "        output = sym(data)\n",
223 |     "        # Loss\n",
224 |     "        loss = criterion(output, target)\n",
225 |     "        # Back-prop\n",
226 |     "        loss.backward()\n",
227 |     "        optimizer.step()\n",
228 |     "    # Log\n",
229 |     "    print(j)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 10,
235 |    "metadata": {},
236 |    "outputs": [
237 |     {
238 |      "name": "stdout",
239 |      "output_type": "stream",
240 |      "text": [
241 |       "CPU times: user 1.52 s, sys: 23.9 ms, total: 1.54 s\n",
242 |       "Wall time: 1.54 s\n"
243 |      ]
244 |     }
245 |    ],
246 |    "source": [
247 |     "%%time\n",
248 |     "# Main evaluation loop: 1.54s\n",
249 |     "sym.eval() # Sets training = False\n",
250 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
251 |     "y_guess = np.zeros(n_samples, dtype=np.int)\n",
252 |     "y_truth = y_test[:n_samples]\n",
253 |     "c = 0\n",
254 |     "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
255 |     "    # Get samples\n",
256 |     "    data = Variable(torch.LongTensor(data).cuda())\n",
257 |     "    # Forwards\n",
258 |     "    output = sym(data)\n",
259 |     "    pred = output.data.max(1)[1].cpu().numpy().squeeze()\n",
260 |     "    # Collect results\n",
261 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
262 |     "    c += 1"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": 11,
268 |    "metadata": {},
269 |    "outputs": [
270 |     {
271 |      "name": "stdout",
272 |      "output_type": "stream",
273 |      "text": [
274 |       "Accuracy:  0.8622996794871794\n"
275 |      ]
276 |     }
277 |    ],
278 |    "source": [
279 |     "print(\"Accuracy: \", sum(y_guess == y_truth)/len(y_guess))"
280 |    ]
281 |   }
282 |  ],
283 |  "metadata": {
284 |   "anaconda-cloud": {},
285 |   "kernelspec": {
286 |    "display_name": "Python 3",
287 |    "language": "python",
288 |    "name": "python3"
289 |   },
290 |   "language_info": {
291 |    "codemirror_mode": {
292 |     "name": "ipython",
293 |     "version": 3
294 |    },
295 |    "file_extension": ".py",
296 |    "mimetype": "text/x-python",
297 |    "name": "python",
298 |    "nbconvert_exporter": "python",
299 |    "pygments_lexer": "ipython3",
300 |    "version": "3.5.2"
301 |   }
302 |  },
303 |  "nbformat": 4,
304 |  "nbformat_minor": 2
305 | }
306 | 


--------------------------------------------------------------------------------
/notebooks/Tensorflow_Inference.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "%%bash\n",
 10 |     "#wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz\n",
 11 |     "#tar -xvf resnet_v1_50_2016_08_28.tar.gz\n",
 12 |     "#rm resnet_v1_50_2016_08_28.tar.gz"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {
 19 |     "scrolled": false
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import os\n",
 24 |     "import sys\n",
 25 |     "import numpy as np\n",
 26 |     "import tensorflow as tf\n",
 27 |     "# Upgrade dask before importing contrib!\n",
 28 |     "import tensorflow.contrib.slim\n",
 29 |     "from tensorflow.contrib.slim.nets import resnet_v1\n",
 30 |     "from common.params_inf import *\n",
 31 |     "from common.utils import *"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "# Force one-gpu\n",
 41 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 4,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "OS:  linux\n",
 54 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 55 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 56 |       "Numpy:  1.14.1\n",
 57 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 58 |       "CUDA Version 8.0.61\n",
 59 |       "CuDNN Version  6.0.21\n"
 60 |      ]
 61 |     }
 62 |    ],
 63 |    "source": [
 64 |     "print(\"OS: \", sys.platform)\n",
 65 |     "print(\"Python: \", sys.version)\n",
 66 |     "print(\"Numpy: \", np.__version__)\n",
 67 |     "print(\"GPU: \", get_gpu_name())\n",
 68 |     "print(get_cuda_version())\n",
 69 |     "print(\"CuDNN Version \", get_cudnn_version())"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 5,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "def predict_fn(classifier, data, batchsize):\n",
 79 |     "    \"\"\" Return features from classifier \"\"\"\n",
 80 |     "    out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n",
 81 |     "    for idx, dta in yield_mb_X(data, batchsize):\n",
 82 |     "        pred = sess.run(classifier, feed_dict={input_tensor: dta}).squeeze()\n",
 83 |     "        out[idx*batchsize:(idx+1)*batchsize] = pred\n",
 84 |     "    return out"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 6,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "name": "stdout",
 94 |      "output_type": "stream",
 95 |      "text": [
 96 |       "(1280, 224, 224, 3) (1280, 3, 224, 224)\n"
 97 |      ]
 98 |     }
 99 |    ],
100 |    "source": [
101 |     "# Create batches of fake data\n",
102 |     "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n",
103 |     "print(fake_input_data_cl.shape, fake_input_data_cf.shape)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 7,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "INFO:tensorflow:Restoring parameters from resnet_v1_50.ckpt\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "# Checkpoint path and input placeholder\n",
121 |     "checkpoint_file = 'resnet_v1_50.ckpt'\n",
122 |     "input_tensor = tf.placeholder(tf.float32, shape=(None,224,224,3), name='input_image')\n",
123 |     "\n",
124 |     "# Load the model\n",
125 |     "sess = tf.Session()\n",
126 |     "arg_scope = resnet_v1.resnet_arg_scope()\n",
127 |     "with tensorflow.contrib.slim.arg_scope(arg_scope):\n",
128 |     "    # From the resnet_v1_50 docstring ->\n",
129 |     "    #     num_classes: Number of predicted classes for classification tasks. If None\n",
130 |     "    #     we return the features before the logit layer (num_classes is not passed here, so `logits` holds those features).\n",
131 |     "    logits, end_points = resnet_v1.resnet_v1_50(input_tensor, is_training=False)\n",
132 |     "    \n",
133 |     "saver = tf.train.Saver()\n",
134 |     "saver.restore(sess, checkpoint_file)"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 9,
140 |    "metadata": {},
141 |    "outputs": [],
142 |    "source": [
143 |     "cold_start = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 10,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "CPU times: user 1.96 s, sys: 480 ms, total: 2.44 s\n",
156 |       "Wall time: 2.26 s\n"
157 |      ]
158 |     }
159 |    ],
160 |    "source": [
161 |     "%%time\n",
162 |     "features = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 12,
168 |    "metadata": {},
169 |    "outputs": [
170 |     {
171 |      "name": "stdout",
172 |      "output_type": "stream",
173 |      "text": [
174 |       "Images per second 566.3716814159293\n"
175 |      ]
176 |     }
177 |    ],
178 |    "source": [
179 |     "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.26))"
180 |    ]
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.5.2"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 2
204 | }
205 | 


--------------------------------------------------------------------------------
/notebooks/Tensorflow_RNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# High-level RNN TF Example"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import os\n",
 18 |     "import sys\n",
 19 |     "import tensorflow as tf\n",
 20 |     "from common.params_lstm import *\n",
 21 |     "from common.utils import *"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 2,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# Force one-gpu\n",
 31 |     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "OS:  linux\n",
 44 |       "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
 45 |       "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
 46 |       "Numpy:  1.14.1\n",
 47 |       "Tensorflow:  1.4.0\n",
 48 |       "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
 49 |       "CUDA Version 8.0.61\n",
 50 |       "CuDNN Version  6.0.21\n"
 51 |      ]
 52 |     }
 53 |    ],
 54 |    "source": [
 55 |     "print(\"OS: \", sys.platform)\n",
 56 |     "print(\"Python: \", sys.version)\n",
 57 |     "print(\"Numpy: \", np.__version__)\n",
 58 |     "print(\"Tensorflow: \", tf.__version__)\n",
 59 |     "print(\"GPU: \", get_gpu_name())\n",
 60 |     "print(get_cuda_version())\n",
 61 |     "print(\"CuDNN Version \", get_cudnn_version())"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 4,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "def create_symbol(CUDNN=True, \n",
 71 |     "                  maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, batchs=BATCHSIZE):\n",
 72 |     "    word_vectors = tf.contrib.layers.embed_sequence(X, vocab_size=maxf, embed_dim=edim)\n",
 73 |     "    word_list = tf.unstack(word_vectors, axis=1)\n",
 74 |     "    \n",
 75 |     "    if not CUDNN:\n",
 76 |     "        cell = tf.contrib.rnn.GRUCell(nhid)\n",
 77 |     "        outputs, states = tf.contrib.rnn.static_rnn(cell, word_list, dtype=tf.float32)\n",
 78 |     "    else:\n",
 79 |     "        # Use the fused cuDNN GRU op, which applies because this is a plain single-layer GRU\n",
 80 |     "        from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops\n",
 81 |     "        cudnn_cell = cudnn_rnn_ops.CudnnGRU(num_layers=1, \n",
 82 |     "                                            num_units=nhid, \n",
 83 |     "                                            input_size=edim, \n",
 84 |     "                                            input_mode='linear_input')\n",
 85 |     "        params_size_t = cudnn_cell.params_size()\n",
 86 |     "        params = tf.Variable(tf.random_uniform([params_size_t], -0.1, 0.1), validate_shape=False)   \n",
 87 |     "        input_h = tf.Variable(tf.zeros([1, batchs, nhid]))\n",
 88 |     "        outputs, states = cudnn_cell(input_data=word_list,\n",
 89 |     "                                     input_h=input_h,\n",
 90 |     "                                     params=params)\n",
 91 |     "        logits = tf.layers.dense(outputs[-1], 2, activation=None, name='output')\n",
 92 |     "    return logits"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 5,
 98 |    "metadata": {},
 99 |    "outputs": [],
100 |    "source": [
101 |     "def init_model(m, y, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n",
102 |     "    # Single-class labels, don't need dense one-hot\n",
103 |     "    # Expects unscaled logits, not output of tf.nn.softmax\n",
104 |     "    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=m, labels=y)\n",
105 |     "    loss = tf.reduce_mean(xentropy)\n",
106 |     "    optimizer = tf.train.AdamOptimizer(lr, b1, b2, eps)\n",
107 |     "    training_op = optimizer.minimize(loss)\n",
108 |     "    return training_op"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 6,
114 |    "metadata": {
115 |     "scrolled": true
116 |    },
117 |    "outputs": [
118 |     {
119 |      "name": "stdout",
120 |      "output_type": "stream",
121 |      "text": [
122 |       "Preparing train set...\n",
123 |       "Preparing test set...\n",
124 |       "Trimming to 30000 max-features\n",
125 |       "Padding to length 150\n",
126 |       "(25000, 150) (25000, 150) (25000,) (25000,)\n",
127 |       "int32 int32 int32 int32\n",
128 |       "CPU times: user 5.9 s, sys: 417 ms, total: 6.32 s\n",
129 |       "Wall time: 6.32 s\n"
130 |      ]
131 |     }
132 |    ],
133 |    "source": [
134 |     "%%time\n",
135 |     "# Data into format for library\n",
136 |     "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
137 |     "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
138 |     "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 7,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "name": "stdout",
148 |      "output_type": "stream",
149 |      "text": [
150 |       "CPU times: user 737 ms, sys: 76.1 ms, total: 814 ms\n",
151 |       "Wall time: 820 ms\n"
152 |      ]
153 |     }
154 |    ],
155 |    "source": [
156 |     "%%time\n",
157 |     "# Place-holders\n",
158 |     "X = tf.placeholder(tf.int32, shape=[None, MAXLEN])\n",
159 |     "y = tf.placeholder(tf.int32, shape=[None])\n",
160 |     "sym = create_symbol()"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 8,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "CPU times: user 836 ms, sys: 693 ms, total: 1.53 s\n",
173 |       "Wall time: 1.54 s\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "%%time\n",
179 |     "model = init_model(sym, y)\n",
180 |     "init = tf.global_variables_initializer()\n",
181 |     "sess = tf.Session()\n",
182 |     "sess.run(init)"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 9,
188 |    "metadata": {},
189 |    "outputs": [
190 |     {
191 |      "name": "stdout",
192 |      "output_type": "stream",
193 |      "text": [
194 |       "0 Train accuracy: 0.84375\n",
195 |       "1 Train accuracy: 0.96875\n",
196 |       "2 Train accuracy: 0.984375\n",
197 |       "CPU times: user 19 s, sys: 2.77 s, total: 21.8 s\n",
198 |       "Wall time: 22.2 s\n"
199 |      ]
200 |     }
201 |    ],
202 |    "source": [
203 |     "%%time\n",
204 |     "# Main training loop: 22s\n",
205 |     "correct = tf.nn.in_top_k(sym, y, 1)\n",
206 |     "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n",
207 |     "for j in range(EPOCHS):\n",
208 |     "    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
209 |     "        sess.run(model, feed_dict={X: data, y: label})\n",
210 |     "    # Log\n",
211 |     "    acc_train = sess.run(accuracy, feed_dict={X: data, y: label})\n",
212 |     "    print(j, \"Train accuracy:\", acc_train)"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": 10,
218 |    "metadata": {},
219 |    "outputs": [
220 |     {
221 |      "name": "stdout",
222 |      "output_type": "stream",
223 |      "text": [
224 |       "CPU times: user 8.67 s, sys: 651 ms, total: 9.32 s\n",
225 |       "Wall time: 9.19 s\n"
226 |      ]
227 |     }
228 |    ],
229 |    "source": [
230 |     "%%time\n",
231 |     "# Main evaluation loop: 9.19s\n",
232 |     "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
233 |     "y_guess = np.zeros(n_samples, dtype=np.int)\n",
234 |     "y_truth = y_test[:n_samples]\n",
235 |     "c = 0\n",
236 |     "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n",
237 |     "    pred = tf.argmax(sym, 1)\n",
238 |     "    output = sess.run(pred, feed_dict={X: data})\n",
239 |     "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = output\n",
240 |     "    c += 1"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 12,
246 |    "metadata": {},
247 |    "outputs": [
248 |     {
249 |      "name": "stdout",
250 |      "output_type": "stream",
251 |      "text": [
252 |       "Accuracy:  0.8598557692307692\n"
253 |      ]
254 |     }
255 |    ],
256 |    "source": [
257 |     "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
258 |    ]
259 |   }
260 |  ],
261 |  "metadata": {
262 |   "anaconda-cloud": {},
263 |   "kernelspec": {
264 |    "display_name": "Python 3",
265 |    "language": "python",
266 |    "name": "python3"
267 |   },
268 |   "language_info": {
269 |    "codemirror_mode": {
270 |     "name": "ipython",
271 |     "version": 3
272 |    },
273 |    "file_extension": ".py",
274 |    "mimetype": "text/x-python",
275 |    "name": "python",
276 |    "nbconvert_exporter": "python",
277 |    "pygments_lexer": "ipython3",
278 |    "version": "3.5.2"
279 |   }
280 |  },
281 |  "nbformat": 4,
282 |  "nbformat_minor": 2
283 | }
284 | 


--------------------------------------------------------------------------------
/notebooks/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/__init__.py


--------------------------------------------------------------------------------
/notebooks/common/automobile10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/automobile10.png


--------------------------------------------------------------------------------
/notebooks/common/info.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/info.PNG


--------------------------------------------------------------------------------
/notebooks/common/params.json:
--------------------------------------------------------------------------------
 1 | { 
 2 |     "params_cnn":
 3 |     {
 4 |         "EPOCHS":10,
 5 |         "BATCHSIZE":64,
 6 |         "LR":0.01,
 7 |         "MOMENTUM":0.9,
 8 |         "N_CLASSES":10,
 9 |         "GPU":true
10 |    },
11 |    "params_lstm":
12 |    {
13 |         "EPOCHS":3,
14 |         "BATCHSIZE":64,
15 |         "EMBEDSIZE":125,
16 |         "NUMHIDDEN":100,
17 |         "DROPOUT":0.2,
18 |         "LR":0.001,
19 |         "BETA_1":0.9,
20 |         "BETA_2":0.999,
21 |         "EPS":1e-08,
22 |         "MAXLEN":150,
23 |         "MAXFEATURES":30000,
24 |         "GPU":true
25 |    },
26 |    "params_inf":
27 |    {
28 |        "BATCH_SIZE":32, 
29 |        "RESNET_FEATURES":2048, 
30 |        "BATCHES_GPU":40
31 |    }
32 | }
33 | 


--------------------------------------------------------------------------------
/notebooks/common/params.py:
--------------------------------------------------------------------------------
# Hyperparams
# These values are the same as the "params_cnn" entry of common/params.json.
# NOTE(review): the notebooks that consume this module are not visible here;
# the names suggest CNN training settings (SGD with momentum) -- confirm.
EPOCHS = 10
BATCHSIZE = 64
LR = 0.01
MOMENTUM = 0.9
N_CLASSES = 10
GPU = True


--------------------------------------------------------------------------------
/notebooks/common/params_dense.py:
--------------------------------------------------------------------------------
# Constants for the "dense" scenario.
# NOTE(review): the code that imports this module is not visible here; the
# names suggest 224x224 three-channel image inputs with 14 output classes --
# confirm against the consuming notebook.
CLASSES = 14
WIDTH = 224
HEIGHT = 224
CHANNELS = 3
LR = 0.0001
EPOCHS = 5
BATCHSIZE = 64
# Three-element lists, one value per channel (R, G, B order per the names).
IMAGENET_RGB_MEAN = [0.485, 0.456, 0.406]
IMAGENET_RGB_SD = [0.229, 0.224, 0.225]
TOT_PATIENT_NUMBER = 30805  # From data


--------------------------------------------------------------------------------
/notebooks/common/params_inf.py:
--------------------------------------------------------------------------------
# Inference parameters.
# These values are the same as the "params_inf" entry of common/params.json.
BATCH_SIZE = 32
RESNET_FEATURES = 2048
BATCHES_GPU = 40


--------------------------------------------------------------------------------
/notebooks/common/params_lstm.py:
--------------------------------------------------------------------------------
 1 | # Hyperparams LSTM
 2 | EPOCHS=3
 3 | BATCHSIZE=64
 4 | EMBEDSIZE=125
 5 | NUMHIDDEN=100
 6 | DROPOUT=0.2
 7 | LR=0.001
 8 | BETA_1=0.9
 9 | BETA_2=0.999
10 | EPS=1e-08
11 | MAXLEN=150 #maximum size of the word sequence
12 | MAXFEATURES=30000 #vocabulary size
13 | GPU=True
14 | 


--------------------------------------------------------------------------------
/notebooks/common/utils.R:
--------------------------------------------------------------------------------
  1 | # Create an array of fake data to run inference on
  2 | give_fake_data <- function(batches, col_major = FALSE){
  3 |   set.seed(0)
  4 |   if (col_major) {
  5 |     shape <- c(224, 224, 3, batches)
  6 |   } else {
  7 |     shape <- c(batches, 224, 224, 3)
  8 |   }
  9 |   dat <- array(runif(batches*224*224*3), dim = shape)
 10 |   return(dat)
 11 | }
 12 | 
 13 | # Return features from classifier (OLD)
 14 | predict_fn <- function(classifier, data, batchsize){
 15 |     out <- array(0, dim = c(dim(data)[1], params$RESNET_FEATURES))
 16 |     idx <- 0:(dim(data)[1] %/% batchsize - 1)
 17 |     for (i in idx){
 18 |         dta <- data[(i*batchsize + 1):((i+1)*batchsize),,,]
 19 |         out[(i*batchsize + 1):((i+1)*batchsize), ] <- predict_on_batch(classifier, dta)
 20 |     }
 21 |     return(out)
 22 | }
 23 | 
 24 | 
 25 | # Get GPU name
 26 | get_gpu_name <- function(){
 27 |     tryCatch(
 28 |         {
 29 |             out_list <- system("nvidia-smi --query-gpu=gpu_name --format=csv", intern = TRUE)
 30 |             out_list <- out_list[out_list != "name"]
 31 |             return(out_list)
 32 |         },
 33 |         error = function(e)
 34 |         {
 35 |             print(e)
 36 |         }
 37 |         )
 38 | }
 39 | 
 40 | # Get CUDA version
 41 | get_cuda_version <- function(){
 42 |     tryCatch(
 43 |         {
 44 |             out <- system("cat /usr/local/cuda/version.txt", intern = TRUE)
 45 |             return(out)
 46 |         },
 47 |         error = function(e)
 48 |         {
 49 |             print(e)
 50 |         }
 51 |         )
 52 | }
 53 | 
 54 | # Get CuDNN version
 55 | get_cudnn_version <- function(){
 56 |     tryCatch(
 57 |         {
 58 |             out <- system("cat /usr/include/cudnn.h | grep CUDNN_MAJOR", intern = TRUE)[1]
 59 |             indx <- regexpr("(\\d+)", out)
 60 |             major <- regmatches(out, indx)
 61 |             
 62 |             out <- system("cat /usr/include/cudnn.h | grep CUDNN_MINOR", intern = TRUE)[1]
 63 |             indx <- regexpr("(\\d+)", out)
 64 |             minor <- regmatches(out, indx)
 65 |             
 66 |             out <- system("cat /usr/include/cudnn.h | grep CUDNN_PATCHLEVEL", intern = TRUE)[1]
 67 |             indx <- regexpr("(\\d+)", out)
 68 |             patch <- regmatches(out, indx)
 69 |             
 70 |             version <- paste(major, minor, patch, sep = ".")
 71 |             return(paste0("CuDNN Version ", version))
 72 |         },
 73 |         error = function(e)
 74 |         {
 75 |             print(e)
 76 |         }
 77 |         )
 78 | }
 79 | 
 80 | 
 81 | 
 82 | # Function to download the cifar data, if not already downloaded
 83 | maybe_download_cifar <- function(col_major = TRUE, src = 'https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/cifar-10-binary.tar.gz '){
 84 |   
 85 |   tryCatch(
 86 |     {
 87 |       data <- suppressWarnings(process_cifar_bin(col_major))
 88 |       return(data)
 89 |     },
 90 |     error = function(e)
 91 |     {
 92 |       print(paste0('Data does not exist. Downloading ', src))
 93 |       download.file(src, destfile="tmp.tar.gz")
 94 |       print('Extracting files ...')
 95 |       untar("tmp.tar.gz")
 96 |       file.remove('tmp.tar.gz')
 97 |       return(process_cifar_bin(col_major))
 98 |     }
 99 |   )
100 | }
101 | 
102 | 
# A function to process CIFAR10 dataset in binary format
#
# Reads data_batch_1..5.bin and test_batch.bin from "cifar-10-batches-bin"
# (relative to the working directory) via read_file(), and returns a list with
# x_train, x_test (pixel values divided by 255) and y_train, y_test (labels).
#
# col_major: selects the axis permutation applied to the (32, 32, 3, N) image
#            array: c(2, 1, 3, 4) when TRUE, c(4, 3, 2, 1) otherwise.
process_cifar_bin <- function(col_major) {

  bin_dir <- "cifar-10-batches-bin"

  # Pull one named field out of every record and flatten it into a vector.
  field <- function(records, name) unlist(lapply(records, function(rec) rec[[name]]))

  train_files <- file.path(bin_dir, paste0("data_batch_", 1:5, ".bin"))
  train_records <- do.call(c, lapply(train_files, read_file))
  train_pixels <- field(train_records, "image")

  axis_order <- if (col_major) c(2, 1, 3, 4) else c(4, 3, 2, 1)

  # Reshape a flat pixel vector into n images, permute axes, scale to [0, 1].
  as_images <- function(pixels, n) aperm(array(pixels, c(32, 32, 3, n)), perm = axis_order) / 255

  x_train <- as_images(train_pixels, 50000)
  y_train <- field(train_records, "label")

  test_records <- read_file(file.path(bin_dir, "test_batch.bin"))
  x_test <- as_images(field(test_records, "image"), 10000)
  y_test <- field(test_records, "label")

  list(x_train = x_train, x_test = x_test, y_train = y_train, y_test = y_test)
}
130 | 
131 |                           
132 |                           
# A function to load CIFAR10 dataset
#
# Loads the data through maybe_download_cifar(col_major) and returns a list
# with x_train, x_test, y_train and y_test.
#
# one_hot:   if TRUE, each label vector is converted to an indicator matrix
#            (one column per distinct label value present in that vector)
#            using model.matrix().
# col_major: passed through to maybe_download_cifar().
cifar_for_library <- function(one_hot = FALSE, col_major = TRUE) {

  # Indicator-matrix encoding of a label vector.
  one_hot_encode <- function(labels) {
    Y <- data.frame(label = factor(labels))
    with(Y, model.matrix(~label+0))
  }

  cifar <- maybe_download_cifar(col_major)

  train_labels <- cifar$y_train
  test_labels <- cifar$y_test

  if (one_hot) {
    train_labels <- one_hot_encode(train_labels)
    test_labels <- one_hot_encode(test_labels)
  }

  list(x_train = cifar$x_train,
       x_test = cifar$x_test,
       y_train = train_labels,
       y_test = test_labels)
}
153 | 
# Load hyper-parameters for different scenarios:
# cnn, lstm, or inference
#
# Reads "./common/params.json" (relative to the working directory) with rjson
# and returns the "params_cnn", "params_lstm" or "params_inf" entry depending
# on `params_for`. Any other value of `params_for` raises an error.
load_params <- function(params_for){

    require(rjson)
    all_params <- fromJSON(file = "./common/params.json")

    if (params_for == "cnn") return(all_params$params_cnn)
    if (params_for == "lstm") return(all_params$params_lstm)
    if (params_for == "inference") return(all_params$params_inf)

    stop("params_for should be set to one of the following: cnn, lstm or inference.")
}
171 | 
172 | 
# Function to download the mxnet resnet50 model, if not already downloaded
#
# First tries mx.model.load(prefix = "resnet-50", iteration = 0) from the
# working directory. If that raises an error, the symbol JSON and the
# parameter file are downloaded next to it and the load is retried.
maybe_download_resnet50 <- function() {
  src <- 'http://data.mxnet.io/models/imagenet/'
  tryCatch(
    {
      model <- suppressWarnings(mx.model.load(prefix = "resnet-50", iteration = 0))
      return(model)
    },
    error = function(e)
    {
      print(paste0('Model does not exist. Downloading ', src))
      # `src` already ends with "/", so concatenate directly; file.path()
      # would insert a second slash into the URL.
      # mode = "wb" writes the bytes unchanged: the .params file is binary and
      # a text-mode transfer would corrupt it on Windows.
      download.file(paste0(src, 'resnet/50-layers/resnet-50-symbol.json'),
                    destfile="resnet-50-symbol.json", mode = "wb")
      download.file(paste0(src, 'resnet/50-layers/resnet-50-0000.params'),
                    destfile="resnet-50-0000.params", mode = "wb")
      return(mx.model.load(prefix = "resnet-50", iteration = 0))
    }
  )
}
190 | 
# Load the ResNet-50 model; thin alias for maybe_download_resnet50().
load_resnet50 <- function() {
  maybe_download_resnet50()
}
192 | 
# Read one record from an open binary connection: a 1-byte label followed by
# 32*32*3 single-byte pixel values.
#
# i:       unused; present so the function can be driven by lapply().
# to_read: open binary connection positioned at the start of a record.
#
# Returns list(label = <integer>, image = <integer vector of length 3072>).
read_image <- function(i, to_read) {
  n_pixels <- 32*32*3
  record_label <- readBin(to_read, integer(), n = 1, size = 1)
  pixel_bytes <- readBin(to_read, raw(), size = 1, n = n_pixels)
  list(label = record_label, image = as.integer(pixel_bytes))
}
198 | 
199 | 
# Read 10000 consecutive records from the binary file `f` using read_image()
# and return them as a list of list(label, image) entries.
read_file <- function(f) {
  to_read <- file(f, "rb")
  # Close the connection even if reading fails part-way, so it is not leaked.
  on.exit(close(to_read), add = TRUE)
  lapply(1:10000, read_image, to_read)
}
206 | 
# Plot a CIFAR10 image
#
# img: 3-d array of colour intensities in [0, 1]. If its first dimension is
#      smaller than its third, the first axis is indexed as the colour
#      channel; otherwise the third axis is.
#
# Draws the image with grid.raster() (no interpolation).
plot_image <- function(img) {
  library(grid)
  dims <- dim(img)
  if (dims[1] < dims[3]) {
    # Channel on the first axis.
    red <- img[1,,]
    green <- img[2,,]
    blue <- img[3,,]
  } else {
    # Channel on the last axis.
    red <- img[,,1]
    green <- img[,,2]
    blue <- img[,,3]
  }
  # rgb() returns a flat vector; restore the 2-d pixel layout.
  pixel_colours <- rgb(red, green, blue, maxColorValue = 1)
  dim(pixel_colours) <- dim(red)
  grid.raster(pixel_colours, interpolate = FALSE)
  invisible(NULL)
}
225 | 
226 | 
# Load the IMDB dataset from "imdb.Rds" in the working directory, downloading
# it from `src` first if reading the local file raises an error.
#
# src: URL of the serialized dataset.
maybe_download_imdb <- function(src = 'https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/imdb.Rds'){

  tryCatch(
    {
      data <- suppressWarnings(readRDS("imdb.Rds"))
      return(data)
    },
    error = function(e)
    {
      print(paste0('Data does not exist. Downloading ', src))
      # The .Rds file is binary: force binary mode so the bytes are written
      # unchanged on every platform.
      download.file(src, destfile="imdb.Rds", mode = "wb")
      return(readRDS("imdb.Rds"))
    }
  )
}
242 | 
243 | 
# Load the IMDB dataset; thin alias for maybe_download_imdb().
imdb_for_library <- function() {
  maybe_download_imdb()
}
245 |     
246 | 
247 | 


--------------------------------------------------------------------------------
/support/chainer_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/chainer_4gpu.JPG


--------------------------------------------------------------------------------
/support/gluon_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/gluon_4gpu.JPG


--------------------------------------------------------------------------------
/support/keras_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/keras_4gpu.JPG


--------------------------------------------------------------------------------
/support/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/logo.png


--------------------------------------------------------------------------------
/support/pytorch_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/pytorch_4gpu.JPG


--------------------------------------------------------------------------------
/support/tensorflow_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/tensorflow_4gpu.JPG


--------------------------------------------------------------------------------