├── .gitignore ├── LICENSE ├── README.md ├── notebooks ├── CNTK_CNN.ipynb ├── CNTK_CNN_highAPI.ipynb ├── CNTK_Inference.ipynb ├── CNTK_RNN.ipynb ├── Caffe2_CNN.ipynb ├── Caffe2_Inference.ipynb ├── Chainer_CNN.ipynb ├── Chainer_Inference.ipynb ├── Chainer_MultiGPU.ipynb ├── Gluon_CNN.ipynb ├── Gluon_Inference.ipynb ├── Gluon_MultiGPU.ipynb ├── Gluon_RNN.ipynb ├── KerasR_TF_CNN.ipynb ├── KerasR_TF_Inference.ipynb ├── KerasR_TF_RNN.ipynb ├── Keras_CNTK_CNN.ipynb ├── Keras_CNTK_Inference.ipynb ├── Keras_CNTK_RNN.ipynb ├── Keras_TF_CNN.ipynb ├── Keras_TF_Inference.ipynb ├── Keras_TF_MultiGPU.ipynb ├── Keras_TF_RNN.ipynb ├── Keras_Theano_CNN.ipynb ├── Knet_CNN.ipynb ├── Knet_Inference.ipynb ├── Knet_RNN.ipynb ├── MXNet_CNN.ipynb ├── MXNet_CNN_highAPI.ipynb ├── MXNet_Inference.ipynb ├── MXNet_RNN.ipynb ├── MXNet_RNN_TNC.ipynb ├── PyTorch_CNN.ipynb ├── PyTorch_Inference.ipynb ├── PyTorch_MultiGPU-Exp-Loss.ipynb ├── PyTorch_MultiGPU.ipynb ├── PyTorch_RNN.ipynb ├── Tensorflow_CNN.ipynb ├── Tensorflow_CNN_highAPI.ipynb ├── Tensorflow_Inference.ipynb ├── Tensorflow_MultiGPU.ipynb ├── Tensorflow_RNN.ipynb ├── Theano_Lasagne_CNN.ipynb └── common │ ├── __init__.py │ ├── automobile10.png │ ├── info.PNG │ ├── params.json │ ├── params.py │ ├── params_dense.py │ ├── params_inf.py │ ├── params_lstm.py │ ├── utils.R │ └── utils.py └── support ├── chainer_4gpu.JPG ├── gluon_4gpu.JPG ├── keras_4gpu.JPG ├── logo.png ├── pytorch_4gpu.JPG └── tensorflow_4gpu.JPG /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.swp 4 | .ipynb_checkpoints/ 5 | cifar-10-batches-py/ 6 | __pycache__ 7 | .DS_Store 8 | notebooks/chestxray 9 | notebooks/*-0000.params 10 | notebooks/*-symbol.json 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Microsoft Corporation 4 | 5 | 
Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /notebooks/CNTK_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level CNTK Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import os\n", 18 | "import sys\n", 19 | "import cntk\n", 20 | "from cntk.layers import Convolution2D, MaxPooling, Dense, Dropout\n", 21 | "from common.params import *\n", 22 | "from common.utils import *" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Force one-gpu\n", 32 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "OS: linux\n", 45 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 46 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 47 | "Numpy: 1.14.1\n", 48 | "CNTK: 2.4\n", 49 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 50 | "CUDA Version 8.0.61\n", 51 | "CuDNN Version 6.0.21\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(\"OS: \", sys.platform)\n", 57 | "print(\"Python: \", sys.version)\n", 58 | "print(\"Numpy: \", np.__version__)\n", 59 | "print(\"CNTK: \", cntk.__version__)\n", 60 | "print(\"GPU: \", get_gpu_name())\n", 61 | "print(get_cuda_version())\n", 62 | "print(\"CuDNN Version \", get_cudnn_version())" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "def create_symbol(n_classes=N_CLASSES):\n", 72 | " # Weight initialiser from uniform distribution\n", 73 
| " # Activation (unless stated) is ReLU\n", 74 | " with cntk.layers.default_options(init = cntk.glorot_uniform(), activation = cntk.relu):\n", 75 | " x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(features)\n", 76 | " x = Convolution2D(filter_shape=(3, 3), num_filters=50, pad=True)(x)\n", 77 | " x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)\n", 78 | " x = Dropout(0.25)(x)\n", 79 | "\n", 80 | " x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)\n", 81 | " x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)\n", 82 | " x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)\n", 83 | " x = Dropout(0.25)(x) \n", 84 | " \n", 85 | " x = Dense(512)(x)\n", 86 | " x = Dropout(0.5)(x)\n", 87 | " x = Dense(n_classes, activation=None)(x)\n", 88 | " return x" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "def init_model(m, labels, lr=LR, momentum=MOMENTUM):\n", 98 | " # Loss (dense labels); check if support for sparse labels\n", 99 | " loss = cntk.cross_entropy_with_softmax(m, labels) \n", 100 | " # Momentum SGD\n", 101 | " # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb\n", 102 | " # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient\n", 103 | " # if unit_gain=True then ...(1-momentum)*gradient\n", 104 | " learner = cntk.momentum_sgd(m.parameters,\n", 105 | " lr=cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch) ,\n", 106 | " momentum=cntk.momentum_schedule(momentum), \n", 107 | " unit_gain=False)\n", 108 | " trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)), [learner])\n", 109 | " return trainer" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Preparing train set...\n", 122 | 
"Preparing test set...\n", 123 | "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000, 10) (10000, 10)\n", 124 | "float32 float32 float32 float32\n", 125 | "CPU times: user 671 ms, sys: 576 ms, total: 1.25 s\n", 126 | "Wall time: 1.25 s\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "%%time\n", 132 | "# Data into format for library\n", 133 | "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True, one_hot=True)\n", 134 | "# CNTK format\n", 135 | "y_train = y_train.astype(np.float32)\n", 136 | "y_test = y_test.astype(np.float32)\n", 137 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 138 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "CPU times: user 24 ms, sys: 32 ms, total: 56 ms\n", 151 | "Wall time: 69 ms\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "%%time\n", 157 | "# Placeholders\n", 158 | "features = cntk.input_variable((3, 32, 32), np.float32)\n", 159 | "labels = cntk.input_variable(N_CLASSES, np.float32)\n", 160 | "# Load symbol\n", 161 | "sym = create_symbol()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 8, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "CPU times: user 119 ms, sys: 116 ms, total: 235 ms\n", 174 | "Wall time: 236 ms\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "%%time\n", 180 | "trainer = init_model(sym, labels)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 9, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Epoch 1 | Accuracy: 0.468750\n", 193 | "Epoch 2 | Accuracy: 0.640625\n", 194 | "Epoch 3 | Accuracy: 0.609375\n", 195 | "Epoch 4 | Accuracy: 
0.578125\n", 196 | "Epoch 5 | Accuracy: 0.812500\n", 197 | "Epoch 6 | Accuracy: 0.781250\n", 198 | "Epoch 7 | Accuracy: 0.671875\n", 199 | "Epoch 8 | Accuracy: 0.843750\n", 200 | "Epoch 9 | Accuracy: 0.796875\n", 201 | "Epoch 10 | Accuracy: 0.843750\n", 202 | "CPU times: user 40.3 s, sys: 13.1 s, total: 53.3 s\n", 203 | "Wall time: 53.2 s\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "%%time \n", 209 | "# Main training loop: 53s\n", 210 | "for j in range(EPOCHS):\n", 211 | " for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 212 | " trainer.train_minibatch({features: data, labels: label})\n", 213 | " # Log (this is just last batch in epoch, not average of batches)\n", 214 | " eval_error = trainer.previous_minibatch_evaluation_average\n", 215 | " print(\"Epoch %d | Accuracy: %.6f\" % (j+1, (1-eval_error)))" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "CPU times: user 291 ms, sys: 88.9 ms, total: 379 ms\n", 228 | "Wall time: 408 ms\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "%%time\n", 234 | "# Main evaluation loop: 343ms\n", 235 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 236 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 237 | "y_truth = np.argmax(y_test[:n_samples], axis=-1)\n", 238 | "c = 0\n", 239 | "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n", 240 | " predicted_label_probs = sym.eval({features : data})\n", 241 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)\n", 242 | " c += 1" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 11, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "Accuracy: 0.7701322115384616\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "print(\"Accuracy: \", 
1.*sum(y_guess == y_truth)/len(y_guess))" 260 | ] 261 | } 262 | ], 263 | "metadata": { 264 | "anaconda-cloud": {}, 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.5.2" 281 | } 282 | }, 283 | "nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | -------------------------------------------------------------------------------- /notebooks/CNTK_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%bash\n", 10 | "# Download model\n", 11 | "#wget https://www.cntk.ai/Models/CNTK_Pretrained/ResNet50_ImageNet_CNTK.model" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "import sys\n", 22 | "import numpy as np\n", 23 | "import cntk as C\n", 24 | "from cntk import load_model, combine\n", 25 | "from common.params_inf import *\n", 26 | "from common.utils import *" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Force one-gpu\n", 36 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "OS: linux\n", 49 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 50 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 51 | "Numpy: 1.14.1\n", 52 | 
"GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 53 | "CUDA Version 8.0.61\n", 54 | "CuDNN Version 6.0.21\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "print(\"OS: \", sys.platform)\n", 60 | "print(\"Python: \", sys.version)\n", 61 | "print(\"Numpy: \", np.__version__)\n", 62 | "print(\"GPU: \", get_gpu_name())\n", 63 | "print(get_cuda_version())\n", 64 | "print(\"CuDNN Version \", get_cudnn_version())" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "# Create batches of fake data\n", 82 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 83 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# Features (penultimate layer)\n", 93 | "node_name = \"z.x\"\n", 94 | "model_file = \"ResNet50_ImageNet_CNTK.model\"" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 7, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "def predict_fn(classifier, data, batchsize):\n", 104 | " \"\"\" Return features from classifier \"\"\"\n", 105 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 106 | " for idx, dta in yield_mb_X(data, batchsize):\n", 107 | " pred = classifier.eval(dta)\n", 108 | " out[idx*batchsize:(idx+1)*batchsize] = pred[0].squeeze()\n", 109 | " return out" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Load model\n", 119 | "loaded_model = load_model(model_file)\n", 120 | "node_in_graph = loaded_model.find_by_name(node_name)\n", 121 | "output_nodes = combine([node_in_graph.owner])" 122 | ] 123 | 
}, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "fake_input_data_cf = np.ascontiguousarray(fake_input_data_cf)\n", 131 | "cold_start = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 10, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "CPU times: user 1.53 s, sys: 501 ms, total: 2.03 s\n", 144 | "Wall time: 2.26 s\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "%%time\n", 150 | "features = predict_fn(output_nodes, fake_input_data_cf, BATCH_SIZE)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 11, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "Images per second 624.3902439024391\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.05))" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.5.2" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /notebooks/CNTK_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN CNTK Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 
13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import os\n", 18 | "import sys\n", 19 | "import cntk\n", 20 | "from cntk.layers import Embedding, LSTM, GRU, Dense, Recurrence\n", 21 | "from cntk import sequence\n", 22 | "from common.params_lstm import *\n", 23 | "from common.utils import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Force one-gpu\n", 33 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "OS: linux\n", 46 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 47 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 48 | "Numpy: 1.14.1\n", 49 | "CNTK: 2.4\n", 50 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 51 | "CUDA Version 8.0.61\n", 52 | "CuDNN Version 6.0.21\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(\"OS: \", sys.platform)\n", 58 | "print(\"Python: \", sys.version)\n", 59 | "print(\"Numpy: \", np.__version__)\n", 60 | "print(\"CNTK: \", cntk.__version__)\n", 61 | "print(\"GPU: \", get_gpu_name())\n", 62 | "print(get_cuda_version())\n", 63 | "print(\"CuDNN Version \", get_cudnn_version())" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "def create_symbol(CUDNN=True, edim=EMBEDSIZE, nhid=NUMHIDDEN):\n", 73 | " # Weight initialiser from uniform distribution\n", 74 | " # Activation (unless stated) is None\n", 75 | " with cntk.layers.default_options(init = cntk.glorot_uniform()):\n", 76 | " x = Embedding(edim)(features) # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, EMBEDSIZE)\n", 77 | " \n", 78 | " # Since we have a vanilla RNN, instead of using the more flexible 
Recurrence(GRU) unit, which allows for\n", 79 | " # example LayerNormalisation to be added to the network, we can use optimized_rnnstack which quickly\n", 80 | " # goes down to the CuDNN level. This is another reason not to read much into the speed comparison because\n", 81 | " # it becomes a measure of which framework has the fastest way to go down to CuDNN.\n", 82 | " if not CUDNN:\n", 83 | " x = Recurrence(GRU(nhid))(x) # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, NUMHIDDEN)\n", 84 | " else:\n", 85 | " W = cntk.parameter((cntk.InferredDimension, 4))\n", 86 | " x = cntk.ops.optimized_rnnstack(x, W, nhid, \n", 87 | " num_layers=1, bidirectional=False, recurrent_op='gru')\n", 88 | " \n", 89 | " x = sequence.last(x) # output: array with shape=(BATCHSIZE, NUMHIDDEN)\n", 90 | " x = Dense(2)(x) # output: array with shape=(BATCHSIZE, 2)\n", 91 | " return x" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "def init_model(m, labels, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 101 | " # Loss (dense labels); check if support for sparse labels\n", 102 | " loss = cntk.cross_entropy_with_softmax(m, labels) \n", 103 | " # ADAM, set unit_gain to False to match others\n", 104 | " learner = cntk.adam(m.parameters,\n", 105 | " lr=cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch) ,\n", 106 | " momentum=cntk.momentum_schedule(b1), \n", 107 | " variance_momentum=cntk.momentum_schedule(b2),\n", 108 | " epsilon=eps,\n", 109 | " unit_gain=False)\n", 110 | " trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)), [learner])\n", 111 | " return trainer" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": { 118 | "scrolled": true 119 | }, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "Preparing train set...\n", 126 | "Preparing test set...\n", 127 | "Trimming to 
30000 max-features\n", 128 | "Padding to length 150\n", 129 | "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n", 130 | "int32 int32 float32 float32\n", 131 | "CPU times: user 5.77 s, sys: 379 ms, total: 6.15 s\n", 132 | "Wall time: 6.15 s\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "%%time\n", 138 | "# Data into format for library\n", 139 | "x_train, x_test, y_train, y_test = imdb_for_library(\n", 140 | " seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True) # CNTK format\n", 141 | "y_train = y_train.astype(np.float32)\n", 142 | "y_test = y_test.astype(np.float32)\n", 143 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 144 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 7, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "CPU times: user 10.6 ms, sys: 32.4 ms, total: 43 ms\n", 157 | "Wall time: 52.9 ms\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "%%time\n", 163 | "# Placeholders\n", 164 | "features = sequence.input_variable(shape=MAXFEATURES, is_sparse=True)\n", 165 | "labels = cntk.input_variable(2)\n", 166 | "# Load symbol\n", 167 | "sym = create_symbol()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "CPU times: user 110 ms, sys: 262 ms, total: 371 ms\n", 180 | "Wall time: 377 ms\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "%%time\n", 186 | "trainer = init_model(sym, labels)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 9, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Epoch 1 | Accuracy: 0.890625\n", 199 | "Epoch 2 | Accuracy: 0.875000\n", 200 | "Epoch 3 | Accuracy: 
0.968750\n", 201 | "CPU times: user 13.7 s, sys: 1.31 s, total: 15 s\n", 202 | "Wall time: 14.6 s\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "%%time\n", 208 | "# Main training loop: 14.6s\n", 209 | "for j in range(EPOCHS):\n", 210 | " for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 211 | " data_1hot = cntk.Value.one_hot(data, MAXFEATURES) #TODO: do this externally and generate batches of 1hot\n", 212 | " trainer.train_minibatch({features: data_1hot, labels: label})\n", 213 | " # Log (this is just last batch in epoch, not average of batches)\n", 214 | " eval_error = trainer.previous_minibatch_evaluation_average\n", 215 | " print(\"Epoch %d | Accuracy: %.6f\" % (j+1, (1-eval_error)))" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "CPU times: user 2.38 s, sys: 156 ms, total: 2.54 s\n", 228 | "Wall time: 2.54 s\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "%%time\n", 234 | "# Main evaluation loop: 2.55s\n", 235 | "z = cntk.softmax(sym)\n", 236 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 237 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 238 | "y_truth = np.argmax(y_test[:n_samples], axis=-1)\n", 239 | "c = 0\n", 240 | "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n", 241 | " data = cntk.Value.one_hot(data, MAXFEATURES)\n", 242 | " predicted_label_probs = z.eval({features : data})\n", 243 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)\n", 244 | " c += 1" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 11, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Accuracy: 0.8565304487179487\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))" 
262 | ] 263 | } 264 | ], 265 | "metadata": { 266 | "anaconda-cloud": {}, 267 | "kernelspec": { 268 | "display_name": "Python 3", 269 | "language": "python", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "codemirror_mode": { 274 | "name": "ipython", 275 | "version": 3 276 | }, 277 | "file_extension": ".py", 278 | "mimetype": "text/x-python", 279 | "name": "python", 280 | "nbconvert_exporter": "python", 281 | "pygments_lexer": "ipython3", 282 | "version": "3.5.2" 283 | } 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /notebooks/Caffe2_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%bash\n", 10 | "# Download ResNet50 pre-trained\n", 11 | "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_init_net.pb\n", 12 | "#wget https://github.com/leonardvandriel/caffe2_models/raw/master/model/resnet50_predict_net.pb" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "init_net_loc = 'resnet50_init_net.pb'\n", 22 | "predict_net_loc = 'resnet50_predict_net.pb'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import os\n", 32 | "import sys\n", 33 | "import numpy as np\n", 34 | "import caffe2\n", 35 | "from caffe2.proto import caffe2_pb2\n", 36 | "from caffe2.python import model_helper, core, workspace, models\n", 37 | "from common.params_inf import *\n", 38 | "from common.utils import *" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Force one-gpu\n", 48 | 
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 5, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "OS: linux\n", 61 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 62 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 63 | "Numpy: 1.14.1\n", 64 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 65 | "CUDA Version 8.0.61\n", 66 | "CuDNN Version 6.0.21\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "print(\"OS: \", sys.platform)\n", 72 | "print(\"Python: \", sys.version)\n", 73 | "print(\"Numpy: \", np.__version__)\n", 74 | "print(\"GPU: \", get_gpu_name())\n", 75 | "print(get_cuda_version())\n", 76 | "print(\"CuDNN Version \", get_cudnn_version())" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "# Create batches of fake data\n", 94 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 95 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 7, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "def load_net(INIT_NET, PREDICT_NET, device_opts):\n", 105 | " init_def = caffe2_pb2.NetDef()\n", 106 | " with open(INIT_NET, 'rb') as f:\n", 107 | " init_def.ParseFromString(f.read())\n", 108 | " init_def.device_option.CopyFrom(device_opts)\n", 109 | " workspace.RunNetOnce(init_def.SerializeToString())\n", 110 | " net_def = caffe2_pb2.NetDef()\n", 111 | " with open(PREDICT_NET, 'rb') as f:\n", 112 | " net_def.ParseFromString(f.read())\n", 113 | " net_def.device_option.CopyFrom(device_opts)\n", 114 | " 
workspace.CreateNet(net_def.SerializeToString(), overwrite=True)\n", 115 | " return net_def.name" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 8, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "def predict_fn(classifier, data, batchsize, device_opts):\n", 125 | " \"\"\" Return features from classifier \"\"\"\n", 126 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 127 | " for idx, dta in yield_mb_X(data, batchsize):\n", 128 | " workspace.FeedBlob(\"data\", dta, device_option=device_opts)\n", 129 | " workspace.RunNet(classifier, 1)\n", 130 | " out[idx*batchsize:(idx+1)*batchsize] = workspace.FetchBlob('pool5').squeeze()\n", 131 | " return out" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 9, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0) \n", 141 | "test_net = load_net(init_net_loc, \n", 142 | " predict_net_loc,\n", 143 | " device_opts=device_opts)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "cold_start = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE, device_opts)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "CPU times: user 8.28 s, sys: 1.84 s, total: 10.1 s\n", 165 | "Wall time: 10.1 s\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "%%time\n", 171 | "features = predict_fn(test_net, fake_input_data_cf, BATCH_SIZE, device_opts)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 13, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "Images per second 126.73267326732673\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | 
"print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/10.1))" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.5.2" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /notebooks/Chainer_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level Chainer Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import math\n", 20 | "import chainer\n", 21 | "import chainer.functions as F\n", 22 | "import chainer.links as L\n", 23 | "from chainer import optimizers\n", 24 | "from chainer import cuda\n", 25 | "from common.params import *\n", 26 | "from common.utils import *" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Performance Improvement\n", 36 | "# 1. Auto-tune\n", 37 | "# This adds very little now .. 
not sure if True by default?\n", 38 | "chainer.global_config.autotune = True" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Force one-gpu\n", 48 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "OS: linux\n", 61 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 62 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 63 | "Chainer: 3.4.0\n", 64 | "CuPy: 2.4.0\n", 65 | "Numpy: 1.14.1\n", 66 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 67 | "CUDA Version 8.0.61\n", 68 | "CuDNN Version 6.0.21\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "print(\"OS: \", sys.platform)\n", 74 | "print(\"Python: \", sys.version)\n", 75 | "print(\"Chainer: \", chainer.__version__)\n", 76 | "print(\"CuPy: \", chainer.cuda.cupy.__version__)\n", 77 | "print(\"Numpy: \", np.__version__)\n", 78 | "print(\"GPU: \", get_gpu_name())\n", 79 | "print(get_cuda_version())\n", 80 | "print(\"CuDNN Version \", get_cudnn_version())" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "class SymbolModule(chainer.Chain):\n", 90 | " def __init__(self, n_classes=N_CLASSES):\n", 91 | " super(SymbolModule, self).__init__()\n", 92 | " with self.init_scope():\n", 93 | " self.conv1 = L.Convolution2D(3, 50, ksize=3, pad=1)\n", 94 | " self.conv2 = L.Convolution2D(50, 50, ksize=3, pad=1)\n", 95 | " self.conv3 = L.Convolution2D(50, 100, ksize=3, pad=1)\n", 96 | " self.conv4 = L.Convolution2D(100, 100, ksize=3, pad=1)\n", 97 | " # feature map size is 8*8 by pooling\n", 98 | " self.fc1 = L.Linear(100*8*8, 512)\n", 99 | " self.fc2 = L.Linear(512, n_classes)\n", 100 | " \n", 101 | " def __call__(self, x):\n", 102 
| " h = self.conv2(F.relu(self.conv1(x)))\n", 103 | " h = F.relu(F.max_pooling_2d(h, ksize=2, stride=2))\n", 104 | " h = F.dropout(h, 0.25)\n", 105 | " \n", 106 | " h = self.conv4(F.relu(self.conv3(h)))\n", 107 | " h = F.relu(F.max_pooling_2d(h, ksize=2, stride=2))\n", 108 | " h = F.dropout(h, 0.25) \n", 109 | " \n", 110 | " h = F.dropout(F.relu(self.fc1(h)), 0.5)\n", 111 | " return self.fc2(h)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def init_model(m, lr=LR, momentum=MOMENTUM):\n", 121 | " optimizer = optimizers.MomentumSGD(lr, momentum)\n", 122 | " optimizer.setup(m)\n", 123 | " return optimizer" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": { 130 | "scrolled": true 131 | }, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "Preparing train set...\n", 138 | "Preparing test set...\n", 139 | "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n", 140 | "float32 float32 int32 int32\n", 141 | "CPU times: user 605 ms, sys: 612 ms, total: 1.22 s\n", 142 | "Wall time: 1.22 s\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "%%time\n", 148 | "# Data into format for library\n", 149 | "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n", 150 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 151 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "CPU times: user 216 ms, sys: 132 ms, total: 349 ms\n", 164 | "Wall time: 348 ms\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "%%time\n", 170 | "# Create symbol\n", 171 | "sym = SymbolModule()\n", 172 | "chainer.cuda.get_device(0).use() # Make a 
specified GPU current\n", 173 | "sym.to_gpu() # Copy the model to the GPU" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "CPU times: user 115 µs, sys: 0 ns, total: 115 µs\n", 186 | "Wall time: 119 µs\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "%%time\n", 192 | "optimizer = init_model(sym)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 10, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "0\n", 205 | "1\n", 206 | "2\n", 207 | "3\n", 208 | "4\n", 209 | "5\n", 210 | "6\n", 211 | "7\n", 212 | "8\n", 213 | "9\n", 214 | "CPU times: user 1min 7s, sys: 1.61 s, total: 1min 8s\n", 215 | "Wall time: 1min 9s\n" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "%%time\n", 221 | "# Main training loop: 69s\n", 222 | "for j in range(EPOCHS):\n", 223 | " for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 224 | " # Get samples\n", 225 | " data = cuda.to_gpu(data)\n", 226 | " target = cuda.to_gpu(target)\n", 227 | " # Forwards\n", 228 | " output = sym(data)\n", 229 | " # Loss\n", 230 | " loss = F.softmax_cross_entropy(output, target)\n", 231 | " sym.cleargrads()\n", 232 | " # Back-prop\n", 233 | " loss.backward()\n", 234 | " optimizer.update()\n", 235 | " # Log\n", 236 | " print(j)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 13, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "CPU times: user 466 ms, sys: 0 ns, total: 466 ms\n", 249 | "Wall time: 466 ms\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "%%time\n", 255 | "# Main evaluation loop: 800ms\n", 256 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 257 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 258 | 
"y_truth = y_test[:n_samples]\n", 259 | "c = 0\n", 260 | "with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):\n", 261 | " for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n", 262 | " # Forwards\n", 263 | " pred = cuda.to_cpu(sym(cuda.to_gpu(data)).data.argmax(-1))\n", 264 | " # Collect results\n", 265 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n", 266 | " c += 1" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 14, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "Accuracy: 0.7901642628205128\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))" 284 | ] 285 | } 286 | ], 287 | "metadata": { 288 | "anaconda-cloud": {}, 289 | "kernelspec": { 290 | "display_name": "Python 3", 291 | "language": "python", 292 | "name": "python3" 293 | }, 294 | "language_info": { 295 | "codemirror_mode": { 296 | "name": "ipython", 297 | "version": 3 298 | }, 299 | "file_extension": ".py", 300 | "mimetype": "text/x-python", 301 | "name": "python", 302 | "nbconvert_exporter": "python", 303 | "pygments_lexer": "ipython3", 304 | "version": "3.5.2" 305 | } 306 | }, 307 | "nbformat": 4, 308 | "nbformat_minor": 2 309 | } 310 | -------------------------------------------------------------------------------- /notebooks/Chainer_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%bash\n", 10 | "# Downloaded from https://github.com/KaimingHe/deep-residual-networks\n", 11 | "#cd /home/iliauk/.chainer/dataset/pfnet/chainer/models/\n", 12 | "#wget https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/ResNet-50-model.caffemodel" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | 
"execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import os\n", 22 | "import sys\n", 23 | "import numpy as np\n", 24 | "import chainer\n", 25 | "import chainer.functions as F\n", 26 | "import chainer.links as L\n", 27 | "from chainer import optimizers\n", 28 | "from chainer import cuda\n", 29 | "from common.params_inf import *\n", 30 | "from common.utils import *" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Force one-gpu\n", 40 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "OS: linux\n", 53 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 54 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 55 | "Numpy: 1.14.1\n", 56 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 57 | "CUDA Version 8.0.61\n", 58 | "CuDNN Version 6.0.21\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "print(\"OS: \", sys.platform)\n", 64 | "print(\"Python: \", sys.version)\n", 65 | "print(\"Numpy: \", np.__version__)\n", 66 | "print(\"GPU: \", get_gpu_name())\n", 67 | "print(get_cuda_version())\n", 68 | "print(\"CuDNN Version \", get_cudnn_version())" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "# Create batches of fake data\n", 86 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 87 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 
| "data": { 97 | "text/plain": [ 98 | "" 99 | ] 100 | }, 101 | "execution_count": 6, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "resnet50 = chainer.links.ResNet50Layers(pretrained_model=\"auto\")\n", 108 | "# GPU\n", 109 | "chainer.cuda.get_device(0).use() # Make a specified GPU current\n", 110 | "resnet50.to_gpu() # Copy the model to the GPU" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "['conv1', 'pool1', 'res2', 'res3', 'res4', 'res5', 'pool5', 'fc6', 'prob']" 122 | ] 123 | }, 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "resnet50.available_layers" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 8, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "def predict_fn(classifier, data, batchsize):\n", 140 | " \"\"\" Return features from classifier \"\"\"\n", 141 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 142 | " with chainer.using_config('train', False), chainer.using_config('enable_backprop', False): \n", 143 | " for idx, dta in yield_mb_X(data, batchsize):\n", 144 | " pred = classifier(cuda.to_gpu(dta), layers=['pool5'])\n", 145 | " out[idx*batchsize:(idx+1)*batchsize] = cuda.to_cpu(pred['pool5'].data).squeeze() \n", 146 | " return out" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 9, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "cold_start = predict_fn(resnet50, fake_input_data_cf, BATCH_SIZE)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 10, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "CPU times: user 3.52 s, sys: 7.74 ms, total: 3.53 s\n", 168 | "Wall time: 3.52 s\n" 
169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "%%time\n", 174 | "features = predict_fn(resnet50, fake_input_data_cf, BATCH_SIZE)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 11, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Images per second 363.6363636363636\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/3.52))" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.5.2" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /notebooks/Gluon_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MXNet/Gluon CNN example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import math\n", 20 | "import mxnet as mx\n", 21 | "from mxnet import nd, autograd\n", 22 | "from mxnet import gluon\n", 23 | "from common.params import *\n", 24 | "from common.utils import *" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Force one-gpu\n", 34 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": 
"code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "OS: linux\n", 47 | "Python: 3.6.3 |Anaconda custom (64-bit)| (default, Oct 13 2017, 12:02:49) \n", 48 | "[GCC 7.2.0]\n", 49 | "MXNet: 1.3.0\n", 50 | "Numpy: 1.13.3\n", 51 | "GPU: ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n", 52 | "CUDA Version 9.1.85\n", 53 | "CuDNN Version 7.1.3\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "print(\"OS: \", sys.platform)\n", 59 | "print(\"Python: \", sys.version)\n", 60 | "print(\"MXNet: \", mx.__version__)\n", 61 | "print(\"Numpy: \", np.__version__)\n", 62 | "print(\"GPU: \", get_gpu_name())\n", 63 | "print(get_cuda_version())\n", 64 | "print(\"CuDNN Version \", get_cudnn_version())" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Build model" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 13, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "def build_model(n_classes=N_CLASSES):\n", 81 | " net = gluon.nn.HybridSequential()\n", 82 | " with net.name_scope():\n", 83 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, padding=1, activation='relu'))\n", 84 | " net.add(gluon.nn.Conv2D(channels=50, kernel_size=3, padding=1))\n", 85 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 86 | " net.add(gluon.nn.Activation('relu'))\n", 87 | " # Equiv to gluon.nn.LeakyReLU(0)\n", 88 | " net.add(gluon.nn.Dropout(0.25))\n", 89 | " net.add(gluon.nn.Conv2D(channels=100, kernel_size=3, padding=1, activation='relu'))\n", 90 | " net.add(gluon.nn.Conv2D(channels=100, kernel_size=3, padding=1))\n", 91 | " net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))\n", 92 | " net.add(gluon.nn.Activation('relu'))\n", 93 | " net.add(gluon.nn.Dropout(0.25))\n", 94 | " net.add(gluon.nn.Flatten())\n", 95 | " net.add(gluon.nn.Dense(512, 
activation='relu'))\n", 96 | " net.add(gluon.nn.Dropout(0.25))\n", 97 | " net.add(gluon.nn.Dense(n_classes))\n", 98 | " return net" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Init model" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 8, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "def init_model(net, ctx, lr=LR, momentum=MOMENTUM):\n", 115 | " net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)\n", 116 | " trainer = gluon.Trainer(\n", 117 | " net.collect_params(), \n", 118 | " 'sgd',\n", 119 | " {'learning_rate': lr, 'momentum':momentum})\n", 120 | " criterion = gluon.loss.SoftmaxCrossEntropyLoss()\n", 121 | " return trainer, criterion" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## Get data" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 17, 134 | "metadata": { 135 | "scrolled": true 136 | }, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Preparing train set...\n", 143 | "Preparing test set...\n", 144 | "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n", 145 | "float32 float32 int32 int32\n", 146 | "CPU times: user 776 ms, sys: 568 ms, total: 1.34 s\n", 147 | "Wall time: 2.34 s\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "%%time\n", 153 | "# Data into format for library\n", 154 | "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n", 155 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 156 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## Create model" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 30, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | 
"output_type": "stream", 174 | "text": [ 175 | "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", 176 | "Wall time: 3.4 ms\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "%%time\n", 182 | "ctx = mx.gpu()\n", 183 | "net = build_model()" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 31, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n", 196 | "Wall time: 4.42 ms\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "%%time\n", 202 | "trainer, criterion = init_model(net, ctx)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "## Training Loop" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 32, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "Epoch 0: loss: 1.8582\n", 222 | "Epoch 1: loss: 1.3819\n", 223 | "Epoch 2: loss: 1.1333\n", 224 | "Epoch 3: loss: 0.9515\n", 225 | "Epoch 4: loss: 0.8145\n", 226 | "Epoch 5: loss: 0.7097\n", 227 | "Epoch 6: loss: 0.6174\n", 228 | "Epoch 7: loss: 0.5324\n", 229 | "Epoch 8: loss: 0.4575\n", 230 | "Epoch 9: loss: 0.3964\n", 231 | "CPU times: user 49 s, sys: 13.3 s, total: 1min 2s\n", 232 | "Wall time: 37.2 s\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "%%time\n", 238 | "net.hybridize()\n", 239 | "for j in range(EPOCHS):\n", 240 | " train_loss = nd.zeros(1, ctx=ctx)\n", 241 | " for i, (data, target) in enumerate(yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)):\n", 242 | " # Get samples\n", 243 | " data = nd.array(data).as_in_context(ctx)\n", 244 | " target = nd.array(target).as_in_context(ctx)\n", 245 | " with autograd.record():\n", 246 | " # Forwards\n", 247 | " output = net(data)\n", 248 | " # Loss\n", 249 | " loss = criterion(output, target)\n", 250 | " # Back-prop\n", 251 | " loss.backward()\n", 252 | " 
trainer.step(data.shape[0])\n", 253 | " train_loss += loss.mean()\n", 254 | " # Log \n", 255 | " print('Epoch %3d: loss: %5.4f'%(j, train_loss.asscalar()/(i+1)))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## Evaluation loop" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 34, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "CPU times: user 296 ms, sys: 48 ms, total: 344 ms\n", 275 | "Wall time: 278 ms\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "%%time\n", 281 | "# Main evaluation loop: 453ms\n", 282 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 283 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 284 | "y_truth = y_test[:n_samples]\n", 285 | "c = 0\n", 286 | "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n", 287 | " # Get samples\n", 288 | " data = nd.array(data).as_in_context(ctx)\n", 289 | " # Forwards\n", 290 | " output = net(data)\n", 291 | " pred = nd.argmax(output, axis=1)\n", 292 | " # Collect results\n", 293 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred.asnumpy()\n", 294 | " c += 1" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 35, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "name": "stdout", 304 | "output_type": "stream", 305 | "text": [ 306 | "Accuracy: 0.765324519231\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "anaconda-cloud": {}, 317 | "kernelspec": { 318 | "display_name": "Python 3", 319 | "language": "python", 320 | "name": "python3" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 3 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | 
"pygments_lexer": "ipython3", 332 | "version": "3.6.3" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /notebooks/Gluon_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MXNet/Gluon Inference" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 13, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import time\n", 19 | "import numpy as np\n", 20 | "import mxnet as mx\n", 21 | "from mxnet import gluon, nd\n", 22 | "from collections import namedtuple\n", 23 | "from common.params_inf import *\n", 24 | "from common.utils import *" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# Force one-gpu\n", 34 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "OS: linux\n", 47 | "Python: 3.6.3 |Anaconda custom (64-bit)| (default, Oct 13 2017, 12:02:49) \n", 48 | "[GCC 7.2.0]\n", 49 | "Numpy: 1.13.3\n", 50 | "MXNet: 1.3.0\n", 51 | "GPU: ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n", 52 | "CUDA Version 9.1.85\n", 53 | "CuDNN Version 7.1.3\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "print(\"OS: \", sys.platform)\n", 59 | "print(\"Python: \", sys.version)\n", 60 | "print(\"Numpy: \", np.__version__)\n", 61 | "print(\"MXNet: \", mx.__version__)\n", 62 | "print(\"GPU: \", get_gpu_name())\n", 63 | "print(get_cuda_version())\n", 64 | "print(\"CuDNN Version \", get_cudnn_version())" 65 | ] 66 | }, 67 | { 68 | "cell_type": 
"markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## Get pre-trained model" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# We create the network\n", 81 | "ctx = mx.gpu()\n", 82 | "net = mx.gluon.model_zoo.vision.resnet50_v1(pretrained=True, ctx=ctx).features\n", 83 | "# We hybridize the network\n", 84 | "net.hybridize(static_alloc=True, static_shape=True)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## Get data" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "# Create batches of fake data\n", 109 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 110 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Run inference" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "def predict_fn(classifier, data, batchsize):\n", 127 | " \"\"\" Return features from classifier \"\"\"\n", 128 | " out = nd.zeros((len(data), RESNET_FEATURES), dtype=np.float32, ctx=ctx)\n", 129 | " for idx, dta in yield_mb_X(data, batchsize):\n", 130 | " outputs = classifier(mx.nd.array(dta, ctx=ctx))\n", 131 | " out[idx*batchsize:(idx+1)*batchsize] = outputs.squeeze()\n", 132 | " return out.asnumpy()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 8, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "cold_start = predict_fn(net, fake_input_data_cf, BATCH_SIZE)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | 
"execution_count": 14, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "CPU times: user 1.37 s, sys: 328 ms, total: 1.7 s\n", 154 | "Wall time: 1.25 s\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "%%time\n", 160 | "tick = time.time()\n", 161 | "features = predict_fn(net, fake_input_data_cf, BATCH_SIZE)\n", 162 | "total = time.time()-tick" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 15, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "Images per second 1024.1136844948533\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/total))" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.6.3" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /notebooks/Gluon_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN Gluon Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 11, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import math\n", 20 | "import mxnet as mx\n", 21 | "from mxnet import gluon\n", 22 | "from common.params_lstm import *\n", 23 | "from 
common.utils import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 9, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "OS: linux\n", 36 | "Python: 3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 18:10:19) \n", 37 | "[GCC 7.2.0]\n", 38 | "MXNet: 1.3.0\n", 39 | "Numpy: 1.13.3\n", 40 | "GPU: ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']\n", 41 | "CUDA Version 9.1.85\n", 42 | "CuDNN Version 7.1.3\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "print(\"OS: \", sys.platform)\n", 48 | "print(\"Python: \", sys.version)\n", 49 | "print(\"MXNet: \", mx.__version__)\n", 50 | "print(\"Numpy: \", np.__version__)\n", 51 | "print(\"GPU: \", get_gpu_name())\n", 52 | "print(get_cuda_version())\n", 53 | "print(\"CuDNN Version \", get_cudnn_version())" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 60, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "30000\n", 66 | "125\n", 67 | "100\n", 68 | "150\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "print(MAXFEATURES)\n", 74 | "print(EMBEDSIZE)\n", 75 | "print(NUMHIDDEN)\n", 76 | "print(MAXLEN)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Create the model" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 136, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "class RNN(gluon.Block):\n", 93 | " def __init__(self, \n", 94 | " maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, **kwargs):\n", 95 | " super(RNN, self).__init__(**kwargs)\n", 96 | " self.nhid = nhid\n", 97 | " with self.name_scope():\n", 98 | " self.embedding = gluon.nn.Embedding(input_dim=maxf,\n", 99 | " output_dim=edim)\n", 100 | " self.gru = gluon.rnn.GRU(\n", 101 | " hidden_size=nhid, \n", 102 | " num_layers=1,\n", 103 | " layout=\"NTC\",\n", 104 
| " bidirectional=False) \n", 105 | " self.l_out = gluon.nn.Dense(units=2)\n", 106 | "\n", 107 | " def forward(self, x):\n", 108 | " x = self.embedding(x) \n", 109 | " x = self.gru(x) # default state will be all 0\n", 110 | " x = x[:,-1,:].squeeze()\n", 111 | " x = self.l_out(x)\n", 112 | " return x" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Create optimizer" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 137, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "def init_model(net, ctx, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 129 | " net.initialize(mx.init.Xavier(), ctx=ctx)\n", 130 | " trainer = gluon.Trainer(\n", 131 | " net.collect_params(), \n", 132 | " 'adam',\n", 133 | " {'learning_rate': lr, 'beta1':BETA_1, 'beta2':BETA_2, 'epsilon':EPS}\n", 134 | " )\n", 135 | " criterion = gluon.loss.SoftmaxCrossEntropyLoss()\n", 136 | " return trainer, criterion" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Get data" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 138, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "Preparing train set...\n", 156 | "Preparing test set...\n", 157 | "Trimming to 30000 max-features\n", 158 | "Padding to length 150\n", 159 | "(25000, 150) (25000, 150) (25000,) (25000,)\n", 160 | "int64 int64 int64 int64\n", 161 | "CPU times: user 5.63 s, sys: 248 ms, total: 5.88 s\n", 162 | "Wall time: 5.87 s\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "%%time\n", 168 | "# Data into format for library\n", 169 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n", 170 | "# Cast inputs and labels to int64\n", 171 | "x_train = x_train.astype(np.int64)\n", 172 | "x_test = x_test.astype(np.int64)\n", 173 | "y_train = y_train.astype(np.int64)\n", 174 | 
"y_test = y_test.astype(np.int64)\n", 175 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 176 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Initialize Model" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 139, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# Run on one GPU\n", 193 | "ctx = mx.gpu(0)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 140, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "name": "stdout", 203 | "output_type": "stream", 204 | "text": [ 205 | "CPU times: user 40 ms, sys: 0 ns, total: 40 ms\n", 206 | "Wall time: 3.47 ms\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "%%time\n", 212 | "net = RNN()\n", 213 | "trainer, loss_fn = init_model(net, ctx)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "## Train Model" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 141, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "Epoch [0], loss: 0.4858\n", 233 | "Epoch [1], loss: 0.2264\n", 234 | "Epoch [2], loss: 0.1178\n", 235 | "CPU times: user 15.2 s, sys: 3.01 s, total: 18.2 s\n", 236 | "Wall time: 11.4 s\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "%%time\n", 242 | "for i in range(EPOCHS):\n", 243 | " loss_acc = mx.nd.zeros((1), ctx)\n", 244 | " for j, (data, target) in enumerate(yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)):\n", 245 | " # Get samples\n", 246 | " data = mx.nd.array(data, ctx=ctx)\n", 247 | " target = mx.nd.array(target, ctx=ctx)\n", 248 | " # Forwards\n", 249 | " with mx.autograd.record():\n", 250 | " output = net(data)\n", 251 | " loss = loss_fn(output, target)\n", 252 | " # Back-prop\n", 253 | " loss.backward()\n", 254 | 
" loss_acc += loss.mean()\n", 255 | " trainer.step(data.shape[0])\n", 256 | " print(\"Epoch [{}], loss: {:.4f}\".format(i, loss_acc.asscalar()/(j+1)))" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "## Evaluate" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 142, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "CPU times: user 9.34 s, sys: 16.2 s, total: 25.5 s\n", 276 | "Wall time: 1.78 s\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "%%time\n", 282 | "# Main evaluation loop: 1.52s\n", 283 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 284 | "y_guess = mx.nd.zeros((n_samples), dtype=np.int)\n", 285 | "y_truth = y_test[:n_samples]\n", 286 | "c = 0\n", 287 | "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n", 288 | " # Get samples\n", 289 | " data = mx.nd.array(data, ctx=ctx)\n", 290 | " target = mx.nd.array(target, ctx=ctx)\n", 291 | " # Forwards\n", 292 | " output = net(data)\n", 293 | " pred = output.topk(k=1).squeeze()\n", 294 | " # Collect results\n", 295 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n", 296 | " c += 1\n", 297 | "mx.nd.waitall()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 143, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "Accuracy: 0.857892628205\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "print(\"Accuracy: \", sum(y_guess.asnumpy() == y_truth)/len(y_guess))" 315 | ] 316 | } 317 | ], 318 | "metadata": { 319 | "kernelspec": { 320 | "display_name": "Environment (conda_mxnet_p36)", 321 | "language": "python", 322 | "name": "conda_mxnet_p36" 323 | }, 324 | "language_info": { 325 | "codemirror_mode": { 326 | "name": "ipython", 327 | "version": 3 328 | }, 329 | "file_extension": ".py", 330 | "mimetype": "text/x-python", 331 | "name": 
"python", 332 | "nbconvert_exporter": "python", 333 | "pygments_lexer": "ipython3", 334 | "version": "3.6.4" 335 | } 336 | }, 337 | "nbformat": 4, 338 | "nbformat_minor": 2 339 | } 340 | -------------------------------------------------------------------------------- /notebooks/KerasR_TF_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level Keras R (TF) RNN Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# SETUP\n", 19 | "#\n", 20 | "# Install keras R\n", 21 | "# install.packages('keras', repos = \"https://cloud.r-project.org\")\n", 22 | "# \n", 23 | "# Update reticulate from cran (it defaults to mran which has an outdated version)\n", 24 | "# install.packages(\"reticulate\", repos = \"https://cloud.r-project.org\")" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stderr", 34 | "output_type": "stream", 35 | "text": [ 36 | "Loading required package: rjson\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "library(keras)\n", 42 | "use_python('/anaconda/envs/py35')\n", 43 | "\n", 44 | "# Import util functions\n", 45 | "source(\"./common/utils.R\")\n", 46 | "\n", 47 | "# Import hyper-parameters\n", 48 | "params <- load_params('lstm')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "# reticulate::py_config()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "OS: Linux \n", 72 | "R version 3.4.1 (2017-06-30) \n", 73 | "Keras: 2.1.5 \n", 74 | "Tensorflow: 1.5 
\n", 75 | "Keras using tensorflow \n", 76 | "Keras channel ordering is channels_last \n", 77 | "GPU: Tesla P100-PCIE-16GB \n", 78 | "CUDA Version 8.0.61 \n", 79 | "CuDNN Version 6.0.21 \n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "cat(\"OS:\", Sys.info()[\"sysname\"], \"\\n\")\n", 85 | "cat(R.version$version.string, \"\\n\")\n", 86 | "cat(\"Keras:\", paste0(packageVersion(\"keras\")), \"\\n\")\n", 87 | "cat(\"Tensorflow:\", paste0(packageVersion(\"tensorflow\")), \"\\n\")\n", 88 | "cat(\"Keras using\", backend()$backend(), \"\\n\")\n", 89 | "cat(\"Keras channel ordering is\", backend()$image_data_format(), \"\\n\") \n", 90 | "cat(\"GPU: \", get_gpu_name(), \"\\n\")\n", 91 | "cat(get_cuda_version(), \"\\n\")\n", 92 | "cat(get_cudnn_version(), \"\\n\")" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 5, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "create_symbol <- function(CUDNN = TRUE, maxf = params$MAXFEATURES, edim = params$EMBEDSIZE, nhid = params$NUMHIDDEN, maxl = params$MAXLEN){\n", 104 | " \n", 105 | " model <- keras_model_sequential() %>%\n", 106 | " \n", 107 | " layer_embedding(maxf, edim, input_length = maxl)\n", 108 | " \n", 109 | " if (CUDNN){\n", 110 | " model %>% layer_cudnn_gru(units = nhid, return_sequences = FALSE, return_state = FALSE)\n", 111 | " } else{\n", 112 | " model %>% layer_gru(units = nhid, return_sequences = FALSE, return_state = FALSE) \n", 113 | " }\n", 114 | " \n", 115 | " model %>% layer_dense(2, activation = \"softmax\")\n", 116 | " \n", 117 | " return(model)\n", 118 | "}" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 6, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "init_model <- function(m, lr=params$LR, b1=params$BETA_1, b2=params$BETA_2, eps=params$EPS){\n", 130 | " m %>% compile(\n", 131 | " loss = \"categorical_crossentropy\",\n", 132 | " optimizer = optimizer_adam(lr 
= lr, beta_1 = b1, beta_2 = b2, epsilon = eps),\n", 133 | " metrics = \"accuracy\"\n", 134 | " )\n", 135 | " return(m)\n", 136 | "}" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 7, 142 | "metadata": { 143 | "collapsed": true 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "imdb <- imdb_for_library()\n", 148 | "x_train <- imdb$x_train\n", 149 | "y_train <- imdb$y_train\n", 150 | "x_test <- imdb$x_test\n", 151 | "y_test <- imdb$y_test\n", 152 | "rm(imdb)\n", 153 | "\n", 154 | "y_train <- to_categorical(y_train, num_classes = 2)\n", 155 | "y_test <- to_categorical(y_test, num_classes = 2)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "x_train shape: 25000 150 \n", 168 | "x_test shape: 25000 150 \n", 169 | "y_train shape: 25000 2 \n", 170 | "y_test shape: 25000 2 \n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "cat('x_train shape:', dim(x_train), '\\n')\n", 176 | "cat('x_test shape:', dim(x_test), '\\n')\n", 177 | "cat('y_train shape:', dim(y_train), '\\n')\n", 178 | "cat('y_test shape:', dim(y_test), '\\n')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "metadata": { 185 | "collapsed": true 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "# Load symbol\n", 190 | "sym = create_symbol(CUDNN = TRUE)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 10, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "# Initialise model\n", 202 | "model = init_model(sym)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 11, 208 | "metadata": { 209 | "scrolled": true 210 | }, 211 | "outputs": [ 212 | { 213 | "name": "stdout", 214 | "output_type": "stream", 215 | "text": [ 216 | 
"________________________________________________________________________________\n", 217 | "Layer (type) Output Shape Param # \n", 218 | "================================================================================\n", 219 | "embedding_1 (Embedding) (None, 150, 125) 3750000 \n", 220 | "________________________________________________________________________________\n", 221 | "cu_dnngru_1 (CuDNNGRU) (None, 100) 68100 \n", 222 | "________________________________________________________________________________\n", 223 | "dense_1 (Dense) (None, 2) 202 \n", 224 | "================================================================================\n", 225 | "Total params: 3,818,302\n", 226 | "Trainable params: 3,818,302\n", 227 | "Non-trainable params: 0\n", 228 | "________________________________________________________________________________\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "summary(model)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 12, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/plain": [ 244 | " user system elapsed \n", 245 | " 21.399 4.443 25.228 " 246 | ] 247 | }, 248 | "metadata": {}, 249 | "output_type": "display_data" 250 | } 251 | ], 252 | "source": [ 253 | "# Main training loop\n", 254 | "system.time(\n", 255 | " model %>% fit(x_train,\n", 256 | " y_train,\n", 257 | " batch_size=params$BATCHSIZE,\n", 258 | " epochs=params$EPOCHS,\n", 259 | " verbose=1)\n", 260 | ")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 13, 266 | "metadata": { 267 | "collapsed": true 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "# Main evaluation loop\n", 272 | "y_guess <- model %>% predict_classes(x_test, batch_size = params$BATCHSIZE)\n", 273 | "y_truth <- apply(y_test, 1, function(x) which.max(x)-1)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 14, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 
| "output_type": "stream", 284 | "text": [ 285 | "[1] \"Accuracy: 0.85124\"\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "print(paste0(\"Accuracy: \", sum(y_guess == y_truth)/length(y_guess)))" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "collapsed": true 298 | }, 299 | "outputs": [], 300 | "source": [] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "R", 306 | "language": "R", 307 | "name": "ir" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": "r", 311 | "file_extension": ".r", 312 | "mimetype": "text/x-r-source", 313 | "name": "R", 314 | "pygments_lexer": "r", 315 | "version": "3.4.1" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 2 320 | } 321 | -------------------------------------------------------------------------------- /notebooks/Keras_CNTK_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using CNTK backend\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import os\n", 18 | "os.environ['KERAS_BACKEND'] = \"cntk\"\n", 19 | "import sys\n", 20 | "import numpy as np\n", 21 | "import keras as K\n", 22 | "import cntk as C\n", 23 | "from keras.applications.resnet50 import ResNet50\n", 24 | "from common.params_inf import *\n", 25 | "from common.utils import *" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# Force one-gpu\n", 35 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", 36 | "# Faster with channels-last, maybe because model expects that?\n", 37 | "K.backend.set_image_data_format('channels_last')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | 
"outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "OS: linux\n", 50 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 51 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 52 | "Numpy: 1.14.1\n", 53 | "Keras: 2.1.4\n", 54 | "CNTK: 2.4\n", 55 | "Keras using cntk\n", 56 | "Keras channel ordering is channels_last\n", 57 | "CUDA Version 8.0.61\n", 58 | "CuDNN Version 6.0.21\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "print(\"OS: \", sys.platform)\n", 64 | "print(\"Python: \", sys.version)\n", 65 | "print(\"Numpy: \", np.__version__)\n", 66 | "print(\"Keras: \", K.__version__)\n", 67 | "print(\"CNTK: \", C.__version__)\n", 68 | "print(\"Keras using {}\".format(K.backend.backend()))\n", 69 | "print(\"Keras channel ordering is {}\".format(K.backend.image_data_format()))\n", 70 | "print(get_cuda_version())\n", 71 | "print(\"CuDNN Version \", get_cudnn_version())" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "# Create batches of fake data\n", 89 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 90 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def predict_fn(classifier, data, batchsize):\n", 100 | " \"\"\" Return features from classifier \"\"\"\n", 101 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 102 | " for idx, dta in yield_mb_X(data, batchsize):\n", 103 | " out[idx*batchsize:(idx+1)*batchsize] = classifier.predict_on_batch(dta).squeeze()\n", 104 | " return out" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": 
{}, 111 | "outputs": [], 112 | "source": [ 113 | "# Download Resnet weights\n", 114 | "model = ResNet50(include_top=False, input_shape=(224,224,3))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 7, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "fake_input_data_cl = np.ascontiguousarray(fake_input_data_cl)\n", 124 | "cold_start = predict_fn(model, fake_input_data_cl, BATCH_SIZE)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 8, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "CPU times: user 5.81 s, sys: 1.69 s, total: 7.51 s\n", 137 | "Wall time: 7.51 s\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "%%time\n", 143 | "features = predict_fn(model, fake_input_data_cl, BATCH_SIZE)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "Images per second 170.439414114514\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/7.51))" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.5.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /notebooks/Keras_CNTK_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN Keras (CNTK) Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "Using CNTK backend\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import os\n", 25 | "import sys\n", 26 | "import numpy as np\n", 27 | "os.environ['KERAS_BACKEND'] = \"cntk\"\n", 28 | "import keras as K\n", 29 | "import cntk\n", 30 | "from keras.models import Sequential\n", 31 | "from keras.layers import Dense, Embedding, GRU, CuDNNGRU\n", 32 | "from common.params_lstm import *\n", 33 | "from common.utils import *" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Force one-gpu\n", 43 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "OS: linux\n", 56 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 57 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 58 | "Keras: 2.1.4\n", 59 | "Numpy: 1.14.1\n", 60 | "CNTK: 2.4\n", 61 | "cntk\n", 62 | "channels_last\n", 63 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 64 | "CUDA Version 8.0.61\n", 65 | "CuDNN Version 6.0.21\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "print(\"OS: \", sys.platform)\n", 71 | "print(\"Python: \", sys.version)\n", 72 | "print(\"Keras: \", K.__version__)\n", 73 | "print(\"Numpy: \", np.__version__)\n", 74 | "print(\"CNTK: \", cntk.__version__)\n", 75 | "print(K.backend.backend())\n", 76 | "print(K.backend.image_data_format())\n", 77 | "print(\"GPU: \", get_gpu_name())\n", 78 | "print(get_cuda_version())\n", 79 | "print(\"CuDNN Version \", get_cudnn_version())" 80 | ] 81 | }, 82 | { 83 | 
"cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def create_symbol(CUDNN=True, maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n", 89 | " model = Sequential()\n", 90 | " model.add(Embedding(maxf, edim, input_length=maxl))\n", 91 | " # Only return last output\n", 92 | " if not CUDNN:\n", 93 | " model.add(GRU(nhid, return_sequences=False, return_state=False))\n", 94 | " else:\n", 95 | " model.add(CuDNNGRU(nhid, return_sequences=False, return_state=False))\n", 96 | " model.add(Dense(2, activation='softmax'))\n", 97 | " return model" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 107 | " m.compile(\n", 108 | " loss = \"categorical_crossentropy\",\n", 109 | " optimizer = K.optimizers.Adam(lr, b1, b2, eps),\n", 110 | " metrics = ['accuracy'])\n", 111 | " return m" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | "Preparing train set...\n", 124 | "Preparing test set...\n", 125 | "Trimming to 30000 max-features\n", 126 | "Padding to length 150\n", 127 | "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n", 128 | "int32 int32 int32 int32\n", 129 | "CPU times: user 5.39 s, sys: 448 ms, total: 5.84 s\n", 130 | "Wall time: 5.84 s\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "%%time\n", 136 | "# Data into format for library\n", 137 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)\n", 138 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 139 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 7, 145 | 
"metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "CPU times: user 428 ms, sys: 554 ms, total: 982 ms\n", 152 | "Wall time: 597 ms\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "%%time\n", 158 | "# Load symbol\n", 159 | "# CuDNN RNNs are only available with the TensorFlow backend.\n", 160 | "sym = create_symbol(CUDNN=False)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 8, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "CPU times: user 22.7 ms, sys: 61.6 ms, total: 84.3 ms\n", 173 | "Wall time: 7.05 ms\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "%%time\n", 179 | "# Initialise model\n", 180 | "model = init_model(sym)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 9, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "_________________________________________________________________\n", 193 | "Layer (type) Output Shape Param # \n", 194 | "=================================================================\n", 195 | "embedding_1 (Embedding) (None, 150, 125) 3750000 \n", 196 | "_________________________________________________________________\n", 197 | "gru_1 (GRU) (None, 100) 67800 \n", 198 | "_________________________________________________________________\n", 199 | "dense_1 (Dense) (None, 2) 202 \n", 200 | "=================================================================\n", 201 | "Total params: 3,818,002\n", 202 | "Trainable params: 3,818,002\n", 203 | "Non-trainable params: 0\n", 204 | "_________________________________________________________________\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "model.summary()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 10, 215 | "metadata": {}, 216 | "outputs": [ 217 | { 218 | "name": 
"stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "Epoch 1/3\n", 222 | "25000/25000 [==============================] - 18s 719us/step - loss: 0.5204 - acc: 0.7269\n", 223 | "Epoch 2/3\n", 224 | "25000/25000 [==============================] - 18s 709us/step - loss: 0.2381 - acc: 0.9092\n", 225 | "Epoch 3/3\n", 226 | "25000/25000 [==============================] - 18s 706us/step - loss: 0.1323 - acc: 0.9545\n", 227 | "CPU times: user 53.2 s, sys: 1.14 s, total: 54.3 s\n", 228 | "Wall time: 53.4 s\n" 229 | ] 230 | }, 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "" 235 | ] 236 | }, 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "%%time\n", 244 | "# Main training loop: 53s\n", 245 | "model.fit(x_train,\n", 246 | " y_train,\n", 247 | " batch_size=BATCHSIZE,\n", 248 | " epochs=EPOCHS,\n", 249 | " verbose=1)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 11, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "CPU times: user 7.35 s, sys: 52.2 ms, total: 7.4 s\n", 262 | "Wall time: 7.4 s\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "%%time\n", 268 | "# Main evaluation loop: 7s\n", 269 | "y_guess = model.predict(x_test, batch_size=BATCHSIZE)\n", 270 | "y_guess = np.argmax(y_guess, axis=-1)\n", 271 | "y_truth = np.argmax(y_test, axis=-1)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 12, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "name": "stdout", 281 | "output_type": "stream", 282 | "text": [ 283 | "Accuracy: 0.86076\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "print(\"Accuracy: \", sum(y_guess == y_truth)/len(y_guess))" 289 | ] 290 | } 291 | ], 292 | "metadata": { 293 | "anaconda-cloud": {}, 294 | "kernelspec": { 295 | "display_name": "Python 3", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | 
"language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.5.2" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /notebooks/Keras_TF_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import os\n", 18 | "os.environ['KERAS_BACKEND'] = \"tensorflow\"\n", 19 | "import sys\n", 20 | "import numpy as np\n", 21 | "import keras as K\n", 22 | "import tensorflow as tf\n", 23 | "from keras.applications.resnet50 import ResNet50\n", 24 | "from common.params_inf import *\n", 25 | "from common.utils import *" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "# Force one-gpu\n", 35 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", 36 | "# Faster with channels-last, maybe because model expects that?\n", 37 | "K.backend.set_image_data_format('channels_last')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "OS: linux\n", 50 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 51 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 52 | "Numpy: 1.14.1\n", 53 | "Keras: 2.1.4\n", 54 | "Tensorflow: 1.4.0\n", 55 | "Keras using tensorflow\n", 56 | "Keras channel ordering is channels_last\n", 57 | 
"CUDA Version 8.0.61\n", 58 | "CuDNN Version 6.0.21\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "print(\"OS: \", sys.platform)\n", 64 | "print(\"Python: \", sys.version)\n", 65 | "print(\"Numpy: \", np.__version__)\n", 66 | "print(\"Keras: \", K.__version__)\n", 67 | "print(\"Tensorflow: \", tf.__version__)\n", 68 | "print(\"Keras using {}\".format(K.backend.backend()))\n", 69 | "print(\"Keras channel ordering is {}\".format(K.backend.image_data_format()))\n", 70 | "print(get_cuda_version())\n", 71 | "print(\"CuDNN Version \", get_cudnn_version())" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "# Create batches of fake data\n", 89 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 90 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def predict_fn(classifier, data, batchsize):\n", 100 | " \"\"\" Return features from classifier \"\"\"\n", 101 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 102 | " for idx, dta in yield_mb_X(data, batchsize):\n", 103 | " out[idx*batchsize:(idx+1)*batchsize] = classifier.predict_on_batch(dta).squeeze()\n", 104 | " return out" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# Download Resnet weights\n", 114 | "model = ResNet50(include_top=False, input_shape=(224,224,3))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 7, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "fake_input_data_cl = np.ascontiguousarray(fake_input_data_cl)\n", 124 | "cold_start = 
predict_fn(model, fake_input_data_cl, BATCH_SIZE)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 8, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "CPU times: user 3.18 s, sys: 835 ms, total: 4.01 s\n", 137 | "Wall time: 3.66 s\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "%%time\n", 143 | "features = predict_fn(model, fake_input_data_cl, BATCH_SIZE)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "Images per second 349.72677595628414\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/3.66))" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.5.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /notebooks/Keras_TF_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN Keras (TF) Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "Using TensorFlow backend.\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import os\n", 25 | "import 
sys\n", 26 | "import numpy as np\n", 27 | "os.environ['KERAS_BACKEND'] = \"tensorflow\"\n", 28 | "import keras as K\n", 29 | "import tensorflow as tf\n", 30 | "from keras.models import Sequential\n", 31 | "from keras.layers import Dense, Embedding, GRU, CuDNNGRU\n", 32 | "from common.params_lstm import *\n", 33 | "from common.utils import *" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Force one-gpu\n", 43 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "OS: linux\n", 56 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 57 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 58 | "Keras: 2.1.4\n", 59 | "Numpy: 1.14.1\n", 60 | "Tensorflow: 1.4.0\n", 61 | "tensorflow\n", 62 | "channels_last\n", 63 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 64 | "CUDA Version 8.0.61\n", 65 | "CuDNN Version 6.0.21\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "print(\"OS: \", sys.platform)\n", 71 | "print(\"Python: \", sys.version)\n", 72 | "print(\"Keras: \", K.__version__)\n", 73 | "print(\"Numpy: \", np.__version__)\n", 74 | "print(\"Tensorflow: \", tf.__version__)\n", 75 | "print(K.backend.backend())\n", 76 | "print(K.backend.image_data_format())\n", 77 | "print(\"GPU: \", get_gpu_name())\n", 78 | "print(get_cuda_version())\n", 79 | "print(\"CuDNN Version \", get_cudnn_version())" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def create_symbol(CUDNN=True, maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n", 89 | " model = Sequential()\n", 90 | " model.add(Embedding(maxf, edim, input_length=maxl))\n", 91 | " # Only return last output\n", 92 | " if 
not CUDNN:\n", 93 | " model.add(GRU(nhid, return_sequences=False, return_state=False))\n", 94 | " else:\n", 95 | " model.add(CuDNNGRU(nhid, return_sequences=False, return_state=False))\n", 96 | " model.add(Dense(2, activation='softmax'))\n", 97 | " return model" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 107 | " m.compile(\n", 108 | " loss = \"categorical_crossentropy\",\n", 109 | " optimizer = K.optimizers.Adam(lr, b1, b2, eps),\n", 110 | " metrics = ['accuracy'])\n", 111 | " return m" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | "Preparing train set...\n", 124 | "Preparing test set...\n", 125 | "Trimming to 30000 max-features\n", 126 | "Padding to length 150\n", 127 | "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n", 128 | "int32 int32 int32 int32\n", 129 | "CPU times: user 5.94 s, sys: 401 ms, total: 6.35 s\n", 130 | "Wall time: 6.35 s\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "%%time\n", 136 | "# Data into format for library\n", 137 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)\n", 138 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 139 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 7, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "CPU times: user 1.04 s, sys: 699 ms, total: 1.74 s\n", 152 | "Wall time: 916 ms\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "%%time\n", 158 | "# Load symbol\n", 159 | "sym = create_symbol()" 160 | ] 161 | }, 162 | { 163 | 
"cell_type": "code", 164 | "execution_count": 8, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "CPU times: user 99.9 ms, sys: 227 ms, total: 326 ms\n", 172 | "Wall time: 27.5 ms\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "%%time\n", 178 | "# Initialise model\n", 179 | "model = init_model(sym)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "_________________________________________________________________\n", 192 | "Layer (type) Output Shape Param # \n", 193 | "=================================================================\n", 194 | "embedding_1 (Embedding) (None, 150, 125) 3750000 \n", 195 | "_________________________________________________________________\n", 196 | "cu_dnngru_1 (CuDNNGRU) (None, 100) 68100 \n", 197 | "_________________________________________________________________\n", 198 | "dense_1 (Dense) (None, 2) 202 \n", 199 | "=================================================================\n", 200 | "Total params: 3,818,302\n", 201 | "Trainable params: 3,818,302\n", 202 | "Non-trainable params: 0\n", 203 | "_________________________________________________________________\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "model.summary()" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 10, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "Epoch 1/3\n", 221 | "25000/25000 [==============================] - 10s 386us/step - loss: 0.5025 - acc: 0.7336\n", 222 | "Epoch 2/3\n", 223 | "25000/25000 [==============================] - 8s 312us/step - loss: 0.2273 - acc: 0.9138\n", 224 | "Epoch 3/3\n", 225 | "25000/25000 [==============================] - 8s 313us/step - loss: 0.1246 - acc: 0.9572\n", 226 | 
"CPU times: user 23 s, sys: 4.06 s, total: 27.1 s\n", 227 | "Wall time: 25.6 s\n" 228 | ] 229 | }, 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "" 234 | ] 235 | }, 236 | "execution_count": 10, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "%%time\n", 243 | "# Main training loop: 26s\n", 244 | "model.fit(x_train,\n", 245 | " y_train,\n", 246 | " batch_size=BATCHSIZE,\n", 247 | " epochs=EPOCHS,\n", 248 | " verbose=1)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 11, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "CPU times: user 2.59 s, sys: 386 ms, total: 2.97 s\n", 261 | "Wall time: 2.72 s\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "%%time\n", 267 | "# Main evaluation loop: 3s\n", 268 | "y_guess = model.predict(x_test, batch_size=BATCHSIZE)\n", 269 | "y_guess = np.argmax(y_guess, axis=-1)\n", 270 | "y_truth = np.argmax(y_test, axis=-1)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 12, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "Accuracy: 0.85496\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "print(\"Accuracy: \", sum(y_guess == y_truth)/len(y_guess))" 288 | ] 289 | } 290 | ], 291 | "metadata": { 292 | "anaconda-cloud": {}, 293 | "kernelspec": { 294 | "display_name": "Python 3", 295 | "language": "python", 296 | "name": "python3" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | "version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.5.2" 309 | } 310 | }, 311 | "nbformat": 4, 312 | "nbformat_minor": 2 313 | } 314 | 
-------------------------------------------------------------------------------- /notebooks/Knet_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Knet CNN Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# After installing and starting Julia run the following to install the required packages:\n", 17 | "# Pkg.init(); Pkg.update()\n", 18 | "# for p in (\"CUDAdrv\",\"IJulia\",\"Knet\"); Pkg.add(p); end\n", 19 | "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n", 20 | "# Pkg.build(\"Knet\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "using Knet\n", 30 | "True=true # so we can read the python params\n", 31 | "include(\"common/params.py\");" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "OS: Linux\n", 44 | "Julia: 0.6.1\n", 45 | "Knet: 0.8.5+\n", 46 | "GPU: Tesla K80\n", 47 | "\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "println(\"OS: \", Sys.KERNEL)\n", 53 | "println(\"Julia: \", VERSION)\n", 54 | "println(\"Knet: \", Pkg.installed(\"Knet\"))\n", 55 | "println(\"GPU: \", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# define model\n", 67 | "function initmodel(; atype=KnetArray, dtype=Float32, winit=xavier, binit=zeros)\n", 68 | " w(dims...)=atype(winit(dtype,dims...))\n", 69 | " b(dims...)=atype(binit(dtype,dims...))\n", 70 | " return Any[\n", 71 | " w(3,3,3,50), 
b(1,1,50,1),\n", 72 | " w(3,3,50,50), b(1,1,50,1),\n", 73 | " w(3,3,50,100), b(1,1,100,1),\n", 74 | " w(3,3,100,100), b(1,1,100,1),\n", 75 | " w(512,6400), b(512,1),\n", 76 | " w(10,512), b(10,1)\n", 77 | " ]\n", 78 | "end;" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "# define loss and its gradient\n", 90 | "function predict(w,x)\n", 91 | " convbias(x,w,b) = conv4(w,x;padding=1) .+ b\n", 92 | " fc(x,w,b) = w * mat(x) .+ b;\n", 93 | " x = relu.(convbias(x,w[1],w[2]))\n", 94 | " x = relu.(pool(convbias(x,w[3],w[4])))\n", 95 | " x = dropout(x,0.25)\n", 96 | " x = relu.(convbias(x,w[5],w[6]))\n", 97 | " x = relu.(pool(convbias(x,w[7],w[8])))\n", 98 | " x = dropout(x,0.25)\n", 99 | " x = relu.(fc(x,w[9],w[10]))\n", 100 | " x = dropout(x,0.5)\n", 101 | " return fc(x,w[11],w[12])\n", 102 | "end\n", 103 | "\n", 104 | "loss(w,x,y)=nll(predict(w,x),y) # nll: negative log likelihood\n", 105 | "lossgradient = grad(loss);" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stderr", 115 | "output_type": "stream", 116 | "text": [ 117 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mReading cifar-10-binary.tar.gz...\n", 118 | "\u001b[39m" 119 | ] 120 | }, 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | " 3.525842 seconds (1.27 M allocations: 1.783 GiB, 18.79% gc time)\n", 126 | "32×32×3×50000 Array{Float32,4}\n", 127 | "50000-element Array{UInt8,1}\n", 128 | "32×32×3×10000 Array{Float32,4}\n", 129 | "10000-element Array{UInt8,1}\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "# load data\n", 135 | "include(Knet.dir(\"data\",\"cifar.jl\"))\n", 136 | "@time (xtrn,ytrn,xtst,ytst,lbls)=cifar10()\n", 137 | "for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | 
"execution_count": 7, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# prepare for training\n", 149 | "model = optim = nothing; knetgc() # Clear memory from last run\n", 150 | "model = initmodel()\n", 151 | "optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | " 25.437272 seconds (4.03 M allocations: 784.659 MiB, 11.86% gc time)\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "# cold start\n", 169 | "@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)\n", 170 | " grads = lossgradient(model, x, y)\n", 171 | " update!(model, grads, optim)\n", 172 | "end" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 9, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "# prepare for training\n", 184 | "model = optim = nothing; knetgc() # Clear memory from last run\n", 185 | "model = initmodel()\n", 186 | "optim = optimizers(model, Momentum; lr=LR, gamma=MOMENTUM);" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stderr", 196 | "output_type": "stream", 197 | "text": [ 198 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTraining...\n", 199 | "\u001b[39m" 200 | ] 201 | }, 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | " 15.909965 seconds (1.88 M allocations: 670.408 MiB, 0.41% gc time)\n", 207 | " 15.772903 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time)\n", 208 | " 15.829510 seconds (1.88 M allocations: 670.285 MiB, 0.44% gc time)\n", 209 | " 15.961176 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time)\n", 210 | " 15.869710 seconds (1.88 M allocations: 670.285 
MiB, 0.46% gc time)\n", 211 | " 15.872871 seconds (1.88 M allocations: 670.353 MiB, 0.46% gc time)\n", 212 | " 15.839494 seconds (1.88 M allocations: 670.285 MiB, 0.42% gc time)\n", 213 | " 16.007868 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time)\n", 214 | " 15.859198 seconds (1.88 M allocations: 670.285 MiB, 0.47% gc time)\n", 215 | " 15.870192 seconds (1.88 M allocations: 670.285 MiB, 0.43% gc time)\n", 216 | "158.797837 seconds (18.77 M allocations: 6.547 GiB, 0.44% gc time)\n" 217 | ] 218 | } 219 | ], 220 | "source": [ 221 | "# 159s\n", 222 | "info(\"Training...\")\n", 223 | "@time for epoch in 1:EPOCHS\n", 224 | " @time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true,xtype=KnetArray)\n", 225 | " grads = lossgradient(model, x, y)\n", 226 | " update!(model, grads, optim)\n", 227 | " end\n", 228 | "end" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 11, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | " 2.123045 seconds (559.28 k allocations: 145.928 MiB, 1.10% gc time)\n" 241 | ] 242 | }, 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "0.7754407051282052" 247 | ] 248 | }, 249 | "execution_count": 11, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "# test accuracy 77.54\n", 256 | "testdata = minibatch(xtst,ytst,BATCHSIZE;xtype=KnetArray)\n", 257 | "@time accuracy(model,testdata,predict)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": { 264 | "collapsed": true 265 | }, 266 | "outputs": [], 267 | "source": [] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Julia 0.6.1", 273 | "language": "julia", 274 | "name": "julia-0.6" 275 | }, 276 | "language_info": { 277 | "file_extension": ".jl", 278 | "mimetype": "application/julia", 279 | "name": "julia", 280 | "version": "0.6.1" 281 | } 282 | }, 283 | 
"nbformat": 4, 284 | "nbformat_minor": 2 285 | } 286 | -------------------------------------------------------------------------------- /notebooks/Knet_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# GPU: 32*40 in 8.00s = 160/s\n", 12 | "# CPU: 32*8 in 115.0s = 2/s" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# After installing and starting Julia run the following to install the required packages:\n", 22 | "# Pkg.init(); Pkg.update()\n", 23 | "# for p in (\"CUDAapi\",\"CUDAdrv\",\"MAT\",\"Images\",\"IJulia\",\"Knet\"); Pkg.add(p); end\n", 24 | "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n", 25 | "# Pkg.build(\"Knet\")" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "OS: Linux\n", 38 | "Julia: 0.6.1\n", 39 | "Knet: 0.8.5+\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "using Knet\n", 45 | "include(Knet.dir(\"examples\",\"resnet\", \"resnetlib.jl\"))\n", 46 | "using ResNetLib: resnet50init, resnet50\n", 47 | "println(\"OS: \", Sys.KERNEL)\n", 48 | "println(\"Julia: \", VERSION)\n", 49 | "println(\"Knet: \", Pkg.installed(\"Knet\"))" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "6\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | ";cat /proc/cpuinfo '|' grep processor '|' wc -l" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | 
"output_type": "stream", 77 | "text": [ 78 | "name\n", 79 | "Tesla K80\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | ";nvidia-smi --query-gpu=gpu_name --format=csv" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 6, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "8" 96 | ] 97 | }, 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "const BATCH_SIZE = 32\n", 105 | "const RESNET_FEATURES = 2048\n", 106 | "const BATCHES_GPU = 40\n", 107 | "const BATCHES_CPU = 8" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 7, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "fakedata (generic function with 1 method)" 119 | ] 120 | }, 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "# Create batches of fake data\n", 128 | "function fakedata(batches; atype=KnetArray)\n", 129 | " x = rand(Float32, 224, 224, 3, BATCH_SIZE * batches)\n", 130 | " minibatch(x, BATCH_SIZE, xtype=atype)\n", 131 | "end" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "predictfn (generic function with 1 method)" 143 | ] 144 | }, 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "# Return features from classifier\n", 152 | "function predictfn(weights, moments, data)\n", 153 | " out = []\n", 154 | " for x in data\n", 155 | " pred = resnet50(weights, moments, x; stage=5)\n", 156 | " push!(out, mat(pred))\n", 157 | " end\n", 158 | " return Array(hcat(out...))\n", 159 | "end" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## 1. 
GPU" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 9, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stderr", 176 | "output_type": "stream", 177 | "text": [ 178 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n", 179 | "\u001b[39m\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading imagenet-resnet-50-dag.mat...\n", 180 | "\u001b[39m" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "# Initialize resnet weights and fake data\n", 186 | "gpuweights = gpumoments = nothing; knetgc() # clear memory from previous run\n", 187 | "gpuweights, gpumoments = resnet50init(;stage=5, trained=true, atype=KnetArray);" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 10, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stderr", 197 | "output_type": "stream", 198 | "text": [ 199 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n", 200 | "\u001b[39m" 201 | ] 202 | }, 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | " 21.220333 seconds (1.93 M allocations: 842.832 MiB, 35.05% gc time)\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "info(\"Cold start\")\n", 213 | "gpudata1 = fakedata(BATCHES_GPU, atype=KnetArray)\n", 214 | "@time predictfn(gpuweights, gpumoments, gpudata1);" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 11, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stderr", 224 | "output_type": "stream", 225 | "text": [ 226 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n", 227 | "\u001b[39m" 228 | ] 229 | }, 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | " 8.002292 seconds (360.61 k allocations: 760.376 MiB, 3.82% gc time)\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "info(\"Benchmarking\")\n", 240 | "gpudata = fakedata(BATCHES_GPU, atype=KnetArray)\n", 241 | "@time 
predictfn(gpuweights, gpumoments, gpudata);" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## 2. CPU" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 12, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "name": "stderr", 258 | "output_type": "stream", 259 | "text": [ 260 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading pretrained weights...\n", 261 | "\u001b[39m" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "# Initialize resnet weights\n", 267 | "cpuweights, cpumoments = resnet50init(;stage=5, trained=true, atype=Array);" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 13, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stderr", 277 | "output_type": "stream", 278 | "text": [ 279 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mCold start\n", 280 | "\u001b[39m" 281 | ] 282 | }, 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | " 25.160136 seconds (14.20 M allocations: 4.351 GiB, 10.91% gc time)\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "info(\"Cold start\")\n", 293 | "cpudata1 = fakedata(1, atype=Array);\n", 294 | "@time predictfn(cpuweights, cpumoments, cpudata1);" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 14, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "name": "stderr", 304 | "output_type": "stream", 305 | "text": [ 306 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mBenchmarking\n", 307 | "\u001b[39m" 308 | ] 309 | }, 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "115.024997 seconds (174.89 k allocations: 30.150 GiB, 15.85% gc time)\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "info(\"Benchmarking\")\n", 320 | "cpudata = fakedata(BATCHES_CPU, atype=Array);\n", 321 | "@time predictfn(cpuweights, cpumoments, cpudata);" 322 | ] 323 | } 324 | ], 325 | 
"metadata": { 326 | "kernelspec": { 327 | "display_name": "Julia 0.6.1", 328 | "language": "julia", 329 | "name": "julia-0.6" 330 | }, 331 | "language_info": { 332 | "file_extension": ".jl", 333 | "mimetype": "application/julia", 334 | "name": "julia", 335 | "version": "0.6.1" 336 | } 337 | }, 338 | "nbformat": 4, 339 | "nbformat_minor": 2 340 | } 341 | -------------------------------------------------------------------------------- /notebooks/Knet_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Knet RNN example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# After installing and starting Julia run the following to install the required packages:\n", 19 | "# Pkg.init(); Pkg.update()\n", 20 | "# for p in (\"CUDAdrv\",\"IJulia\",\"PyCall\",\"JLD2\",\"Knet\"); Pkg.add(p); end\n", 21 | "# Pkg.checkout(\"Knet\",\"ilkarman\") # make sure we have the right Knet version\n", 22 | "# Pkg.build(\"Knet\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "using Knet\n", 32 | "True=true # so we can read the python params\n", 33 | "include(\"common/params_lstm.py\");" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "OS: Linux\n", 46 | "Julia: 0.6.1\n", 47 | "Knet: 0.8.5+\n", 48 | "GPU: Tesla K80\n", 49 | "\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "println(\"OS: \", Sys.KERNEL)\n", 55 | "println(\"Julia: \", VERSION)\n", 56 | "println(\"Knet: \", Pkg.installed(\"Knet\"))\n", 57 | "println(\"GPU: \", readstring(`nvidia-smi --query-gpu=name --format=csv,noheader`))" 58 | ] 59 | }, 60 | { 
61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "# define model\n", 69 | "function initmodel()\n", 70 | " rnnSpec,rnnWeights = rnninit(EMBEDSIZE,NUMHIDDEN; rnnType=:gru)\n", 71 | " inputMatrix = KnetArray(xavier(Float32,EMBEDSIZE,MAXFEATURES))\n", 72 | " outputMatrix = KnetArray(xavier(Float32,2,NUMHIDDEN))\n", 73 | " return rnnSpec,(rnnWeights,inputMatrix,outputMatrix)\n", 74 | "end;" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# define loss and its gradient\n", 86 | "function predict(weights, inputs, rnnSpec)\n", 87 | " rnnWeights, inputMatrix, outputMatrix = weights # (1,1,W), (X,V), (2,H)\n", 88 | " indices = hcat(inputs...)' # (B,T)\n", 89 | " rnnInput = inputMatrix[:,indices] # (X,B,T)\n", 90 | " rnnOutput = rnnforw(rnnSpec, rnnWeights, rnnInput)[1] # (H,B,T)\n", 91 | " return outputMatrix * rnnOutput[:,:,end] # (2,H) * (H,B) = (2,B)\n", 92 | "end\n", 93 | "\n", 94 | "loss(w,x,y,r)=nll(predict(w,x,r),y)\n", 95 | "lossgradient = grad(loss);" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stderr", 105 | "output_type": "stream", 106 | "text": [ 107 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mLoading IMDB...\n", 108 | "\u001b[39m" 109 | ] 110 | }, 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | " 10.266185 seconds (15.94 M allocations: 835.780 MiB, 3.98% gc time)\n", 116 | "25000-element Array{Array{Int32,1},1}\n", 117 | "25000-element Array{Int8,1}\n", 118 | "25000-element Array{Array{Int32,1},1}\n", 119 | "25000-element Array{Int8,1}\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "# load data\n", 125 | "include(Knet.dir(\"data\",\"imdb.jl\"))\n", 126 | "@time 
(xtrn,ytrn,xtst,ytst,imdbdict)=imdb(maxlen=MAXLEN,maxval=MAXFEATURES)\n", 127 | "for d in (xtrn,ytrn,xtst,ytst); println(summary(d)); end" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "# prepare for training\n", 139 | "weights = nothing; knetgc(); # Reclaim memory from previous run\n", 140 | "rnnSpec,weights = initmodel()\n", 141 | "optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | " 14.319533 seconds (2.08 M allocations: 138.579 MiB, 3.58% gc time)\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "# cold start\n", 159 | "@time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true)\n", 160 | " grads = lossgradient(weights,x,y,rnnSpec)\n", 161 | " update!(weights, grads, optim)\n", 162 | "end" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 9, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "# prepare for training\n", 174 | "weights = nothing; knetgc(); # Reclaim memory from previous run\n", 175 | "rnnSpec,weights = initmodel()\n", 176 | "optim = optimizers(weights, Adam; lr=LR, beta1=BETA_1, beta2=BETA_2, eps=EPS);" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 10, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stderr", 186 | "output_type": "stream", 187 | "text": [ 188 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTraining...\n", 189 | "\u001b[39m" 190 | ] 191 | }, 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | " 9.776101 seconds (356.68 k allocations: 45.007 MiB, 4.79% gc time)\n", 197 | " 9.786896 seconds (352.22 k 
allocations: 44.658 MiB, 5.91% gc time)\n", 198 | " 9.732747 seconds (352.94 k allocations: 44.669 MiB, 5.92% gc time)\n", 199 | " 29.298876 seconds (1.07 M allocations: 134.572 MiB, 5.54% gc time)\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "# 29s\n", 205 | "info(\"Training...\")\n", 206 | "@time for epoch in 1:EPOCHS\n", 207 | " @time for (x,y) in minibatch(xtrn,ytrn,BATCHSIZE;shuffle=true)\n", 208 | " grads = lossgradient(weights,x,y,rnnSpec)\n", 209 | " update!(weights, grads, optim)\n", 210 | " end\n", 211 | "end" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 14, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "\u001b[1m\u001b[36mINFO: \u001b[39m\u001b[22m\u001b[36mTesting...\n", 224 | "\u001b[39m" 225 | ] 226 | }, 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | " 2.999301 seconds (70.50 k allocations: 34.680 MiB, 11.61% gc time)\n" 232 | ] 233 | }, 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "0.844511217948718" 238 | ] 239 | }, 240 | "execution_count": 14, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "info(\"Testing...\")\n", 247 | "@time accuracy(weights, minibatch(xtst,ytst,BATCHSIZE), (w,x)->predict(w,x,rnnSpec))" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": true 255 | }, 256 | "outputs": [], 257 | "source": [] 258 | } 259 | ], 260 | "metadata": { 261 | "kernelspec": { 262 | "display_name": "Julia 0.6.1", 263 | "language": "julia", 264 | "name": "julia-0.6" 265 | }, 266 | "language_info": { 267 | "file_extension": ".jl", 268 | "mimetype": "application/julia", 269 | "name": "julia", 270 | "version": "0.6.1" 271 | } 272 | }, 273 | "nbformat": 4, 274 | "nbformat_minor": 2 275 | } 276 | -------------------------------------------------------------------------------- 
/notebooks/MXNet_CNN_highAPI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level MXNet Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import mxnet as mx\n", 20 | "import logging\n", 21 | "from common.params import *\n", 22 | "from common.utils import *" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# logging\n", 32 | "logger = logging.getLogger();\n", 33 | "logger.setLevel(logging.DEBUG);\n", 34 | "formatter = logging.Formatter('%(message)s');\n", 35 | "h2 = logging.StreamHandler(sys.stdout)\n", 36 | "h2.setFormatter(formatter);\n", 37 | "logger.addHandler(h2)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Force one-gpu\n", 47 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "OS: linux\n", 60 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 61 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 62 | "Numpy: 1.14.1\n", 63 | "MXNet: 0.12.0\n", 64 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 65 | "CUDA Version 8.0.61\n", 66 | "CuDNN Version 6.0.21\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "print(\"OS: \", sys.platform)\n", 72 | "print(\"Python: \", sys.version)\n", 73 | "print(\"Numpy: \", np.__version__)\n", 74 | "print(\"MXNet: \", mx.__version__)\n", 75 | "print(\"GPU: \", get_gpu_name())\n", 76 | "print(get_cuda_version())\n", 77 | 
"print(\"CuDNN Version \", get_cudnn_version())" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "def create_symbol(n_classes=N_CLASSES):\n", 87 | " data = mx.symbol.Variable('data')\n", 88 | " # size = [(old-size - kernel + 2*padding)/stride]+1\n", 89 | " # if kernel = 3, pad with 1 either side\n", 90 | " conv1 = mx.symbol.Convolution(data=data, num_filter=50, pad=(1,1), kernel=(3,3))\n", 91 | " relu1 = mx.symbol.Activation(data=conv1, act_type=\"relu\")\n", 92 | " conv2 = mx.symbol.Convolution(data=relu1, num_filter=50, pad=(1,1), kernel=(3,3))\n", 93 | " pool1 = mx.symbol.Pooling(data=conv2, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n", 94 | " relu2 = mx.symbol.Activation(data=pool1, act_type=\"relu\")\n", 95 | " drop1 = mx.symbol.Dropout(data=relu2, p=0.25)\n", 96 | " \n", 97 | " conv3 = mx.symbol.Convolution(data=drop1, num_filter=100, pad=(1,1), kernel=(3,3))\n", 98 | " relu3 = mx.symbol.Activation(data=conv3, act_type=\"relu\")\n", 99 | " conv4 = mx.symbol.Convolution(data=relu3, num_filter=100, pad=(1,1), kernel=(3,3))\n", 100 | " pool2 = mx.symbol.Pooling(data=conv4, pool_type=\"max\", kernel=(2,2), stride=(2,2))\n", 101 | " relu4 = mx.symbol.Activation(data=pool2, act_type=\"relu\")\n", 102 | " drop2 = mx.symbol.Dropout(data=relu4, p=0.25)\n", 103 | " \n", 104 | " flat1 = mx.symbol.Flatten(data=drop2)\n", 105 | " fc1 = mx.symbol.FullyConnected(data=flat1, num_hidden=512)\n", 106 | " relu7 = mx.symbol.Activation(data=fc1, act_type=\"relu\")\n", 107 | " drop4 = mx.symbol.Dropout(data=relu7, p=0.5)\n", 108 | " fc2 = mx.symbol.FullyConnected(data=drop4, num_hidden=n_classes) \n", 109 | " \n", 110 | " input_y = mx.symbol.Variable('softmax_label') \n", 111 | " m = mx.symbol.SoftmaxOutput(data=fc2, label=input_y, name=\"softmax\")\n", 112 | " return m" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "outputs": 
[], 120 | "source": [ 121 | "def init_model(m, batchsize=BATCHSIZE, lr=LR, momentum=MOMENTUM):\n", 122 | " ctx = [mx.gpu(0)]\n", 123 | " mod = mx.mod.Module(context=ctx, symbol=m)\n", 124 | " mod.bind(data_shapes=[('data', (batchsize, 3, 32, 32))],\n", 125 | " label_shapes=[('softmax_label', (batchsize,))])\n", 126 | " return mod" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "Preparing train set...\n", 139 | "Preparing test set...\n", 140 | "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n", 141 | "float32 float32 int32 int32\n", 142 | "CPU times: user 1.07 s, sys: 1.09 s, total: 2.16 s\n", 143 | "Wall time: 2.17 s\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "%%time\n", 149 | "# Data into format for library\n", 150 | "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n", 151 | "# Load data-iterator\n", 152 | "train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)\n", 153 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 154 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 8, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "CPU times: user 4.49 ms, sys: 0 ns, total: 4.49 ms\n", 167 | "Wall time: 3.58 ms\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "%%time\n", 173 | "# Load symbol\n", 174 | "sym = create_symbol()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 9, 180 | "metadata": { 181 | "scrolled": true 182 | }, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "CPU times: user 1 s, sys: 714 ms, total: 1.71 s\n", 189 | "Wall time: 1.99 s\n" 190 | ] 191 | } 192 | ], 193 
| "source": [ 194 | "%%time\n", 195 | "# Initialise model\n", 196 | "model = init_model(sym)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "Already bound, ignoring bind()\n", 209 | "Epoch[0] Train-accuracy=0.337976\n", 210 | "Epoch[0] Time cost=4.913\n", 211 | "Epoch[1] Train-accuracy=0.498601\n", 212 | "Epoch[1] Time cost=4.840\n", 213 | "Epoch[2] Train-accuracy=0.580802\n", 214 | "Epoch[2] Time cost=4.886\n", 215 | "Epoch[3] Train-accuracy=0.642144\n", 216 | "Epoch[3] Time cost=4.821\n", 217 | "Epoch[4] Train-accuracy=0.686161\n", 218 | "Epoch[4] Time cost=4.836\n", 219 | "Epoch[5] Train-accuracy=0.718570\n", 220 | "Epoch[5] Time cost=4.835\n", 221 | "Epoch[6] Train-accuracy=0.744246\n", 222 | "Epoch[6] Time cost=4.849\n", 223 | "Epoch[7] Train-accuracy=0.767823\n", 224 | "Epoch[7] Time cost=4.830\n", 225 | "Epoch[8] Train-accuracy=0.784867\n", 226 | "Epoch[8] Time cost=4.836\n", 227 | "Epoch[9] Train-accuracy=0.802130\n", 228 | "Epoch[9] Time cost=4.828\n", 229 | "CPU times: user 44.5 s, sys: 17.3 s, total: 1min 1s\n", 230 | "Wall time: 48.6 s\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "%%time\n", 236 | "# Main training loop: 49s\n", 237 | "model.fit(train_data=train_iter, \n", 238 | " num_epoch=EPOCHS,\n", 239 | " initializer=mx.init.Xavier(rnd_type='uniform'),\n", 240 | " optimizer='sgd',\n", 241 | " optimizer_params=(('learning_rate', LR), ('momentum', MOMENTUM)),\n", 242 | " eval_metric=mx.metric.create('acc'))" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 11, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "CPU times: user 370 ms, sys: 142 ms, total: 512 ms\n", 255 | "Wall time: 316 ms\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "%%time\n", 261 | "# Main evaluation loop: 
311ms\n", 262 | "y_guess = model.predict(mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False))\n", 263 | "y_guess = np.argmax(y_guess.asnumpy(), axis=-1)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 12, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "Accuracy: 0.7707\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "print(\"Accuracy: \", sum(y_guess == y_test)/len(y_guess))" 281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "anaconda-cloud": {}, 286 | "kernelspec": { 287 | "display_name": "Python [default]", 288 | "language": "python", 289 | "name": "python3" 290 | }, 291 | "language_info": { 292 | "codemirror_mode": { 293 | "name": "ipython", 294 | "version": 3 295 | }, 296 | "file_extension": ".py", 297 | "mimetype": "text/x-python", 298 | "name": "python", 299 | "nbconvert_exporter": "python", 300 | "pygments_lexer": "ipython3", 301 | "version": "3.5.2" 302 | } 303 | }, 304 | "nbformat": 4, 305 | "nbformat_minor": 2 306 | } 307 | -------------------------------------------------------------------------------- /notebooks/MXNet_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import sys\n", 11 | "import numpy as np\n", 12 | "import mxnet as mx\n", 13 | "from mxnet import gluon, nd\n", 14 | "from collections import namedtuple\n", 15 | "from common.params_inf import *\n", 16 | "from common.utils import *" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Force one-gpu\n", 26 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | 
"name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "OS: linux\n", 39 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 40 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 41 | "Numpy: 1.14.1\n", 42 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 43 | "CUDA Version 8.0.61\n", 44 | "CuDNN Version 6.0.21\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "print(\"OS: \", sys.platform)\n", 50 | "print(\"Python: \", sys.version)\n", 51 | "print(\"Numpy: \", np.__version__)\n", 52 | "print(\"GPU: \", get_gpu_name())\n", 53 | "print(get_cuda_version())\n", 54 | "print(\"CuDNN Version \", get_cudnn_version())" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "Batch = namedtuple('Batch', ['data'])\n", 64 | "ctx = mx.gpu(0)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "# Create batches of fake data\n", 82 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 83 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Downloaded\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "# Download Resnet weights\n", 101 | "path='http://data.mxnet.io/models/imagenet/'\n", 102 | "mx.test_utils.download(path+'resnet/50-layers/resnet-50-symbol.json')\n", 103 | "mx.test_utils.download(path+'resnet/50-layers/resnet-50-0000.params')\n", 104 | "print(\"Downloaded\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 7, 110 | "metadata": {}, 111 | "outputs": [], 112 
| "source": [ 113 | "# Load model\n", 114 | "sym, arg_params, aux_params = mx.model.load_checkpoint('resnet-50', 0)\n", 115 | "# List the last 10 layers\n", 116 | "all_layers = sym.get_internals()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 8, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "# Get last layer\n", 126 | "flatten_layer = all_layers['flatten0_output']" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 9, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "def predict_fn(classifier, data, batchsize, ctx):\n", 136 | " \"\"\" Return features from classifier \"\"\"\n", 137 | " out = nd.zeros((len(data), RESNET_FEATURES), dtype=np.float32, ctx=ctx) \n", 138 | " for idx, dta in yield_mb_X(data, batchsize):\n", 139 | " classifier.forward(Batch(data=[mx.nd.array(dta)]))\n", 140 | " out[idx*batchsize:(idx+1)*batchsize] = classifier.get_outputs()[0]\n", 141 | " nd.waitall()\n", 142 | " return out" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 10, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# Get last layer\n", 152 | "fe_sym = all_layers['flatten0_output']\n", 153 | "# Initialise GPU\n", 154 | "fe_mod = mx.mod.Module(symbol=fe_sym, context=ctx, label_names=None)\n", 155 | "fe_mod.bind(for_training=False, inputs_need_grad=False,\n", 156 | " data_shapes=[('data', (BATCH_SIZE,3,224,224))])\n", 157 | "fe_mod.set_params(arg_params, aux_params)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 11, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "cold_start = predict_fn(fe_mod, fake_input_data_cf, BATCH_SIZE, ctx)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 12, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "CPU times: user 2 s, sys: 631 ms, total: 2.63 s\n", 179 | 
"Wall time: 2.1 s\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "%%time\n", 185 | "features = predict_fn(fe_mod, fake_input_data_cf, BATCH_SIZE, ctx)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 14, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "Images per second 609.5238095238095\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.1))" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python [default]", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.5.2" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /notebooks/MXNet_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN MXNet Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import mxnet as mx\n", 20 | "from mxnet.io import DataDesc\n", 21 | "from common.params_lstm import *\n", 22 | "from common.utils import *" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Force one-gpu\n", 32 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | 
"execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "OS: linux\n", 45 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 46 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 47 | "Numpy: 1.14.1\n", 48 | "MXNet: 0.12.0\n", 49 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 50 | "CUDA Version 8.0.61\n", 51 | "CuDNN Version 6.0.21\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(\"OS: \", sys.platform)\n", 57 | "print(\"Python: \", sys.version)\n", 58 | "print(\"Numpy: \", np.__version__)\n", 59 | "print(\"MXNet: \", mx.__version__)\n", 60 | "print(\"GPU: \", get_gpu_name())\n", 61 | "print(get_cuda_version())\n", 62 | "print(\"CuDNN Version \", get_cudnn_version())" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "def create_symbol(CUDNN=True,\n", 72 | " maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n", 73 | " # https://mxnet.incubator.apache.org/api/python/rnn.html\n", 74 | " data = mx.symbol.Variable('data')\n", 75 | " embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)\n", 76 | " \n", 77 | " # Fusing RNN layers across time step into one kernel\n", 78 | " # Improves speed but is less flexible\n", 79 | " # Currently only supported if using cuDNN on GPU\n", 80 | " if not CUDNN:\n", 81 | " gru_cell = mx.rnn.GRUCell(num_hidden=nhid)\n", 82 | " else:\n", 83 | " gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')\n", 84 | " \n", 85 | " begin_state = gru_cell.begin_state()\n", 86 | " # Call the cell to get the output of one time step for a batch.\n", 87 | " # TODO: TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN\n", 88 | " outputs, states = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False)\n", 89 | " \n", 90 | " fc1 = 
mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2) \n", 91 | " input_y = mx.symbol.Variable('softmax_label') \n", 92 | " m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name=\"softmax\")\n", 93 | " return m" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 103 | " ctx = [mx.gpu(0)]\n", 104 | " mod = mx.mod.Module(context=ctx, symbol=m)\n", 105 | " mod.bind(data_shapes=[DataDesc(name='data', shape=(batchs, maxl))],\n", 106 | " label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])\n", 107 | " # Glorot-uniform initializer\n", 108 | " mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))\n", 109 | " mod.init_optimizer(optimizer='Adam', \n", 110 | " optimizer_params=(('learning_rate', lr),\n", 111 | " ('beta1', b1),\n", 112 | " ('beta2', b2),\n", 113 | " ('epsilon', eps)))\n", 114 | " return mod" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "Data does not exist. 
Downloading https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/imdb.npz\n", 127 | "Preparing train set...\n", 128 | "Preparing test set...\n", 129 | "Trimming to 30000 max-features\n", 130 | "Padding to length 150\n", 131 | "(25000, 150) (25000, 150) (25000,) (25000,)\n", 132 | "int32 int32 int32 int32\n", 133 | "CPU times: user 6.05 s, sys: 512 ms, total: 6.56 s\n", 134 | "Wall time: 8.13 s\n" 135 | ] 136 | } 137 | ], 138 | "source": [ 139 | "%%time\n", 140 | "# Data into format for library\n", 141 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n", 142 | "# TNC layout faster for RNN\n", 143 | "# Train iterator\n", 144 | "train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)\n", 145 | "\n", 146 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 147 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 7, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "CPU times: user 44 ms, sys: 709 µs, total: 44.7 ms\n", 160 | "Wall time: 45.6 ms\n" 161 | ] 162 | }, 163 | { 164 | "name": "stderr", 165 | "output_type": "stream", 166 | "text": [ 167 | "/anaconda/envs/py35/lib/python3.5/site-packages/mxnet-0.12.0-py3.5.egg/mxnet/rnn/rnn_cell.py:675: UserWarning: NTC layout detected. Consider using TNC for FusedRNNCell for faster speed\n", 168 | " warnings.warn(\"NTC layout detected. 
Consider using \"\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "%%time\n", 174 | "# Load symbol\n", 175 | "# See Notebook \"MXNet_RNN_TNC.ipynb\" for example with TNC layout\n", 176 | "sym = create_symbol()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "metadata": { 183 | "scrolled": true 184 | }, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "CPU times: user 975 ms, sys: 572 ms, total: 1.55 s\n", 191 | "Wall time: 1.55 s\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "%%time\n", 197 | "# Initialise model\n", 198 | "model = init_model(sym)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 9, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "Epoch 0, Training ('accuracy', 0.7748960997442456)\n", 211 | "Epoch 1, Training ('accuracy', 0.9239130434782609)\n", 212 | "Epoch 2, Training ('accuracy', 0.9643941815856778)\n", 213 | "CPU times: user 19.9 s, sys: 5.64 s, total: 25.6 s\n", 214 | "Wall time: 24.1 s\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "%%time\n", 220 | "# Main training loop: 12.7s\n", 221 | "metric = mx.metric.create('acc')\n", 222 | "for j in range(EPOCHS):\n", 223 | " train_iter.reset()\n", 224 | " metric.reset()\n", 225 | " for batch in train_iter:\n", 226 | " model.forward(batch, is_train=True) \n", 227 | " model.update_metric(metric, batch.label)\n", 228 | " model.backward() \n", 229 | " model.update()\n", 230 | " print('Epoch %d, Training %s' % (j, metric.get()))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 10, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "CPU times: user 2.36 s, sys: 351 ms, total: 2.71 s\n", 243 | "Wall time: 2.54 s\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "%%time\n", 249 | "# Main 
evaluation loop: 1.52s\n", 250 | "y_guess = model.predict(mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False))\n", 251 | "y_guess = np.argmax(y_guess.asnumpy(), axis=-1)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 11, 257 | "metadata": {}, 258 | "outputs": [ 259 | { 260 | "name": "stdout", 261 | "output_type": "stream", 262 | "text": [ 263 | "Accuracy: 0.85864\n" 264 | ] 265 | } 266 | ], 267 | "source": [ 268 | "print(\"Accuracy: \", 1.*sum(y_guess == y_test)/len(y_guess))" 269 | ] 270 | } 271 | ], 272 | "metadata": { 273 | "anaconda-cloud": {}, 274 | "kernelspec": { 275 | "display_name": "Python [default]", 276 | "language": "python", 277 | "name": "python3" 278 | }, 279 | "language_info": { 280 | "codemirror_mode": { 281 | "name": "ipython", 282 | "version": 3 283 | }, 284 | "file_extension": ".py", 285 | "mimetype": "text/x-python", 286 | "name": "python", 287 | "nbconvert_exporter": "python", 288 | "pygments_lexer": "ipython3", 289 | "version": "3.5.2" 290 | } 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 1 294 | } 295 | -------------------------------------------------------------------------------- /notebooks/MXNet_RNN_TNC.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN MXNet Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import mxnet as mx\n", 20 | "from mxnet.io import DataDesc\n", 21 | "from common.params_lstm import *\n", 22 | "from common.utils import *" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Force one-gpu\n", 32 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 33 | ] 34 | }, 35 | { 36 
| "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "OS: linux\n", 45 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 46 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 47 | "Numpy: 1.14.1\n", 48 | "MXNet: 0.12.0\n", 49 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 50 | "CUDA Version 8.0.61\n", 51 | "CuDNN Version 6.0.21\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "print(\"OS: \", sys.platform)\n", 57 | "print(\"Python: \", sys.version)\n", 58 | "print(\"Numpy: \", np.__version__)\n", 59 | "print(\"MXNet: \", mx.__version__)\n", 60 | "print(\"GPU: \", get_gpu_name())\n", 61 | "print(get_cuda_version())\n", 62 | "print(\"CuDNN Version \", get_cudnn_version())" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "def create_symbol(CUDNN=True,\n", 72 | " maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):\n", 73 | " # https://mxnet.incubator.apache.org/api/python/rnn.html\n", 74 | " data = mx.symbol.Variable('data')\n", 75 | " embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)\n", 76 | " \n", 77 | " # Fusing RNN layers across time step into one kernel\n", 78 | " # Improves speed but is less flexible\n", 79 | " # Currently only supported if using cuDNN on GPU\n", 80 | " if not CUDNN:\n", 81 | " gru_cell = mx.rnn.GRUCell(num_hidden=nhid)\n", 82 | " else:\n", 83 | " gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')\n", 84 | " \n", 85 | " begin_state = gru_cell.begin_state()\n", 86 | " # Call the cell to get the output of one time step for a batch.\n", 87 | " # TODO: TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN\n", 88 | " outputs, states = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False, 
layout='TNC')\n", 89 | " \n", 90 | " fc1 = mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2) \n", 91 | " input_y = mx.symbol.Variable('softmax_label') \n", 92 | " m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name=\"softmax\")\n", 93 | " return m" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 103 | " ctx = [mx.gpu(0)]\n", 104 | " mod = mx.mod.Module(context=ctx, symbol=m)\n", 105 | " mod.bind(data_shapes=[DataDesc(name='data', shape=(maxl, batchs), layout='TNC')],\n", 106 | " label_shapes=[DataDesc(name='softmax_label', shape=(batchs,))])\n", 107 | " # Glorot-uniform initializer\n", 108 | " mod.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))\n", 109 | " mod.init_optimizer(optimizer='Adam', \n", 110 | " optimizer_params=(('learning_rate', lr),\n", 111 | " ('beta1', b1),\n", 112 | " ('beta2', b2),\n", 113 | " ('epsilon', eps)))\n", 114 | " return mod" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 7, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "name": "stdout", 124 | "output_type": "stream", 125 | "text": [ 126 | "Preparing train set...\n", 127 | "Preparing test set...\n", 128 | "Trimming to 30000 max-features\n", 129 | "Padding to length 150\n", 130 | "(25000, 150) (25000, 150) (25000,) (25000,)\n", 131 | "int32 int32 int32 int32\n", 132 | "CPU times: user 5.59 s, sys: 391 ms, total: 5.98 s\n", 133 | "Wall time: 5.98 s\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "%%time\n", 139 | "# Data into format for library\n", 140 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n", 141 | "wrapper_db = lambda args: mx.io.DataBatch(data=[mx.nd.array(args[0])], label=[mx.nd.array(args[1])])\n", 142 | "\n", 143 | "print(x_train.shape, x_test.shape, y_train.shape, 
y_test.shape)\n", 144 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 8, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "CPU times: user 43.3 ms, sys: 0 ns, total: 43.3 ms\n", 157 | "Wall time: 42.6 ms\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "%%time\n", 163 | "# Load symbol\n", 164 | "sym = create_symbol()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 9, 170 | "metadata": { 171 | "scrolled": true 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "CPU times: user 901 ms, sys: 521 ms, total: 1.42 s\n", 179 | "Wall time: 1.43 s\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "%%time\n", 185 | "# Initialise model\n", 186 | "model = init_model(sym)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Epoch 0, Training ('accuracy', 0.7873397435897436)\n", 199 | "Epoch 1, Training ('accuracy', 0.9302083333333333)\n", 200 | "Epoch 2, Training ('accuracy', 0.9705128205128205)\n", 201 | "CPU times: user 21 s, sys: 4.39 s, total: 25.4 s\n", 202 | "Wall time: 23.7 s\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "%%time\n", 208 | "# Main training loop: 12.7s\n", 209 | "metric = mx.metric.create('acc')\n", 210 | "for j in range(EPOCHS):\n", 211 | " metric.reset()\n", 212 | " for batch in map(wrapper_db, yield_mb_tn(x_train, y_train, BATCHSIZE, shuffle=True)):\n", 213 | " model.forward(batch) \n", 214 | " model.update_metric(metric, batch.label)\n", 215 | " model.backward() \n", 216 | " model.update()\n", 217 | " print('Epoch %d, Training %s' % (j, metric.get()))" 218 | ] 219 | } 220 | ], 221 | "metadata": { 222 | "anaconda-cloud": {}, 223 
| "kernelspec": { 224 | "display_name": "Python 3", 225 | "language": "python", 226 | "name": "python3" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 1 231 | } 232 | -------------------------------------------------------------------------------- /notebooks/PyTorch_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level PyTorch Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import math\n", 20 | "import torch\n", 21 | "import torch.nn as nn\n", 22 | "import torch.nn.functional as F\n", 23 | "import torch.optim as optim\n", 24 | "import torch.utils.data as data_utils\n", 25 | "import torch.nn.init as init\n", 26 | "from torch.autograd import Variable\n", 27 | "from common.params import *\n", 28 | "from common.utils import *" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# Force one-gpu\n", 38 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Performance Improvement\n", 48 | "# 1. 
Auto-tune\n", 49 | "torch.backends.cudnn.benchmark=True" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "OS: linux\n", 62 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 63 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 64 | "PyTorch: 0.3.1\n", 65 | "Numpy: 1.14.1\n", 66 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 67 | "CUDA Version 8.0.61\n", 68 | "CuDNN Version 6.0.21\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "print(\"OS: \", sys.platform)\n", 74 | "print(\"Python: \", sys.version)\n", 75 | "print(\"PyTorch: \", torch.__version__)\n", 76 | "print(\"Numpy: \", np.__version__)\n", 77 | "print(\"GPU: \", get_gpu_name())\n", 78 | "print(get_cuda_version())\n", 79 | "print(\"CuDNN Version \", get_cudnn_version())" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "class SymbolModule(nn.Module):\n", 89 | " def __init__(self, n_classes=N_CLASSES):\n", 90 | " super(SymbolModule, self).__init__()\n", 91 | " self.conv1 = nn.Conv2d(3, 50, kernel_size=3, padding=1)\n", 92 | " self.conv2 = nn.Conv2d(50, 50, kernel_size=3, padding=1)\n", 93 | " self.conv3 = nn.Conv2d(50, 100, kernel_size=3, padding=1)\n", 94 | " self.conv4 = nn.Conv2d(100, 100, kernel_size=3, padding=1)\n", 95 | " # feature map size is 8*8 by pooling\n", 96 | " self.fc1 = nn.Linear(100*8*8, 512)\n", 97 | " self.fc2 = nn.Linear(512, n_classes)\n", 98 | "\n", 99 | " def forward(self, x):\n", 100 | " # PyTorch requires a flag for training in dropout\n", 101 | " x = self.conv2(F.relu(self.conv1(x)))\n", 102 | " x = F.relu(F.max_pool2d(x, kernel_size=2, stride=2))\n", 103 | " x = F.dropout(x, 0.25, training=self.training)\n", 104 | "\n", 105 | " x = self.conv4(F.relu(self.conv3(x)))\n", 106 | " x = F.relu(F.max_pool2d(x, 
kernel_size=2, stride=2))\n", 107 | " x = F.dropout(x, 0.25, training=self.training)\n", 108 | "\n", 109 | " x = x.view(-1, 100*8*8) # reshape Variable\n", 110 | " x = F.dropout(F.relu(self.fc1(x)), 0.5, training=self.training)\n", 111 | " return self.fc2(x)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def init_model(m, lr=LR, momentum=MOMENTUM):\n", 121 | " # This criterion combines nn.LogSoftmax() and nn.NLLLoss() in one single class\n", 122 | " opt = optim.SGD(m.parameters(), lr, momentum)\n", 123 | " criterion = nn.CrossEntropyLoss()\n", 124 | " return opt, criterion" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": { 131 | "scrolled": true 132 | }, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "Preparing train set...\n", 139 | "Preparing test set...\n", 140 | "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n", 141 | "float32 float32 int32 int64\n", 142 | "CPU times: user 709 ms, sys: 601 ms, total: 1.31 s\n", 143 | "Wall time: 3.54 s\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "%%time\n", 149 | "# Data into format for library\n", 150 | "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n", 151 | "# Torch-specific\n", 152 | "y_train = y_train.astype(np.int32)\n", 153 | "y_test = y_test.astype(np.int64)\n", 154 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 155 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "CPU times: user 1.85 s, sys: 726 ms, total: 2.58 s\n", 168 | "Wall time: 3.73 s\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "%%time\n", 174 | "sym = 
SymbolModule()\n", 175 | "sym.cuda() # CUDA!" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "CPU times: user 131 µs, sys: 76 µs, total: 207 µs\n", 188 | "Wall time: 212 µs\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "%%time\n", 194 | "optimizer, criterion = init_model(sym)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 10, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "0\n", 207 | "1\n", 208 | "2\n", 209 | "3\n", 210 | "4\n", 211 | "5\n", 212 | "6\n", 213 | "7\n", 214 | "8\n", 215 | "9\n", 216 | "CPU times: user 38.3 s, sys: 10.9 s, total: 49.1 s\n", 217 | "Wall time: 51.2 s\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "%%time\n", 223 | "# Main training loop: 51s\n", 224 | "sym.train() # Sets training = True \n", 225 | "for j in range(EPOCHS):\n", 226 | " for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 227 | " # Get samples\n", 228 | " data = Variable(torch.FloatTensor(data).cuda())\n", 229 | " target = Variable(torch.LongTensor(target).cuda())\n", 230 | " # Init\n", 231 | " optimizer.zero_grad()\n", 232 | " # Forwards\n", 233 | " output = sym(data)\n", 234 | " # Loss\n", 235 | " loss = criterion(output, target)\n", 236 | " # Back-prop\n", 237 | " loss.backward()\n", 238 | " optimizer.step()\n", 239 | " # Log\n", 240 | " print(j)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 13, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "CPU times: user 262 ms, sys: 46.2 ms, total: 309 ms\n", 253 | "Wall time: 308 ms\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "%%time\n", 259 | "# Main evaluation loop: 308ms\n", 260 | "sym.eval() # Sets training = 
False\n", 261 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 262 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 263 | "y_truth = y_test[:n_samples]\n", 264 | "c = 0\n", 265 | "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n", 266 | " # Get samples\n", 267 | " data = Variable(torch.FloatTensor(data).cuda())\n", 268 | " # Forwards\n", 269 | " output = sym(data)\n", 270 | " pred = output.data.max(1)[1].cpu().numpy().squeeze()\n", 271 | " # Collect results\n", 272 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n", 273 | " c += 1" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 14, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "Accuracy: 0.7745392628205128\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))" 291 | ] 292 | } 293 | ], 294 | "metadata": { 295 | "anaconda-cloud": {}, 296 | "kernelspec": { 297 | "display_name": "Python 3", 298 | "language": "python", 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.5.2" 312 | } 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 2 316 | } 317 | -------------------------------------------------------------------------------- /notebooks/PyTorch_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import sys\n", 11 | "import numpy as np\n", 12 | "import torch\n", 13 | "import torchvision.models as models\n", 14 | "from torch.autograd import Variable\n", 15 | 
"from common.params_inf import *\n", 16 | "from common.utils import *" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Force one-gpu\n", 26 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "OS: linux\n", 39 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 40 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 41 | "Numpy: 1.14.1\n", 42 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 43 | "CUDA Version 8.0.61\n", 44 | "CuDNN Version 6.0.21\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "print(\"OS: \", sys.platform)\n", 50 | "print(\"Python: \", sys.version)\n", 51 | "print(\"Numpy: \", np.__version__)\n", 52 | "print(\"GPU: \", get_gpu_name())\n", 53 | "print(get_cuda_version())\n", 54 | "print(\"CuDNN Version \", get_cudnn_version())" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "# Create batches of fake data\n", 72 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 73 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 5, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "Linear(in_features=2048, out_features=1000, bias=True)\n", 86 | "AvgPool2d(kernel_size=7, stride=7, padding=0, ceil_mode=False, count_include_pad=True)\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "# Download ResNet\n", 92 | "resnet50 = 
models.resnet50(pretrained=True)\n", 93 | "# Chop-off last FC layer\n", 94 | "print(list(resnet50.children())[-1])\n", 95 | "chopped_resnet50 = torch.nn.Sequential(*list(resnet50.children())[:-1])\n", 96 | "# CUDA\n", 97 | "chopped_resnet50.cuda()\n", 98 | "# Last layer is now avgpool2d\n", 99 | "print(list(chopped_resnet50.children())[-1])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "def predict_fn(classifier, data, batchsize):\n", 109 | " \"\"\" Return features from classifier \"\"\"\n", 110 | " classifier.eval()\n", 111 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 112 | " for idx, dta in yield_mb_X(data, batchsize):\n", 113 | " pred = classifier(Variable(torch.FloatTensor(dta).cuda()))\n", 114 | " out[idx*batchsize:(idx+1)*batchsize] = pred.data.cpu().numpy().squeeze()\n", 115 | " return out" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 7, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "cold_start = predict_fn(chopped_resnet50, fake_input_data_cf, BATCH_SIZE)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 8, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "name": "stdout", 134 | "output_type": "stream", 135 | "text": [ 136 | "CPU times: user 2.05 s, sys: 397 ms, total: 2.44 s\n", 137 | "Wall time: 2.44 s\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "%%time\n", 143 | "features = predict_fn(chopped_resnet50, fake_input_data_cf, BATCH_SIZE)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 9, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "Images per second 524.5901639344262\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.44))" 161 | ] 162 | } 163 | ], 164 | "metadata": { 
165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.5.2" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /notebooks/PyTorch_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN PyTorch Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import sys\n", 18 | "import numpy as np\n", 19 | "import math\n", 20 | "import torch\n", 21 | "import torch.nn as nn\n", 22 | "import torch.nn.functional as F\n", 23 | "import torch.optim as optim\n", 24 | "import torch.utils.data as data_utils\n", 25 | "import torch.nn.init as init\n", 26 | "from torch import autograd\n", 27 | "from torch.autograd import Variable\n", 28 | "from common.params_lstm import *\n", 29 | "from common.utils import *" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Force one-gpu\n", 39 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "OS: linux\n", 52 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 53 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 54 | "PyTorch: 0.3.1\n", 55 | 
"Numpy: 1.14.1\n", 56 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 57 | "CUDA Version 8.0.61\n", 58 | "CuDNN Version 6.0.21\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "print(\"OS: \", sys.platform)\n", 64 | "print(\"Python: \", sys.version)\n", 65 | "print(\"PyTorch: \", torch.__version__)\n", 66 | "print(\"Numpy: \", np.__version__)\n", 67 | "print(\"GPU: \", get_gpu_name())\n", 68 | "print(get_cuda_version())\n", 69 | "print(\"CuDNN Version \", get_cudnn_version())" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "class SymbolModule(nn.Module):\n", 79 | " def __init__(self, \n", 80 | " maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN):\n", 81 | " super(SymbolModule, self).__init__()\n", 82 | " self.embedding = nn.Embedding(num_embeddings=maxf,\n", 83 | " embedding_dim=edim)\n", 84 | " # If batch-first then input and output \n", 85 | " # provided as (batch, seq, features)\n", 86 | " # Cudnn used by default if possible\n", 87 | " self.gru = nn.GRU(input_size=edim, \n", 88 | " hidden_size=nhid, \n", 89 | " num_layers=1,\n", 90 | " batch_first=True,\n", 91 | " bidirectional=False) \n", 92 | " self.l_out = nn.Linear(in_features=nhid*1,\n", 93 | " out_features=2)\n", 94 | "\n", 95 | " def forward(self, x, nhid=NUMHIDDEN, batchs=BATCHSIZE):\n", 96 | " x = self.embedding(x)\n", 97 | " h0 = Variable(torch.zeros(1, batchs, nhid)).cuda()\n", 98 | " x, h = self.gru(x, h0) # outputs, states\n", 99 | " # just get the last output state\n", 100 | " x = x[:,-1,:].squeeze()\n", 101 | " x = self.l_out(x)\n", 102 | " return x" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "def init_model(m, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 112 | " opt = optim.Adam(m.parameters(), lr, betas=(b1, b2), eps=eps)\n", 113 | " criterion = nn.CrossEntropyLoss()\n", 114 | " return opt, 
criterion" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": { 121 | "scrolled": true 122 | }, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "Preparing train set...\n", 129 | "Preparing test set...\n", 130 | "Trimming to 30000 max-features\n", 131 | "Padding to length 150\n", 132 | "(25000, 150) (25000, 150) (25000,) (25000,)\n", 133 | "int64 int64 int64 int64\n", 134 | "CPU times: user 5.72 s, sys: 468 ms, total: 6.19 s\n", 135 | "Wall time: 6.19 s\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "%%time\n", 141 | "# Data into format for library\n", 142 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n", 143 | "# Torch-specific\n", 144 | "x_train = x_train.astype(np.int64)\n", 145 | "x_test = x_test.astype(np.int64)\n", 146 | "y_train = y_train.astype(np.int64)\n", 147 | "y_test = y_test.astype(np.int64)\n", 148 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 149 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 7, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "CPU times: user 2.93 s, sys: 879 ms, total: 3.81 s\n", 162 | "Wall time: 3.82 s\n" 163 | ] 164 | } 165 | ], 166 | "source": [ 167 | "%%time\n", 168 | "sym = SymbolModule()\n", 169 | "sym.cuda() # CUDA!" 
170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "CPU times: user 111 µs, sys: 25 µs, total: 136 µs\n", 182 | "Wall time: 142 µs\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "%%time\n", 188 | "optimizer, criterion = init_model(sym)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": { 195 | "scrolled": false 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "0\n", 203 | "1\n", 204 | "2\n", 205 | "CPU times: user 11.7 s, sys: 942 ms, total: 12.6 s\n", 206 | "Wall time: 12.6 s\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "%%time\n", 212 | "# Main training loop: 12.7s\n", 213 | "sym.train() # Sets training = True \n", 214 | "for j in range(EPOCHS):\n", 215 | " for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 216 | " # Get samples\n", 217 | " data = Variable(torch.LongTensor(data).cuda())\n", 218 | " target = Variable(torch.LongTensor(target).cuda())\n", 219 | " # Init\n", 220 | " optimizer.zero_grad()\n", 221 | " # Forwards\n", 222 | " output = sym(data)\n", 223 | " # Loss\n", 224 | " loss = criterion(output, target)\n", 225 | " # Back-prop\n", 226 | " loss.backward()\n", 227 | " optimizer.step()\n", 228 | " # Log\n", 229 | " print(j)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 10, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "CPU times: user 1.52 s, sys: 23.9 ms, total: 1.54 s\n", 242 | "Wall time: 1.54 s\n" 243 | ] 244 | } 245 | ], 246 | "source": [ 247 | "%%time\n", 248 | "# Main evaluation loop: 1.52s\n", 249 | "sym.eval() # Sets training = False\n", 250 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 251 | "y_guess = np.zeros(n_samples, 
dtype=np.int)\n", 252 | "y_truth = y_test[:n_samples]\n", 253 | "c = 0\n", 254 | "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n", 255 | " # Get samples\n", 256 | " data = Variable(torch.LongTensor(data).cuda())\n", 257 | " # Forwards\n", 258 | " output = sym(data)\n", 259 | " pred = output.data.max(1)[1].cpu().numpy().squeeze()\n", 260 | " # Collect results\n", 261 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n", 262 | " c += 1" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "Accuracy: 0.8622996794871794\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "print(\"Accuracy: \", sum(y_guess == y_truth)/len(y_guess))" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "anaconda-cloud": {}, 285 | "kernelspec": { 286 | "display_name": "Python 3", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.5.2" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 2 305 | } 306 | -------------------------------------------------------------------------------- /notebooks/Tensorflow_Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%bash\n", 10 | "#wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz\n", 11 | "#tar -xvf resnet_v1_50_2016_08_28.tar.gz\n", 12 | "#rm resnet_v1_50_2016_08_28.tar.gz" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | 
"scrolled": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "import sys\n", 25 | "import numpy as np\n", 26 | "import tensorflow as tf\n", 27 | "# Upgrade dask before importing contrib!\n", 28 | "import tensorflow.contrib.slim\n", 29 | "from tensorflow.contrib.slim.nets import resnet_v1\n", 30 | "from common.params_inf import *\n", 31 | "from common.utils import *" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# Force one-gpu\n", 41 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "OS: linux\n", 54 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 55 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 56 | "Numpy: 1.14.1\n", 57 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 58 | "CUDA Version 8.0.61\n", 59 | "CuDNN Version 6.0.21\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "print(\"OS: \", sys.platform)\n", 65 | "print(\"Python: \", sys.version)\n", 66 | "print(\"Numpy: \", np.__version__)\n", 67 | "print(\"GPU: \", get_gpu_name())\n", 68 | "print(get_cuda_version())\n", 69 | "print(\"CuDNN Version \", get_cudnn_version())" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def predict_fn(classifier, data, batchsize):\n", 79 | " \"\"\" Return features from classifier \"\"\"\n", 80 | " out = np.zeros((len(data), RESNET_FEATURES), np.float32)\n", 81 | " for idx, dta in yield_mb_X(data, batchsize):\n", 82 | " pred = sess.run(classifier, feed_dict={input_tensor: dta}).squeeze()\n", 83 | " out[idx*batchsize:(idx+1)*batchsize] = pred\n", 84 | " return out" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 6, 90 | 
"metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "(1280, 224, 224, 3) (1280, 3, 224, 224)\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "# Create batches of fake data\n", 102 | "fake_input_data_cl, fake_input_data_cf = give_fake_data(BATCH_SIZE*BATCHES_GPU)\n", 103 | "print(fake_input_data_cl.shape, fake_input_data_cf.shape)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 7, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "INFO:tensorflow:Restoring parameters from resnet_v1_50.ckpt\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "# Placeholders\n", 121 | "checkpoint_file = 'resnet_v1_50.ckpt'\n", 122 | "input_tensor = tf.placeholder(tf.float32, shape=(None,224,224,3), name='input_image')\n", 123 | "\n", 124 | "# Load the model\n", 125 | "sess = tf.Session()\n", 126 | "arg_scope = resnet_v1.resnet_arg_scope()\n", 127 | "with tensorflow.contrib.slim.arg_scope(arg_scope):\n", 128 | " # Docstring ->\n", 129 | " # num_classes: Number of predicted classes for classification tasks. 
If None\n", 130 | " # we return the features before the logit layer.\n", 131 | " logits, end_points = resnet_v1.resnet_v1_50(input_tensor, is_training=False)\n", 132 | " \n", 133 | "saver = tf.train.Saver()\n", 134 | "saver.restore(sess, checkpoint_file)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 9, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "cold_start = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 10, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "CPU times: user 1.96 s, sys: 480 ms, total: 2.44 s\n", 156 | "Wall time: 2.26 s\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "%%time\n", 162 | "features = predict_fn(logits, fake_input_data_cl, BATCH_SIZE)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 12, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "Images per second 566.3716814159293\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "print(\"Images per second {}\".format((BATCH_SIZE*BATCHES_GPU)/2.26))" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.5.2" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /notebooks/Tensorflow_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# High-level RNN TF Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import os\n", 18 | "import sys\n", 19 | "import tensorflow as tf\n", 20 | "from common.params_lstm import *\n", 21 | "from common.utils import *" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# Force one-gpu\n", 31 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "OS: linux\n", 44 | "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n", 45 | "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n", 46 | "Numpy: 1.14.1\n", 47 | "Tensorflow: 1.4.0\n", 48 | "GPU: ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n", 49 | "CUDA Version 8.0.61\n", 50 | "CuDNN Version 6.0.21\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "print(\"OS: \", sys.platform)\n", 56 | "print(\"Python: \", sys.version)\n", 57 | "print(\"Numpy: \", np.__version__)\n", 58 | "print(\"Tensorflow: \", tf.__version__)\n", 59 | "print(\"GPU: \", get_gpu_name())\n", 60 | "print(get_cuda_version())\n", 61 | "print(\"CuDNN Version \", get_cudnn_version())" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "def create_symbol(CUDNN=True, \n", 71 | " maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, batchs=BATCHSIZE):\n", 72 | " word_vectors = tf.contrib.layers.embed_sequence(X, vocab_size=maxf, embed_dim=edim)\n", 73 | " word_list = tf.unstack(word_vectors, axis=1)\n", 74 | " \n", 75 | " if not CUDNN:\n", 76 | " cell = tf.contrib.rnn.GRUCell(nhid)\n", 77 
| " outputs, states = tf.contrib.rnn.static_rnn(cell, word_list, dtype=tf.float32)\n", 78 | " else:\n", 79 | " # Using cuDNN since vanilla RNN\n", 80 | " from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops\n", 81 | " cudnn_cell = cudnn_rnn_ops.CudnnGRU(num_layers=1, \n", 82 | " num_units=nhid, \n", 83 | " input_size=edim, \n", 84 | " input_mode='linear_input')\n", 85 | " params_size_t = cudnn_cell.params_size()\n", 86 | " params = tf.Variable(tf.random_uniform([params_size_t], -0.1, 0.1), validate_shape=False) \n", 87 | " input_h = tf.Variable(tf.zeros([1, batchs, nhid]))\n", 88 | " outputs, states = cudnn_cell(input_data=word_list,\n", 89 | " input_h=input_h,\n", 90 | " params=params)\n", 91 | " logits = tf.layers.dense(outputs[-1], 2, activation=None, name='output')\n", 92 | " return logits" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 5, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "def init_model(m, y, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):\n", 102 | " # Single-class labels, don't need dense one-hot\n", 103 | " # Expects unscaled logits, not output of tf.nn.softmax\n", 104 | " xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=m, labels=y)\n", 105 | " loss = tf.reduce_mean(xentropy)\n", 106 | " optimizer = tf.train.AdamOptimizer(lr, b1, b2, eps)\n", 107 | " training_op = optimizer.minimize(loss)\n", 108 | " return training_op" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "scrolled": true 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Preparing train set...\n", 123 | "Preparing test set...\n", 124 | "Trimming to 30000 max-features\n", 125 | "Padding to length 150\n", 126 | "(25000, 150) (25000, 150) (25000,) (25000,)\n", 127 | "int32 int32 int32 int32\n", 128 | "CPU times: user 5.9 s, sys: 417 ms, total: 6.32 s\n", 129 | "Wall time: 6.32 s\n" 130 | ] 131 | } 132 
| ], 133 | "source": [ 134 | "%%time\n", 135 | "# Data into format for library\n", 136 | "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n", 137 | "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n", 138 | "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "CPU times: user 737 ms, sys: 76.1 ms, total: 814 ms\n", 151 | "Wall time: 820 ms\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "%%time\n", 157 | "# Place-holders\n", 158 | "X = tf.placeholder(tf.int32, shape=[None, MAXLEN])\n", 159 | "y = tf.placeholder(tf.int32, shape=[None])\n", 160 | "sym = create_symbol()" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 8, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "CPU times: user 836 ms, sys: 693 ms, total: 1.53 s\n", 173 | "Wall time: 1.54 s\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "%%time\n", 179 | "model = init_model(sym, y)\n", 180 | "init = tf.global_variables_initializer()\n", 181 | "sess = tf.Session()\n", 182 | "sess.run(init)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 9, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "0 Train accuracy: 0.84375\n", 195 | "1 Train accuracy: 0.96875\n", 196 | "2 Train accuracy: 0.984375\n", 197 | "CPU times: user 19 s, sys: 2.77 s, total: 21.8 s\n", 198 | "Wall time: 22.2 s\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "%%time\n", 204 | "# Main training loop: 22s\n", 205 | "correct = tf.nn.in_top_k(sym, y, 1)\n", 206 | "accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))\n", 207 | "for j in 
range(EPOCHS):\n", 208 | " for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n", 209 | " sess.run(model, feed_dict={X: data, y: label})\n", 210 | " # Log\n", 211 | " acc_train = sess.run(accuracy, feed_dict={X: data, y: label})\n", 212 | " print(j, \"Train accuracy:\", acc_train)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 10, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "CPU times: user 8.67 s, sys: 651 ms, total: 9.32 s\n", 225 | "Wall time: 9.19 s\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "%%time\n", 231 | "# Main evaluation loop: 9.19s\n", 232 | "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n", 233 | "y_guess = np.zeros(n_samples, dtype=np.int)\n", 234 | "y_truth = y_test[:n_samples]\n", 235 | "c = 0\n", 236 | "for data, label in yield_mb(x_test, y_test, BATCHSIZE):\n", 237 | " pred = tf.argmax(sym, 1)\n", 238 | " output = sess.run(pred, feed_dict={X: data})\n", 239 | " y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = output\n", 240 | " c += 1" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 12, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "Accuracy: 0.8598557692307692\n" 253 | ] 254 | } 255 | ], 256 | "source": [ 257 | "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))" 258 | ] 259 | } 260 | ], 261 | "metadata": { 262 | "anaconda-cloud": {}, 263 | "kernelspec": { 264 | "display_name": "Python 3", 265 | "language": "python", 266 | "name": "python3" 267 | }, 268 | "language_info": { 269 | "codemirror_mode": { 270 | "name": "ipython", 271 | "version": 3 272 | }, 273 | "file_extension": ".py", 274 | "mimetype": "text/x-python", 275 | "name": "python", 276 | "nbconvert_exporter": "python", 277 | "pygments_lexer": "ipython3", 278 | "version": "3.5.2" 279 | } 280 | }, 281 | "nbformat": 4, 282 | 
"nbformat_minor": 2 283 | } 284 | -------------------------------------------------------------------------------- /notebooks/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/__init__.py -------------------------------------------------------------------------------- /notebooks/common/automobile10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/automobile10.png -------------------------------------------------------------------------------- /notebooks/common/info.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/notebooks/common/info.PNG -------------------------------------------------------------------------------- /notebooks/common/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "params_cnn": 3 | { 4 | "EPOCHS":10, 5 | "BATCHSIZE":64, 6 | "LR":0.01, 7 | "MOMENTUM":0.9, 8 | "N_CLASSES":10, 9 | "GPU":true 10 | }, 11 | "params_lstm": 12 | { 13 | "EPOCHS":3, 14 | "BATCHSIZE":64, 15 | "EMBEDSIZE":125, 16 | "NUMHIDDEN":100, 17 | "DROPOUT":0.2, 18 | "LR":0.001, 19 | "BETA_1":0.9, 20 | "BETA_2":0.999, 21 | "EPS":1e-08, 22 | "MAXLEN":150, 23 | "MAXFEATURES":30000, 24 | "GPU":true 25 | }, 26 | "params_inf": 27 | { 28 | "BATCH_SIZE":32, 29 | "RESNET_FEATURES":2048, 30 | "BATCHES_GPU":40 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /notebooks/common/params.py: -------------------------------------------------------------------------------- 1 | # Hyperparams 2 | EPOCHS = 10 3 | BATCHSIZE = 64 4 
| LR = 0.01
5 | MOMENTUM = 0.9
6 | N_CLASSES = 10
7 | GPU = True
--------------------------------------------------------------------------------
/notebooks/common/params_dense.py:
--------------------------------------------------------------------------------
1 | CLASSES = 14
2 | WIDTH = 224
3 | HEIGHT = 224
4 | CHANNELS = 3
5 | LR = 0.0001
6 | EPOCHS = 5
7 | BATCHSIZE = 64
8 | IMAGENET_RGB_MEAN = [0.485, 0.456, 0.406]
9 | IMAGENET_RGB_SD = [0.229, 0.224, 0.225]
10 | TOT_PATIENT_NUMBER = 30805 # From data
--------------------------------------------------------------------------------
/notebooks/common/params_inf.py:
--------------------------------------------------------------------------------
1 | BATCH_SIZE = 32
2 | RESNET_FEATURES = 2048
3 | BATCHES_GPU = 40
--------------------------------------------------------------------------------
/notebooks/common/params_lstm.py:
--------------------------------------------------------------------------------
1 | # Hyperparams LSTM
2 | EPOCHS=3
3 | BATCHSIZE=64
4 | EMBEDSIZE=125
5 | NUMHIDDEN=100
6 | DROPOUT=0.2
7 | LR=0.001
8 | BETA_1=0.9
9 | BETA_2=0.999
10 | EPS=1e-08
11 | MAXLEN=150 #maximum size of the word sequence
12 | MAXFEATURES=30000 #vocabulary size
13 | GPU=True
14 | 
--------------------------------------------------------------------------------
/notebooks/common/utils.R:
--------------------------------------------------------------------------------
1 | # Create an array of fake data to run inference on.
2 | #   batches:   number of 224 x 224 x 3 samples to generate
3 | #   col_major: TRUE  -> shape (224, 224, 3, batches)
4 | #              FALSE -> shape (batches, 224, 224, 3)
5 | # Returns an array filled with runif() draws. The seed is reset to 0 on every
6 | # call, so the same `batches` always yields the same data.
7 | give_fake_data <- function(batches, col_major = FALSE){
8 |   set.seed(0)
9 |   if (col_major) {
10 |     shape <- c(224, 224, 3, batches)
11 |   } else {
12 |     shape <- c(batches, 224, 224, 3)
13 |   }
14 |   dat <- array(runif(batches*224*224*3), dim = shape)
15 |   return(dat)
16 | }
17 | 
18 | # Return features from classifier (OLD)
19 | #   classifier: model handed to predict_on_batch()
20 | #   data:       4-d array whose first dimension indexes the samples
21 | #   batchsize:  number of samples sent to predict_on_batch() per call
22 | # Returns a (samples x params$RESNET_FEATURES) array. `params` is not an
23 | # argument: it must be visible from the environment this file was sourced in
24 | # (presumably the result of load_params("inference") - confirm with callers).
25 | predict_fn <- function(classifier, data, batchsize){
26 |   n <- dim(data)[1]
27 |   out <- array(0, dim = c(n, params$RESNET_FEATURES))
28 |   if (n == 0) {
29 |     return(out)
30 |   }
31 |   # Walk over batch start rows. Unlike 0:(n %/% batchsize - 1) this neither
32 |   # counts backwards when n < batchsize nor skips a trailing partial batch.
33 |   for (first in seq(1, n, by = batchsize)){
34 |     rows <- first:min(first + batchsize - 1, n)
35 |     # drop = FALSE keeps all four dimensions even for a one-sample batch
36 |     dta <- data[rows, , , , drop = FALSE]
37 |     out[rows, ] <- predict_on_batch(classifier, dta)
38 |   }
39 |   return(out)
40 | }
41 | 
42 | 
43 | # Get GPU name
44 | # Runs nvidia-smi and returns its output lines without the "name" CSV header.
45 | # If the call raises an R error, the condition is printed instead.
46 | get_gpu_name <- function(){
47 |   tryCatch(
48 |     {
49 |       out_list <- system("nvidia-smi --query-gpu=gpu_name --format=csv", intern = TRUE)
50 |       out_list <- out_list[out_list != "name"]
51 |       return(out_list)
52 |     },
53 |     error = function(e)
54 |     {
55 |       print(e)
56 |     }
57 |   )
58 | }
59 | 
60 | # Get CUDA version
61 | # Returns the contents of /usr/local/cuda/version.txt as read via `cat`.
62 | # If the call raises an R error, the condition is printed instead.
63 | get_cuda_version <- function(){
64 |   tryCatch(
65 |     {
66 |       out <- system("cat /usr/local/cuda/version.txt", intern = TRUE)
67 |       return(out)
68 |     },
69 |     error = function(e)
70 |     {
71 |       print(e)
72 |     }
73 |   )
74 | }
75 | 
76 | # Get CuDNN version
77 | # Greps CUDNN_MAJOR, CUDNN_MINOR and CUDNN_PATCHLEVEL out of
78 | # /usr/include/cudnn.h and returns "CuDNN Version <major>.<minor>.<patch>".
79 | # If anything raises an R error, the condition is printed instead.
80 | get_cudnn_version <- function(){
81 |   tryCatch(
82 |     {
83 |       # First run of digits on the first line grep reports for `macro`
84 |       first_number <- function(macro) {
85 |         out <- system(paste0("cat /usr/include/cudnn.h | grep ", macro), intern = TRUE)[1]
86 |         regmatches(out, regexpr("(\\d+)", out))
87 |       }
88 | 
89 |       major <- first_number("CUDNN_MAJOR")
90 |       minor <- first_number("CUDNN_MINOR")
91 |       patch <- first_number("CUDNN_PATCHLEVEL")
92 | 
93 |       version <- paste(major, minor, patch, sep = ".")
94 |       return(paste0("CuDNN Version ", version))
95 |     },
96 |     error = function(e)
97 |     {
98 |       print(e)
99 |     }
100 |   )
101 | }
102 | 
103 | 
104 | 
105 | # Function to download the cifar data, if not already downloaded
106 | #   col_major: forwarded to process_cifar_bin()
107 | #   src:       URL of the CIFAR-10 binary tarball
108 | # Tries to read the extracted batches first; on any error it downloads the
109 | # tarball into the working directory, extracts it and reads again.
110 | maybe_download_cifar <- function(col_major = TRUE, src = 'https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/cifar-10-binary.tar.gz'){
111 | 
112 |   # Tolerate stray whitespace around the URL (the previous default value
113 |   # carried a trailing space)
114 |   src <- trimws(src)
115 | 
116 |   tryCatch(
117 |     {
118 |       data <- suppressWarnings(process_cifar_bin(col_major))
119 |       return(data)
120 |     },
121 |     error = function(e)
122 |     {
123 |       print(paste0('Data does not exist. Downloading ', src))
124 |       # mode = "wb": the archive is binary and text mode corrupts it on Windows
125 |       download.file(src, destfile="tmp.tar.gz", mode = "wb")
126 |       print('Extracting files ...')
127 |       untar("tmp.tar.gz")
128 |       file.remove('tmp.tar.gz')
129 |       return(process_cifar_bin(col_major))
130 |     }
131 |   )
132 | }
133 | 
134 | 
135 | # A function to process CIFAR10 dataset in binary format
136 | # Reads data_batch_1..5.bin and test_batch.bin from ./cifar-10-batches-bin.
137 | #   col_major: TRUE  -> images come back as (32, 32, 3, n), first two axes swapped
138 | #              FALSE -> all axes reversed, giving (n, 3, 32, 32)
139 | # Pixel values are divided by 255. Returns list(x_train, x_test, y_train, y_test).
140 | process_cifar_bin <- function(col_major) {
141 | 
142 |   data_dir <- "cifar-10-batches-bin"
143 | 
144 |   train <- lapply(file.path(data_dir, paste0("data_batch_", 1:5, ".bin")), read_file)
145 |   train <- do.call(c, train)
146 | 
147 |   x_train <- unlist(lapply(train, function(x) x$image))
148 |   if (col_major) {
149 |     perm <- c(2, 1, 3, 4)
150 |   } else {
151 |     perm <- c(4, 3, 2, 1)
152 |   }
153 | 
154 |   x_train <- aperm(array(x_train, c(32, 32, 3, 50000)), perm = perm)
155 |   x_train <- x_train / 255
156 |   y_train <- unlist(lapply(train, function(x) x$label))
157 | 
158 |   test <- read_file(file.path(data_dir, "test_batch.bin"))
159 |   x_test <- unlist(lapply(test, function(x) x$image))
160 |   x_test <- aperm(array(x_test, c(32, 32, 3, 10000)), perm = perm)
161 |   x_test <- x_test / 255
162 |   y_test <- unlist(lapply(test, function(x) x$label))
163 | 
164 |   list(x_train = x_train, x_test = x_test, y_train = y_train, y_test = y_test)
165 | }
166 | 
167 | 
168 | 
169 | # A function to load CIFAR10 dataset
170 | #   one_hot:   if TRUE, labels are expanded with model.matrix(~label+0) into
171 | #              one indicator column per label level
172 | #   col_major: forwarded to maybe_download_cifar()
173 | # Returns list(x_train, x_test, y_train, y_test).
174 | cifar_for_library <- function(one_hot = FALSE, col_major = TRUE) {
175 | 
176 |   cifar <- maybe_download_cifar(col_major)
177 | 
178 |   x_train <- cifar$x_train
179 |   y_train <- cifar$y_train
180 |   x_test <- cifar$x_test
181 |   y_test <- cifar$y_test
182 | 
183 |   if(one_hot){
184 |     Y <- data.frame(label = factor(y_train))
185 |     y_train <- with(Y, model.matrix(~label+0))
186 |     Y <- data.frame(label = factor(y_test))
187 |     y_test <- with(Y, model.matrix(~label+0))
188 |   }
189 | 
190 |   list(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)
191 | 
192 | }
193 | 
194 | # Load hyper-parameters for different scenarios:
195 | # cnn, lstm, or inference
196 | # Reads ./common/params.json (relative to the working directory) and returns
197 | # the matching entry; stops on any other value of `params_for`.
198 | load_params <- function(params_for){
199 | 
200 |   # require() only warns and returns FALSE when the package is missing;
201 |   # fail here with a clear message instead of at the fromJSON() call.
202 |   if (!require(rjson)) {
203 |     stop("Package 'rjson' is required to read ./common/params.json.")
204 |   }
205 |   params <- fromJSON(file = "./common/params.json")
206 | 
207 |   if (params_for == "cnn"){
208 |     return(params$params_cnn)
209 |   } else if (params_for == "lstm"){
210 |     return(params$params_lstm)
211 |   } else if (params_for == "inference"){
212 |     return(params$params_inf)
213 |   } else {
214 |     stop("params_for should be set to one of the following: cnn, lstm or inference.")
215 |   }
216 | }
217 | 
218 | 
219 | # Function to download the mxnet resnet50 model, if not already downloaded
220 | # Tries mx.model.load() on the "resnet-50" prefix in the working directory;
221 | # on any error it downloads the symbol and params files and loads again.
222 | maybe_download_resnet50 <- function() {
223 |   src <- 'http://data.mxnet.io/models/imagenet/'
224 |   tryCatch(
225 |     {
226 |       model <- suppressWarnings(mx.model.load(prefix = "resnet-50", iteration = 0))
227 |       return(model)
228 |     },
229 |     error = function(e)
230 |     {
231 |       print(paste0('Model does not exist. Downloading ', src))
232 |       # src already ends in "/", so paste0() avoids the "//" file.path() would add.
233 |       # mode = "wb" keeps the binary params file intact on Windows.
234 |       download.file(paste0(src, 'resnet/50-layers/resnet-50-symbol.json'), destfile="resnet-50-symbol.json", mode = "wb")
235 |       download.file(paste0(src, 'resnet/50-layers/resnet-50-0000.params'), destfile="resnet-50-0000.params", mode = "wb")
236 |       return(mx.model.load(prefix = "resnet-50", iteration = 0))
237 |     }
238 |   )
239 | }
240 | 
241 | # Alias for maybe_download_resnet50()
242 | load_resnet50 <- function() maybe_download_resnet50()
243 | 
244 | # Read one record from an open binary connection: a 1-byte label followed by
245 | # 32*32*3 bytes of pixel data. `i` is ignored; it only exists so the function
246 | # can be driven by lapply() over an index vector.
247 | read_image <- function(i, to_read) {
248 |   label <- readBin(to_read, integer(), n = 1, size = 1)
249 |   image <- as.integer(readBin(to_read, raw(), size = 1, n = 32*32*3))
250 |   list(label = label, image = image)
251 | }
252 | 
253 | 
254 | # Read 10000 consecutive records from the binary file `f`.
255 | # Returns a list of list(label, image) entries.
256 | read_file <- function(f) {
257 |   to_read <- file(f, "rb")
258 |   # Close the connection even when a read fails part-way through
259 |   on.exit(close(to_read))
260 |   examples <- lapply(1:10000, read_image, to_read)
261 |   examples
262 | }
263 | 
264 | # Plot a CIFAR10 image
265 | # Accepts a 3-d array. When the first dimension is smaller than the third the
266 | # channels are taken from the first axis, otherwise from the last axis.
267 | # Values are passed to rgb() with maxColorValue = 1.
268 | plot_image <- function(img) {
269 |   library(grid)
270 |   img_dim <- dim(img)
271 |   if (img_dim[1] < img_dim[3]) {
272 |     r <- img[1,,]
273 |     g <- img[2,,]
274 |     b <- img[3,,]
275 |   } else {
276 |     r <- img[,,1]
277 |     g <- img[,,2]
278 |     b <- img[,,3]
279 |   }
280 |   img.col.mat <- rgb(r, g, b, maxColorValue = 1)
281 |   dim(img.col.mat) <- dim(r)
282 |   grid.raster(img.col.mat, interpolate = FALSE)
283 |   # Same return value as the former trailing rm(): invisible NULL
284 |   invisible(NULL)
285 | }
286 | 
287 | 
288 | # Load the IMDB data set from imdb.Rds in the working directory, downloading
289 | # it from `src` first when reading the local file raises an error.
290 | maybe_download_imdb <- function(src = 'https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/imdb.Rds'){
291 | 
292 |   tryCatch(
293 |     {
294 |       data <- suppressWarnings(readRDS("imdb.Rds"))
295 |       return(data)
296 |     },
297 |     error = function(e)
298 |     {
299 |       print(paste0('Data does not exist. Downloading ', src))
300 |       # mode = "wb": .Rds is a binary format and text mode corrupts it on Windows
301 |       download.file(src, destfile="imdb.Rds", mode = "wb")
302 |       return(readRDS("imdb.Rds"))
303 |     }
304 |   )
305 | }
306 | 
307 | 
308 | # Alias for maybe_download_imdb()
309 | imdb_for_library <- function() maybe_download_imdb()
310 | 
311 | 
312 | 
--------------------------------------------------------------------------------
/support/chainer_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/chainer_4gpu.JPG
--------------------------------------------------------------------------------
/support/gluon_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/gluon_4gpu.JPG
--------------------------------------------------------------------------------
/support/keras_4gpu.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/keras_4gpu.JPG
--------------------------------------------------------------------------------
/support/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/logo.png
-------------------------------------------------------------------------------- /support/pytorch_4gpu.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/pytorch_4gpu.JPG -------------------------------------------------------------------------------- /support/tensorflow_4gpu.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilkarman/DeepLearningFrameworks/671e2a0198ff6b82babab2661295b8d49b4baccf/support/tensorflow_4gpu.JPG --------------------------------------------------------------------------------