├── .gitignore ├── LICENSE ├── README.md ├── pytorch_ipynb ├── autoencoder │ ├── ae-basic-with-rf.ipynb │ ├── ae-basic.ipynb │ ├── ae-cnn-cvae.ipynb │ ├── ae-cnn-cvae_no-out-concat.ipynb │ ├── ae-conv-nneighbor-celeba.ipynb │ ├── ae-conv-nneighbor-quickdraw-1.ipynb │ ├── ae-conv-nneighbor.ipynb │ ├── ae-conv-var.ipynb │ ├── ae-cvae.ipynb │ ├── ae-cvae_no-out-concat.ipynb │ ├── ae-deconv-jaccard.ipynb │ ├── ae-deconv-nopool.ipynb │ ├── ae-deconv.ipynb │ └── ae-var.ipynb ├── basic-ml │ ├── logistic-regression.ipynb │ ├── perceptron.ipynb │ ├── softmax-regression-mlxtend-1.ipynb │ └── softmax-regression.ipynb ├── cnn │ ├── cnn-alexnet-cifar10.ipynb │ ├── cnn-allconv.ipynb │ ├── cnn-basic.ipynb │ ├── cnn-densenet121-cifar10.ipynb │ ├── cnn-densenet121-mnist.ipynb │ ├── cnn-he-init.ipynb │ ├── cnn-lenet5-cifar10.ipynb │ ├── cnn-lenet5-mnist.ipynb │ ├── cnn-lenet5-quickdraw.ipynb │ ├── cnn-resnet101-celeba.ipynb │ ├── cnn-resnet101-cifar10.ipynb │ ├── cnn-resnet152-celeba.ipynb │ ├── cnn-resnet18-celeba-dataparallel.ipynb │ ├── cnn-resnet18-mnist.ipynb │ ├── cnn-resnet34-celeba-dataparallel.ipynb │ ├── cnn-resnet34-cifar10-pinmem.ipynb │ ├── cnn-resnet34-mnist.ipynb │ ├── cnn-resnet34-quickdraw.ipynb │ ├── cnn-resnet50-celeba-dataparallel.ipynb │ ├── cnn-resnet50-mnist.ipynb │ ├── cnn-standardized.ipynb │ ├── cnn-vgg16-cats-dogs.ipynb │ ├── cnn-vgg16-celeba-data-parallel.ipynb │ ├── cnn-vgg16-celeba.ipynb │ ├── cnn-vgg16.ipynb │ ├── cnn-vgg19.ipynb │ ├── fc-to-conv.ipynb │ ├── images │ │ └── cats-and-dogs-download-all.png │ ├── nin-cifar10.ipynb │ ├── nin-cifar10_batchnorm.ipynb │ ├── nin-cifar10_filter-response-norm.ipynb │ └── resnet-ex-1.ipynb ├── data │ ├── iris.data │ └── perceptron_toydata.txt ├── gan │ ├── dc-wgan-1.ipynb │ ├── dcgan-cats-and-dogs.ipynb │ ├── dcgan-celeba.ipynb │ ├── gan-conv-smoothing.ipynb │ ├── gan-conv.ipynb │ ├── gan.ipynb │ ├── images │ │ ├── screenshot-downl-celeba-aligned.png │ │ └── screenshot-radford-dcgan-generator.png │ └── wgan-1.ipynb ├── gnn │ ├── gnn-basic-1.ipynb │ ├── gnn-basic-edge-1.ipynb │ └── gnn-basic-graph-spectral-1.ipynb ├── helper.py ├── images │ ├── cyclical-learning-rate │ │ └── cyclical-lr.png │ ├── dataparallel │ │ ├── dataparallel.png │ │ ├── minibatch-update-dataparallel.png │ │ └── minibatch-update.png │ ├── densenet │ │ ├── densenet-fig-2.jpg │ │ └── densenet-tab-1-dnet121.jpg │ ├── fc-to-conv │ │ ├── fc-to-conv-1.png │ │ └── fc-to-conv-2.png │ ├── lenet │ │ └── lenet-5_1.jpg │ ├── manual-gradients │ │ ├── graph_1.png │ │ ├── graph_2.png │ │ └── graph_3.png │ ├── resnets │ │ ├── resnet-ex-1-1.png │ │ ├── resnet-ex-1-2.png │ │ ├── resnet-ex-1-3.png │ │ ├── resnet101 │ │ │ └── resnet101-arch-1.png │ │ ├── resnet152 │ │ │ └── resnet152-arch-1.png │ │ ├── resnet34 │ │ │ └── resnet34-arch.png │ │ └── resnet50 │ │ │ ├── resnet-50-bottleneck.png │ │ │ └── resnet50-arch-1.png │ ├── vgg16 │ │ └── vgg16-arch-table.png │ ├── vgg19 │ │ └── vgg19-arch-table.png │ └── weight-sharing │ │ ├── weight-sharing-1.png │ │ └── weight-sharing-2.png ├── mechanics │ ├── cnn-weight-sharing.ipynb │ ├── custom-data-loader-afad.ipynb │ ├── custom-data-loader-celeba.ipynb │ ├── custom-data-loader-csv.ipynb │ ├── custom-data-loader-mnist.ipynb │ ├── custom-data-loader-quickdraw.ipynb │ ├── custom-data-loader-svhn.ipynb │ ├── custom-data-loader_dating-historical-color-images.ipynb │ ├── custom-dataloader-png │ │ └── custom-dataloader-example.ipynb │ ├── manual-gradients.ipynb │ ├── mlp-sequential.ipynb │ ├── plot-jupyter-matplotlib.ipynb │ ├── 
torchvision-transform-examples.ipynb │ ├── transferlearning-vgg16.ipynb │ └── validation-splits.ipynb ├── mlp │ ├── mlp-basic.ipynb │ ├── mlp-batchnorm.ipynb │ ├── mlp-dropout.ipynb │ └── mlp-fromscratch__sigmoid-mse.ipynb ├── ordinal │ ├── ordinal-cnn-beckham2016-afadlite.ipynb │ ├── ordinal-cnn-coral-afadlite.ipynb │ └── ordinal-cnn-niu-afadlite.ipynb ├── rnn │ ├── char_rnn-charlesdickens.ipynb │ ├── rnn_bi_multilayer_lstm_own_csv_agnews.ipynb │ ├── rnn_bi_multilayer_lstm_own_csv_amazon-polarity.ipynb │ ├── rnn_bi_multilayer_lstm_own_csv_yelp-polarity.ipynb │ ├── rnn_gru_packed_imdb.ipynb │ ├── rnn_lstm_bi_imdb.ipynb │ ├── rnn_lstm_packed_imdb-glove.ipynb │ ├── rnn_lstm_packed_imdb.ipynb │ ├── rnn_lstm_packed_own_csv_imdb.ipynb │ ├── rnn_simple_imdb.ipynb │ └── rnn_simple_packed_imdb.ipynb ├── transfer │ └── transferlearning-vgg16-cifar10-1.ipynb ├── tricks │ ├── cnn-alexnet-cifar10-batchincrease.ipynb │ ├── cyclical-learning-rate.ipynb │ └── gradclipping_mlp.ipynb └── viz │ └── cnns │ └── cats-and-dogs │ ├── _cnn-basemodel__vgg16-cats-dogs.ipynb │ ├── cnn-viz-grad__vgg16-cats-dogs.ipynb │ ├── cnn-viz-guided-backprop__vgg16-cats-dogs.ipynb │ ├── datautils.py │ ├── gradient.png │ ├── images │ └── cats-and-dogs-download-all.png │ └── vgg16.py └── tensorflow1_ipynb ├── autoencoder ├── ae-basic.ipynb ├── ae-conv-nneighbor.ipynb └── ae-deconv.ipynb ├── basic-ml ├── logistic-regression.ipynb ├── perceptron.ipynb └── softmax-regression.ipynb ├── cnn ├── cnn-basic.ipynb └── cnn-vgg16.ipynb ├── data └── perceptron_toydata.txt ├── gan ├── gan-conv-smoothing.ipynb ├── gan-conv.ipynb └── gan.ipynb ├── helper.py ├── mechanics ├── dataset-api.ipynb ├── file-queues.ipynb ├── image-data-chunking-hdf5.ipynb ├── image-data-chunking-npz.ipynb ├── saving-and-reloading-models.ipynb └── tfrecords.ipynb ├── metric └── siamese-1.ipynb └── mlp ├── mlp-basic.ipynb ├── mlp-batchnorm.ipynb ├── mlp-dropout.ipynb └── mlp-lowlevel.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Datasets 2 | pytorch_ipynb/viz/cnns/cats-and-dogs/dogs-vs-cats 3 | pytorch_ipynb/gan/dogs-vs-cats 4 | pytorch_ipynb/viz/cnns/cats-and-dogs/dogs-vs-cats 5 | pytorch_ipynb/rnn/yelp_review_polarity_csv/ 6 | pytorch_ipynb/rnn/ag_news_csv/ 7 | pytorch_ipynb/rnn/amazon_review_polarity_csv/ 8 | HistoricalColor-ECCV2012* 9 | AFAD-Lite 10 | tarball* 11 | pytorch_ipynb/rnn/.data/ 12 | pytorch_ipynb/rnn/.vector_cache/ 13 | cifar-10-batches-py 14 | celeba_gender_attr_test.txt 15 | celeba_gender_attr_train.txt 16 | iris.h5 17 | test_32x32.mat 18 | train_32x32.mat 19 | code/model_zoo/pytorch_ipynb/svhn_cropped/ 20 | list_attr_celeba.txt 21 | list_eval_partition.txt 22 | img_align_celeba 23 | quickdraw-* 24 | *.csv 25 | *.zip 26 | *.npz 27 | *.npy 28 | *.tar.gz 29 | *ubyte.gz 30 | *archive.ics.uci.edu* 31 | code/model_zoo/cifar-10 32 | code/model_zoo/pytorch_ipynb/data 33 | 34 | # Binary PyTorch models 35 | *.pt 36 | *.state_dict 37 | 38 | # Temporary OS files 39 | .DS_Store 40 | 41 | # TensorFlow Checkpoint files 42 | checkpoint 43 | code/*/*.data-?????-of-????? 44 | code/*/*.index 45 | code/*/*.meta 46 | code/model_zoo/tensorflow_ipynb/*.data-?????-of-????? 
47 | code/model_zoo/tensorflow_ipynb/*.index 48 | code/model_zoo/tensorflow_ipynb/*.meta 49 | code/model_zoo/tensorflow_ipynb/cifar-10/* 50 | 51 | # Byte-compiled / optimized / DLL files 52 | __pycache__/ 53 | *.py[cod] 54 | *$py.class 55 | 56 | # C extensions 57 | *.so 58 | 59 | # Distribution / packaging 60 | .Python 61 | env/ 62 | build/ 63 | develop-eggs/ 64 | dist/ 65 | downloads/ 66 | eggs/ 67 | .eggs/ 68 | lib/ 69 | lib64/ 70 | parts/ 71 | sdist/ 72 | var/ 73 | *.egg-info/ 74 | .installed.cfg 75 | *.egg 76 | 77 | # PyInstaller 78 | # Usually these files are written by a python script from a template 79 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 80 | *.manifest 81 | *.spec 82 | 83 | # Installer logs 84 | pip-log.txt 85 | pip-delete-this-directory.txt 86 | 87 | # Unit test / coverage reports 88 | htmlcov/ 89 | .tox/ 90 | .coverage 91 | .coverage.* 92 | .cache 93 | nosetests.xml 94 | coverage.xml 95 | *,cover 96 | .hypothesis/ 97 | 98 | # Translations 99 | *.mo 100 | *.pot 101 | 102 | # Django stuff: 103 | *.log 104 | local_settings.py 105 | 106 | # Flask stuff: 107 | instance/ 108 | .webassets-cache 109 | 110 | # Scrapy stuff: 111 | .scrapy 112 | 113 | # Sphinx documentation 114 | docs/_build/ 115 | 116 | # PyBuilder 117 | target/ 118 | 119 | # IPython Notebook 120 | .ipynb_checkpoints 121 | 122 | # pyenv 123 | .python-version 124 | 125 | # celery beat schedule file 126 | celerybeat-schedule 127 | 128 | # dotenv 129 | .env 130 | 131 | # virtualenv 132 | venv/ 133 | ENV/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # Datasets 142 | MNIST* 143 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2020 Sebastian Raschka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pytorch_ipynb/basic-ml/softmax-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.8\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "torch 1.0.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p torch" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "- Runs on CPU or GPU (if available)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Model Zoo -- Softmax Regression" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Implementation of softmax regression (multinomial logistic regression)." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "## Imports" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 2, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "from torchvision import datasets\n", 70 | "from torchvision import transforms\n", 71 | "from torch.utils.data import DataLoader\n", 72 | "import torch.nn.functional as F\n", 73 | "import torch" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Settings and Dataset" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Image batch dimensions: torch.Size([256, 1, 28, 28])\n", 93 | "Image label dimensions: torch.Size([256])\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "##########################\n", 99 | "### SETTINGS\n", 100 | "##########################\n", 101 | "\n", 102 | "# Device\n", 103 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 104 | "\n", 105 | "# Hyperparameters\n", 106 | "random_seed = 123\n", 107 | "learning_rate = 0.1\n", 108 | "num_epochs = 10\n", 109 | "batch_size = 256\n", 110 | "\n", 111 | "# Architecture\n", 112 | "num_features = 784\n", 113 | "num_classes = 10\n", 114 | "\n", 115 | "\n", 116 | "##########################\n", 117 | "### MNIST DATASET\n", 118 | "##########################\n", 119 | "\n", 120 | "train_dataset = datasets.MNIST(root='data', \n", 121 | " train=True, \n", 122 | " transform=transforms.ToTensor(), \n", 123 | " download=True)\n", 124 | "\n", 125 | "test_dataset = datasets.MNIST(root='data', \n", 126 | " train=False, \n", 127 | " transform=transforms.ToTensor())\n", 128 | "\n", 129 | "\n", 130 | "train_loader = DataLoader(dataset=train_dataset, \n", 131 | " batch_size=batch_size, \n", 132 | " shuffle=True)\n", 133 | "\n", 134 | "test_loader = DataLoader(dataset=test_dataset, \n", 135 | " batch_size=batch_size, \n", 136 | " shuffle=False)\n", 137 | "\n", 138 | "\n", 139 | "# 
Checking the dataset\n", 140 | "for images, labels in train_loader: \n", 141 | " print('Image batch dimensions:', images.shape)\n", 142 | " print('Image label dimensions:', labels.shape)\n", 143 | " break" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "##########################\n", 153 | "### MODEL\n", 154 | "##########################\n", 155 | "\n", 156 | "class SoftmaxRegression(torch.nn.Module):\n", 157 | "\n", 158 | " def __init__(self, num_features, num_classes):\n", 159 | " super(SoftmaxRegression, self).__init__()\n", 160 | " self.linear = torch.nn.Linear(num_features, num_classes)\n", 161 | " \n", 162 | " self.linear.weight.detach().zero_()\n", 163 | " self.linear.bias.detach().zero_()\n", 164 | " \n", 165 | " def forward(self, x):\n", 166 | " logits = self.linear(x)\n", 167 | " probas = F.softmax(logits, dim=1)\n", 168 | " return logits, probas\n", 169 | "\n", 170 | "model = SoftmaxRegression(num_features=num_features,\n", 171 | " num_classes=num_classes)\n", 172 | "\n", 173 | "model.to(device)\n", 174 | "\n", 175 | "##########################\n", 176 | "### COST AND OPTIMIZER\n", 177 | "##########################\n", 178 | "\n", 179 | "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) " 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 5, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "Epoch: 001/010 | Batch 000/234 | Cost: 2.3026\n", 192 | "Epoch: 001/010 | Batch 050/234 | Cost: 0.7941\n", 193 | "Epoch: 001/010 | Batch 100/234 | Cost: 0.5651\n", 194 | "Epoch: 001/010 | Batch 150/234 | Cost: 0.4603\n", 195 | "Epoch: 001/010 | Batch 200/234 | Cost: 0.4822\n", 196 | "Epoch: 001/010 training accuracy: 88.04%\n", 197 | "Epoch: 002/010 | Batch 000/234 | Cost: 0.4105\n", 198 | "Epoch: 002/010 | Batch 050/234 | Cost: 0.4415\n", 199 | "Epoch: 002/010 | Batch 100/234 | Cost: 0.4367\n", 200 | "Epoch: 002/010 | Batch 150/234 | Cost: 0.4289\n", 201 | "Epoch: 002/010 | Batch 200/234 | Cost: 0.3926\n", 202 | "Epoch: 002/010 training accuracy: 89.37%\n", 203 | "Epoch: 003/010 | Batch 000/234 | Cost: 0.4112\n", 204 | "Epoch: 003/010 | Batch 050/234 | Cost: 0.3579\n", 205 | "Epoch: 003/010 | Batch 100/234 | Cost: 0.3013\n", 206 | "Epoch: 003/010 | Batch 150/234 | Cost: 0.3258\n", 207 | "Epoch: 003/010 | Batch 200/234 | Cost: 0.4254\n", 208 | "Epoch: 003/010 training accuracy: 89.98%\n", 209 | "Epoch: 004/010 | Batch 000/234 | Cost: 0.3988\n", 210 | "Epoch: 004/010 | Batch 050/234 | Cost: 0.3690\n", 211 | "Epoch: 004/010 | Batch 100/234 | Cost: 0.3459\n", 212 | "Epoch: 004/010 | Batch 150/234 | Cost: 0.4030\n", 213 | "Epoch: 004/010 | Batch 200/234 | Cost: 0.3240\n", 214 | "Epoch: 004/010 training accuracy: 90.35%\n", 215 | "Epoch: 005/010 | Batch 000/234 | Cost: 0.3265\n", 216 | "Epoch: 005/010 | Batch 050/234 | Cost: 0.3673\n", 217 | "Epoch: 005/010 | Batch 100/234 | Cost: 0.3085\n", 218 | "Epoch: 005/010 | Batch 150/234 | Cost: 0.3183\n", 219 | "Epoch: 005/010 | Batch 200/234 | Cost: 0.3316\n", 220 | "Epoch: 005/010 training accuracy: 90.64%\n", 221 | "Epoch: 006/010 | Batch 000/234 | Cost: 0.4518\n", 222 | "Epoch: 006/010 | Batch 050/234 | Cost: 0.3863\n", 223 | "Epoch: 006/010 | Batch 100/234 | Cost: 0.3620\n", 224 | "Epoch: 006/010 | Batch 150/234 | Cost: 0.3733\n", 225 | "Epoch: 006/010 | Batch 200/234 | Cost: 0.3289\n", 226 | "Epoch: 006/010 training accuracy: 
90.86%\n", 227 | "Epoch: 007/010 | Batch 000/234 | Cost: 0.3450\n", 228 | "Epoch: 007/010 | Batch 050/234 | Cost: 0.2289\n", 229 | "Epoch: 007/010 | Batch 100/234 | Cost: 0.3073\n", 230 | "Epoch: 007/010 | Batch 150/234 | Cost: 0.2750\n", 231 | "Epoch: 007/010 | Batch 200/234 | Cost: 0.3456\n", 232 | "Epoch: 007/010 training accuracy: 91.00%\n", 233 | "Epoch: 008/010 | Batch 000/234 | Cost: 0.4900\n", 234 | "Epoch: 008/010 | Batch 050/234 | Cost: 0.3479\n", 235 | "Epoch: 008/010 | Batch 100/234 | Cost: 0.2343\n", 236 | "Epoch: 008/010 | Batch 150/234 | Cost: 0.3059\n", 237 | "Epoch: 008/010 | Batch 200/234 | Cost: 0.3684\n", 238 | "Epoch: 008/010 training accuracy: 91.22%\n", 239 | "Epoch: 009/010 | Batch 000/234 | Cost: 0.3762\n", 240 | "Epoch: 009/010 | Batch 050/234 | Cost: 0.2976\n", 241 | "Epoch: 009/010 | Batch 100/234 | Cost: 0.2690\n", 242 | "Epoch: 009/010 | Batch 150/234 | Cost: 0.2610\n", 243 | "Epoch: 009/010 | Batch 200/234 | Cost: 0.3140\n", 244 | "Epoch: 009/010 training accuracy: 91.34%\n", 245 | "Epoch: 010/010 | Batch 000/234 | Cost: 0.2790\n", 246 | "Epoch: 010/010 | Batch 050/234 | Cost: 0.3070\n", 247 | "Epoch: 010/010 | Batch 100/234 | Cost: 0.3300\n", 248 | "Epoch: 010/010 | Batch 150/234 | Cost: 0.2520\n", 249 | "Epoch: 010/010 | Batch 200/234 | Cost: 0.3301\n", 250 | "Epoch: 010/010 training accuracy: 91.40%\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "# Manual seed for deterministic data loader\n", 256 | "torch.manual_seed(random_seed)\n", 257 | "\n", 258 | "\n", 259 | "def compute_accuracy(model, data_loader):\n", 260 | " correct_pred, num_examples = 0, 0\n", 261 | " \n", 262 | " for features, targets in data_loader:\n", 263 | " features = features.view(-1, 28*28).to(device)\n", 264 | " targets = targets.to(device)\n", 265 | " logits, probas = model(features)\n", 266 | " _, predicted_labels = torch.max(probas, 1)\n", 267 | " num_examples += targets.size(0)\n", 268 | " correct_pred += (predicted_labels == targets).sum()\n", 269 | " \n", 270 | " return correct_pred.float() / num_examples * 100\n", 271 | " \n", 272 | "\n", 273 | "for epoch in range(num_epochs):\n", 274 | " for batch_idx, (features, targets) in enumerate(train_loader):\n", 275 | " \n", 276 | " features = features.view(-1, 28*28).to(device)\n", 277 | " targets = targets.to(device)\n", 278 | " \n", 279 | " ### FORWARD AND BACK PROP\n", 280 | " logits, probas = model(features)\n", 281 | " \n", 282 | " # note that the PyTorch implementation of\n", 283 | " # CrossEntropyLoss works with logits, not\n", 284 | " # probabilities\n", 285 | " cost = F.cross_entropy(logits, targets)\n", 286 | " optimizer.zero_grad()\n", 287 | " cost.backward()\n", 288 | " \n", 289 | " ### UPDATE MODEL PARAMETERS\n", 290 | " optimizer.step()\n", 291 | " \n", 292 | " ### LOGGING\n", 293 | " if not batch_idx % 50:\n", 294 | " print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' \n", 295 | " %(epoch+1, num_epochs, batch_idx, \n", 296 | " len(train_dataset)//batch_size, cost))\n", 297 | " \n", 298 | " with torch.set_grad_enabled(False):\n", 299 | " print('Epoch: %03d/%03d training accuracy: %.2f%%' % (\n", 300 | " epoch+1, num_epochs, \n", 301 | " compute_accuracy(model, train_loader)))" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 6, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "name": "stdout", 311 | "output_type": "stream", 312 | "text": [ 313 | "Test accuracy: 91.77%\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "print('Test accuracy: %.2f%%' % (compute_accuracy(model, 
test_loader)))" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 7, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "torch 1.0.0\n", 331 | "\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "%watermark -iv" 337 | ] 338 | } 339 | ], 340 | "metadata": { 341 | "kernelspec": { 342 | "display_name": "Python 3", 343 | "language": "python", 344 | "name": "python3" 345 | }, 346 | "language_info": { 347 | "codemirror_mode": { 348 | "name": "ipython", 349 | "version": 3 350 | }, 351 | "file_extension": ".py", 352 | "mimetype": "text/x-python", 353 | "name": "python", 354 | "nbconvert_exporter": "python", 355 | "pygments_lexer": "ipython3", 356 | "version": "3.7.1" 357 | }, 358 | "toc": { 359 | "nav_menu": {}, 360 | "number_sections": true, 361 | "sideBar": true, 362 | "skip_h1_title": false, 363 | "title_cell": "Table of Contents", 364 | "title_sidebar": "Contents", 365 | "toc_cell": false, 366 | "toc_position": {}, 367 | "toc_section_display": true, 368 | "toc_window_display": false 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 2 373 | } 374 | -------------------------------------------------------------------------------- /pytorch_ipynb/cnn/fc-to-conv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%load_ext watermark\n", 19 | "%watermark -a 'Sebastian Raschka' -v -p torch" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Replacing Fully-Connected by Equivalent Convolutional Layers" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 15, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import torch" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Assume we have a 2x2 input image:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 16, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "torch.Size([1, 1, 2, 2])" 54 | ] 55 | }, 56 | "execution_count": 16, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "inputs = torch.tensor([[[[1., 2.],\n", 63 | "                         [3., 4.]]]])\n", 64 | "\n", 65 | "inputs.shape" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Fully Connected" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "A fully connected layer, which maps the 4 input features to 2 outputs, would be computed as follows:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 17, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "fc = torch.nn.Linear(4, 2)\n", 89 | "\n", 90 | "weights = torch.tensor([[1.1, 1.2, 1.3, 1.4],\n", 91 | "                        [1.5, 1.6, 1.7, 1.8]])\n", 92 | "bias = torch.tensor([1.9, 2.0])\n", 93 | "fc.weight.data = weights\n", 94 | "fc.bias.data = bias" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code",
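
To see where the numbers in the next cell come from, here is a quick manual check of the same computation (a standalone sketch, not part of the notebook itself; it only assumes the `weights` and `bias` values defined in the previous cell):

```python
import torch

# the 2x2 input flattened to a single row vector, as in inputs.view(-1, 4)
x = torch.tensor([[1., 2., 3., 4.]])

W = torch.tensor([[1.1, 1.2, 1.3, 1.4],
                  [1.5, 1.6, 1.7, 1.8]])
b = torch.tensor([1.9, 2.0])

# torch.nn.Linear computes x @ W.T + b
print(x @ W.t() + b)
# tensor([[14.9000, 19.0000]]) -- both entries are positive,
# so the ReLU applied in the next cell leaves them unchanged
```
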
99 | "execution_count": 18, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "tensor([[14.9000, 19.0000]], grad_fn=)" 106 | ] 107 | }, 108 | "execution_count": 18, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "torch.relu(fc(inputs.view(-1, 4)))" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Convolution with Kernels equal to the input size" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "![](../images/fc-to-conv/fc-to-conv-1.png)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "We can obtain the same outputs if we use convolutional layers where the kernel size is the same size as the input feature array:" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 19, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "torch.Size([2, 1, 2, 2])\n", 148 | "torch.Size([2])\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "conv = torch.nn.Conv2d(in_channels=1,\n", 154 | " out_channels=2,\n", 155 | " kernel_size=inputs.squeeze(dim=(0)).squeeze(dim=(0)).size())\n", 156 | "print(conv.weight.size())\n", 157 | "print(conv.bias.size())" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 20, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "conv.weight.data = weights.view(2, 1, 2, 2)\n", 167 | "conv.bias.data = bias" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 21, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "tensor([[[[14.9000]],\n", 179 | "\n", 180 | " [[19.0000]]]], grad_fn=)" 181 | ] 182 | }, 183 | "execution_count": 21, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "torch.relu(conv(inputs))" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Convolution with 1x1 Kernels" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "![](../images/fc-to-conv/fc-to-conv-2.png)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Similarly, we can replace the fully connected layer using a convolutional layer when we reshape the input image into a num_inputs x 1 x 1 image:" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 23, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "tensor([[[[14.9000]],\n", 222 | "\n", 223 | " [[19.0000]]]], grad_fn=)" 224 | ] 225 | }, 226 | "execution_count": 23, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "conv = torch.nn.Conv2d(in_channels=4,\n", 233 | " out_channels=2,\n", 234 | " kernel_size=(1, 1))\n", 235 | "\n", 236 | "conv.weight.data = weights.view(2, 4, 1, 1)\n", 237 | "conv.bias.data = bias\n", 238 | "torch.relu(conv(inputs.view(1, 4, 1, 1)))" 239 | ] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": 
"text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.7.1" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 2 263 | } 264 | -------------------------------------------------------------------------------- /pytorch_ipynb/cnn/images/cats-and-dogs-download-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/cnn/images/cats-and-dogs-download-all.png -------------------------------------------------------------------------------- /pytorch_ipynb/data/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 
5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica 151 | 152 | -------------------------------------------------------------------------------- /pytorch_ipynb/data/perceptron_toydata.txt: -------------------------------------------------------------------------------- 1 | 0.77 -1.14 0 2 | -0.33 1.44 0 3 | 0.91 -3.07 0 4 | -0.37 -1.91 0 5 | -1.84 -1.13 0 6 | -1.50 0.34 0 7 | -0.63 -1.53 0 8 | -1.08 -1.23 0 9 | 0.39 -1.99 0 10 | -1.26 -2.90 0 11 | -5.27 -0.78 0 12 | -0.49 -2.74 0 13 | 1.48 -3.74 0 14 | -1.64 -1.96 0 15 | 0.45 0.36 0 16 | -1.48 -1.17 0 17 | -2.94 -4.47 0 18 | -2.19 -1.48 0 19 | 0.02 -0.02 0 20 | -2.24 -2.12 0 21 | -3.17 -3.69 0 22 | -4.09 1.03 0 23 | -2.41 -2.31 0 24 | -3.45 -0.61 0 25 | -3.96 -2.00 0 26 | -2.95 -1.16 0 27 | -2.42 -3.35 0 28 | -1.74 -1.10 0 29 | -1.61 -1.28 0 30 | -2.59 -2.21 0 31 | -2.64 -2.20 0 32 | -2.84 -4.12 0 33 | -1.45 -2.26 0 34 | -3.98 -1.05 0 35 | -2.97 -1.63 0 36 | -0.68 -1.52 0 37 | -0.10 -3.43 0 38 | -1.14 -2.66 0 39 | -2.92 -2.51 0 40 | -2.14 -1.62 0 41 | 
-3.33 -0.44 0 42 | -1.05 -3.85 0 43 | 0.38 0.95 0 44 | -0.05 -1.95 0 45 | -3.20 -0.22 0 46 | -2.26 0.01 0 47 | -1.41 -0.33 0 48 | -1.20 -0.71 0 49 | -1.69 0.80 0 50 | -1.52 -1.14 0 51 | 3.88 0.65 1 52 | 0.73 2.97 1 53 | 0.83 3.94 1 54 | 1.59 1.25 1 55 | 3.92 3.48 1 56 | 3.87 2.91 1 57 | 1.14 3.91 1 58 | 1.73 2.80 1 59 | 2.95 1.84 1 60 | 2.61 2.92 1 61 | 2.38 0.90 1 62 | 2.30 3.33 1 63 | 1.31 1.85 1 64 | 1.56 3.85 1 65 | 2.67 2.41 1 66 | 1.23 2.54 1 67 | 1.33 2.03 1 68 | 1.36 2.68 1 69 | 2.58 1.79 1 70 | 2.40 0.91 1 71 | 0.51 2.44 1 72 | 2.17 2.64 1 73 | 4.38 2.94 1 74 | 1.09 3.12 1 75 | 0.68 1.54 1 76 | 1.93 3.71 1 77 | 1.26 1.17 1 78 | 1.90 1.34 1 79 | 3.13 0.92 1 80 | 0.85 1.56 1 81 | 1.50 3.93 1 82 | 2.95 2.09 1 83 | 0.77 2.84 1 84 | 1.00 0.46 1 85 | 3.19 2.32 1 86 | 2.92 2.32 1 87 | 2.86 1.35 1 88 | 0.97 2.68 1 89 | 1.20 1.31 1 90 | 1.54 2.02 1 91 | 1.65 0.63 1 92 | 1.36 -0.22 1 93 | 2.63 0.40 1 94 | 0.90 2.05 1 95 | 1.26 3.54 1 96 | 0.71 2.27 1 97 | 1.96 0.83 1 98 | 2.52 1.83 1 99 | 2.77 2.82 1 100 | 4.16 3.34 1 101 | -------------------------------------------------------------------------------- /pytorch_ipynb/gan/images/screenshot-downl-celeba-aligned.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/gan/images/screenshot-downl-celeba-aligned.png -------------------------------------------------------------------------------- /pytorch_ipynb/gan/images/screenshot-radford-dcgan-generator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/gan/images/screenshot-radford-dcgan-generator.png -------------------------------------------------------------------------------- /pytorch_ipynb/helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import imageio 3 | import numpy as np 4 | 5 | 6 | def quickdraw_npy_to_imagefile(inpath, outpath, filetype='png', subset=None): 7 | """ 8 | Creates a folder with subfolders for each image class 9 | from the Quickdraw dataset (https://quickdraw.withgoogle.com) 10 | downloaded in .npy format. 11 | 12 | To download the .npy formatted dataset: 13 | gsutil -m cp gs://quickdraw_dataset/full/numpy_bitmap/*.npy quickdraw-png 14 | 15 | Usage example: 16 | quickdraw_npy_to_imagefile('quickdraw-npy', 'quickdraw-png') 17 | 18 | Parameters 19 | ---------- 20 | 21 | inpath : str 22 | string specifying the path to the input directory containing 23 | the .npy files 24 | 25 | outpath : str 26 | string specifying the path for the output images 27 | 28 | subset : tuple or list (default=None) 29 | A subset of categories to consider. E.g. 
30 | `("lollipop", "binoculars", "mouse", "basket")` 31 | 32 | """ 33 | if not os.path.exists(outpath): 34 | os.mkdir(outpath) 35 | npy_list = [i for i in os.listdir(inpath) if i.endswith('.npy')] 36 | 37 | if subset: 38 | npy_list = [i for i in npy_list if i.split('.npy')[0] in subset] 39 | 40 | if not len(npy_list): 41 | raise ValueError('No .npy files found in %s' % inpath) 42 | 43 | npy_paths = [os.path.join(inpath, i) for i in npy_list] 44 | 45 | for i, j in zip(npy_list, npy_paths): 46 | 47 | label = (i.split('-')[-1]).split('.npy')[0] 48 | folder = os.path.join(outpath, label) 49 | if not os.path.exists(folder): 50 | os.mkdir(folder) 51 | X = np.load(j) 52 | 53 | cnt = 0 54 | for row in X: 55 | img_array = row.reshape(28, 28) 56 | assert cnt < 1000000 57 | outfile = os.path.join(folder, '%s_%06d.%s' % ( 58 | label, cnt, filetype)) 59 | imageio.imwrite(outfile, 60 | img_array[:, :]) 61 | cnt += 1 62 | -------------------------------------------------------------------------------- /pytorch_ipynb/images/cyclical-learning-rate/cyclical-lr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/cyclical-learning-rate/cyclical-lr.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/dataparallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/dataparallel/dataparallel.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/minibatch-update-dataparallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/dataparallel/minibatch-update-dataparallel.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/dataparallel/minibatch-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/dataparallel/minibatch-update.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/densenet/densenet-fig-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/densenet/densenet-fig-2.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/densenet/densenet-tab-1-dnet121.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/densenet/densenet-tab-1-dnet121.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/fc-to-conv/fc-to-conv-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/fc-to-conv/fc-to-conv-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/fc-to-conv/fc-to-conv-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/fc-to-conv/fc-to-conv-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/lenet/lenet-5_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/lenet/lenet-5_1.jpg -------------------------------------------------------------------------------- /pytorch_ipynb/images/manual-gradients/graph_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/manual-gradients/graph_1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/manual-gradients/graph_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/manual-gradients/graph_2.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/manual-gradients/graph_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/manual-gradients/graph_3.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet-ex-1-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet-ex-1-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet-ex-1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet-ex-1-3.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet101/resnet101-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet101/resnet101-arch-1.png 
-------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet152/resnet152-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet152/resnet152-arch-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet34/resnet34-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet34/resnet34-arch.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet50/resnet-50-bottleneck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet50/resnet-50-bottleneck.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/resnets/resnet50/resnet50-arch-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/resnets/resnet50/resnet50-arch-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/vgg16/vgg16-arch-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/vgg16/vgg16-arch-table.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/vgg19/vgg19-arch-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/vgg19/vgg19-arch-table.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/weight-sharing/weight-sharing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/weight-sharing/weight-sharing-1.png -------------------------------------------------------------------------------- /pytorch_ipynb/images/weight-sharing/weight-sharing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/images/weight-sharing/weight-sharing-2.png -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/custom-data-loader-csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: 
Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.7.1\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "torch 1.0.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p torch" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Using PyTorch Dataset Loading Utilities for Custom Datasets (CSV files converted to HDF5)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This notebook provides an example for how to load a dataset from an HDF5 file created from a CSV file, using PyTorch's data loading utilities. For a more in-depth discussion, please see the official\n", 47 | "\n", 48 | "- [Data Loading and Processing Tutorial](http://pytorch.org/tutorials/beginner/data_loading_tutorial.html)\n", 49 | "- [torch.utils.data](http://pytorch.org/docs/master/data.html) API documentation\n", 50 | "\n", 51 | "The Hierarchical Data Format (HDF) is a convenient format that allows quick access to data instances during minibatch learning if a dataset is too large to fit into memory. The approach outlined in this notebook uses the common [HDF5](https://support.hdfgroup.org/HDF5/) format and should be accessible to any programming language or tool with an HDF5 API.\n", 52 | "\n", 53 | "**In this example, we are going to use the Iris dataset for illustrative purposes. Let's pretend it's our large training dataset that doesn't fit into memory**.\n", 54 | "\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Imports" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import pandas as pd\n", 71 | "import numpy as np\n", 72 | "import h5py\n", 73 | "import torch\n", 74 | "from torch.utils.data import Dataset\n", 75 | "from torch.utils.data import DataLoader" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Converting a CSV file to HDF5" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "In this first step, we are going to process a CSV file (here, Iris) into an HDF5 database:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# suppose this is a large CSV that does not \n", 99 | "# fit into memory:\n", 100 | "csv_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'\n", 101 | "\n", 102 | "# Get number of lines in the CSV file if it's on your hard drive:\n", 103 | "#num_lines = subprocess.check_output(['wc', '-l', in_csv])\n", 104 | "#num_lines = int(num_lines.split()[0]) \n", 105 | "num_lines = 150\n", 106 | "num_features = 4\n", 107 | "\n", 108 | "class_dict = {'Iris-setosa': 0,\n", 109 | "              'Iris-versicolor': 1,\n", 110 | "              'Iris-virginica': 2}\n", 111 | "\n", 112 | "# use 10,000 or 100,000 or so for large files\n", 113 | "chunksize = 10\n", 114 | "\n", 115 | "# this is your HDF5 database:\n", 116 | "with h5py.File('iris.h5', 'w') as h5f:\n", 117 | "    \n", 118 | "    # use num_features-1 if the csv file has a column header\n", 119 
| " dset1 = h5f.create_dataset('features',\n", 120 | " shape=(num_lines, num_features),\n", 121 | " compression=None,\n", 122 | " dtype='float32')\n", 123 | " dset2 = h5f.create_dataset('labels',\n", 124 | " shape=(num_lines,),\n", 125 | " compression=None,\n", 126 | " dtype='int32')\n", 127 | "\n", 128 | " # change range argument from 0 -> 1 if your csv file contains a column header\n", 129 | " for i in range(0, num_lines, chunksize): \n", 130 | "\n", 131 | " df = pd.read_csv(csv_path, \n", 132 | " header=None, # no header, define column header manually later\n", 133 | " nrows=chunksize, # number of rows to read at each iteration\n", 134 | " skiprows=i) # skip rows that were already read\n", 135 | " \n", 136 | " df[4] = df[4].map(class_dict)\n", 137 | "\n", 138 | " features = df.values[:, :4]\n", 139 | " labels = df.values[:, -1]\n", 140 | " \n", 141 | " # use i-1 and i-1+10 if csv file has a column header\n", 142 | " dset1[i:i+10, :] = features\n", 143 | " dset2[i:i+10] = labels[0]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "After creating the database, let's double-check that everything works correctly:" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "(150, 4)\n", 163 | "(150,)\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "with h5py.File('iris.h5', 'r') as h5f:\n", 169 | " print(h5f['features'].shape)\n", 170 | " print(h5f['labels'].shape)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "Features of entry no. 99: [5.7 2.8 4.1 1.3]\n", 183 | "Class label of entry no. 99: 1\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "with h5py.File('iris.h5', 'r') as h5f:\n", 189 | " print('Features of entry no. 99:', h5f['features'][99])\n", 190 | " print('Class label of entry no. 99:', h5f['labels'][99])" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Implementing a Custom Dataset Class" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Now, we implement a custom `Dataset` for reading the training examples. The `__getitem__` method will\n", 205 | "\n", 206 | "1. read a single training example from HDF5 based on an `index` (more on batching later)\n", 207 | "2. return a single training example and it's corresponding label\n", 208 | "\n", 209 | "Note that we will keep an open connection to the database for efficiency via `self.h5f = h5py.File(h5_path, 'r')` -- you may want to close it when you are done (more on this later)." 
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 6, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "class Hdf5Dataset(Dataset):\n", 219 | " \"\"\"Custom Dataset for loading entries from HDF5 databases\"\"\"\n", 220 | "\n", 221 | " def __init__(self, h5_path, transform=None):\n", 222 | " \n", 223 | " self.h5f = h5py.File(h5_path, 'r')\n", 224 | " self.num_entries = self.h5f['labels'].shape[0]\n", 225 | " self.transform = transform\n", 226 | "\n", 227 | " def __getitem__(self, index):\n", 228 | " \n", 229 | " features = self.h5f['features'][index]\n", 230 | " label = self.h5f['labels'][index]\n", 231 | " if self.transform is not None:\n", 232 | " features = self.transform(features)\n", 233 | " return features, label\n", 234 | "\n", 235 | " def __len__(self):\n", 236 | " return self.num_entries" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": {}, 242 | "source": [ 243 | "Now that we have created our custom Dataset class, we can initialize a Dataset instance for the training examples using the 'iris.h5' database file. Then, we initialize a `DataLoader` that allows us to read from the dataset." 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 7, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "train_dataset = Hdf5Dataset(h5_path='iris.h5',\n", 253 | " transform=None)\n", 254 | "\n", 255 | "train_loader = DataLoader(dataset=train_dataset,\n", 256 | " batch_size=50,\n", 257 | " shuffle=True,\n", 258 | " num_workers=4) " 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "That's it! Now we can iterate over an epoch using the train_loader as an iterator and use the features and labels from the training dataset for model training as shown in the next section" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "## Iterating Through the Custom Dataset" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 8, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "Epoch: 1 | Batch index: 0 | Batch size: 50\n", 285 | "Epoch: 1 | Batch index: 1 | Batch size: 50\n", 286 | "Epoch: 1 | Batch index: 2 | Batch size: 50\n", 287 | "Epoch: 2 | Batch index: 0 | Batch size: 50\n", 288 | "Epoch: 2 | Batch index: 1 | Batch size: 50\n", 289 | "Epoch: 2 | Batch index: 2 | Batch size: 50\n", 290 | "Epoch: 3 | Batch index: 0 | Batch size: 50\n", 291 | "Epoch: 3 | Batch index: 1 | Batch size: 50\n", 292 | "Epoch: 3 | Batch index: 2 | Batch size: 50\n", 293 | "Epoch: 4 | Batch index: 0 | Batch size: 50\n", 294 | "Epoch: 4 | Batch index: 1 | Batch size: 50\n", 295 | "Epoch: 4 | Batch index: 2 | Batch size: 50\n", 296 | "Epoch: 5 | Batch index: 0 | Batch size: 50\n", 297 | "Epoch: 5 | Batch index: 1 | Batch size: 50\n", 298 | "Epoch: 5 | Batch index: 2 | Batch size: 50\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 304 | "torch.manual_seed(0)\n", 305 | "\n", 306 | "num_epochs = 5\n", 307 | "for epoch in range(num_epochs):\n", 308 | "\n", 309 | " for batch_idx, (x, y) in enumerate(train_loader):\n", 310 | " \n", 311 | " print('Epoch:', epoch+1, end='')\n", 312 | " print(' | Batch index:', batch_idx, end='')\n", 313 | " print(' | Batch size:', y.size()[0])\n", 314 | " \n", 315 | " x = x.to(device)\n", 316 | " y = y.to(device)\n", 
317 | "\n", 318 | " # do model training on x and y here" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "**Remember that we kept an open connection to the HDF5 database in the `Hdf5Dataset` (via `self.h5f = h5py.File(h5_path, 'r')`). Once we are done, we may want to close this connection:**" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 9, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "train_dataset.h5f.close()" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 10, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "name": "stdout", 344 | "output_type": "stream", 345 | "text": [ 346 | "torch 1.0.0\n", 347 | "pandas 0.23.4\n", 348 | "numpy 1.15.4\n", 349 | "h5py 2.8.0\n", 350 | "\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "%watermark -iv" 356 | ] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "Python 3", 362 | "language": "python", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.7.1" 376 | }, 377 | "toc": { 378 | "nav_menu": {}, 379 | "number_sections": true, 380 | "sideBar": true, 381 | "skip_h1_title": false, 382 | "title_cell": "Table of Contents", 383 | "title_sidebar": "Contents", 384 | "toc_cell": false, 385 | "toc_position": {}, 386 | "toc_section_display": true, 387 | "toc_window_display": false 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 2 392 | } 393 | -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/manual-gradients.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.8\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "torch 1.0.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p torch" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Getting Gradients of an Intermediate Variable in PyTorch" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This notebook illustrates how we can fetch the intermediate gradients of a function that takes multiple inputs and is composed of multiple computation steps in PyTorch. Note that the gradient is simply a vector listing the derivatives of a function with respect\nto each argument of the function. So, strictly speaking, we are discussing how to obtain the partial derivatives here."
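Before walking through the toy graph below, a quick numerical sanity check may be useful. The following snippet is not part of the original notebook; it is a minimal sketch that approximates the partial derivatives of the toy function a = relu(w*x + b), with the same values (x=3, w=2, b=1) used throughout this notebook, via central finite differences:

```python
import numpy as np

def f(x, w, b):
    # the toy graph used in this notebook: u = x*w, v = u + b, a = relu(v)
    return np.maximum(x * w + b, 0.0)

x, w, b, eps = 3.0, 2.0, 1.0, 1e-6

# central finite differences approximate the partial derivatives of a
d_a_x = (f(x + eps, w, b) - f(x - eps, w, b)) / (2 * eps)  # ~2.0 (= w)
d_a_w = (f(x, w + eps, b) - f(x, w - eps, b)) / (2 * eps)  # ~3.0 (= x)
d_a_b = (f(x, w, b + eps) - f(x, w, b - eps)) / (2 * eps)  # ~1.0

print(d_a_x, d_a_w, d_a_b)
```

These values match the analytical partial derivatives shown in the figures below.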
48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Assume we have this simple toy graph:\n", 55 | " \n", 56 | "![](../images/manual-gradients/graph_1.png)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Now, we provide the following values to b, x, and w; the red numbers indicate the intermediate values of the computation and the end result:\n", 64 | "\n", 65 | "![](../images/manual-gradients/graph_2.png)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Next, the following image shows the partial derivatives of the output node, a, with respect to the input nodes (b, x, and w), as well as all the intermediate partial derivatives:\n", 73 | "\n", 74 | "\n", 75 | "![](../images/manual-gradients/graph_3.png)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "(The images were taken from my PyData talk in August 2017; for more information on how to arrive at these derivatives, please see the talk/slides at https://github.com/rasbt/pydata-annarbor2017-dl-tutorial; also, I put up a little calculus and differentiation primer that may be helpful: https://sebastianraschka.com/pdf/books/dlb/appendix_d_calculus.pdf)\n", 83 | "\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "For instance, if we are interested in obtaining the partial derivative of the output a with respect to each of the input and intermediate nodes, we could do the following in TensorFlow, where `d_a_b` denotes \"partial derivative of a with respect to b\" and so forth:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 2, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "[[2.0], [3.0], [1.0], [1.0], [1.0]]\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "import tensorflow as tf\n", 108 | "\n", 109 | "g = tf.Graph()\n", 110 | "with g.as_default() as g:\n", 111 | " \n", 112 | " x = tf.placeholder(dtype=tf.float32, shape=None, name='x')\n", 113 | " w = tf.Variable(initial_value=2, dtype=tf.float32, name='w')\n", 114 | " b = tf.Variable(initial_value=1, dtype=tf.float32, name='b')\n", 115 | " \n", 116 | " u = x * w\n", 117 | " v = u + b\n", 118 | " a = tf.nn.relu(v)\n", 119 | " \n", 120 | " d_a_x = tf.gradients(a, x)\n", 121 | " d_a_w = tf.gradients(a, w)\n", 122 | " d_a_b = tf.gradients(a, b)\n", 123 | " d_a_u = tf.gradients(a, u)\n", 124 | " d_a_v = tf.gradients(a, v)\n", 125 | "\n", 126 | "\n", 127 | "with tf.Session(graph=g) as sess:\n", 128 | " sess.run(tf.global_variables_initializer())\n", 129 | " grads = sess.run([d_a_x, d_a_w, d_a_b, d_a_u, d_a_v], feed_dict={'x:0': 3})\n", 130 | "\n", 131 | "print(grads)\n" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Intermediate Gradients in PyTorch via autograd's `grad`" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "In PyTorch, there are multiple ways to compute partial derivatives or gradients. If the goal is just to compute partial derivatives, the most straightforward way is to use autograd's `grad` function. By default, the `retain_graph` parameter of the `grad` function is set to `False`, which will free the graph after computing the partial derivative. Thus, if we want to obtain multiple partial derivatives, we need to set `retain_graph=True`. 
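To illustrate why the flag is needed, here is a minimal sketch (not part of the original notebook): calling `grad` a second time on the same graph without `retain_graph=True` raises a `RuntimeError`, because the first call has already freed the intermediate buffers:

```python
import torch
import torch.nn.functional as F
from torch.autograd import grad

x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

a = F.relu(x * w + b)

d_a_x = grad(a, x)  # frees the graph, since retain_graph defaults to False
try:
    d_a_w = grad(a, w)  # a second differentiation pass over the freed graph
except RuntimeError as e:
    print('RuntimeError:', e)
```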
Note, though, that using `retain_graph=True` this way is a very inefficient solution, as it makes multiple passes over the graph and recomputes the intermediate results each time:" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "d_a_x: (tensor([2.]),)\n", 158 | "d_a_w: (tensor([3.]),)\n", 159 | "d_a_b: (tensor([1.]),)\n", 160 | "d_a_u: (tensor([1.]),)\n", 161 | "d_a_v: (tensor([1.]),)\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "import torch\n", 167 | "import torch.nn.functional as F\n", 168 | "from torch.autograd import grad\n", 169 | "\n", 170 | "\n", 171 | "x = torch.tensor([3.], requires_grad=True)\n", 172 | "w = torch.tensor([2.], requires_grad=True)\n", 173 | "b = torch.tensor([1.], requires_grad=True)\n", 174 | "\n", 175 | "u = x * w\n", 176 | "v = u + b\n", 177 | "a = F.relu(v)\n", 178 | "\n", 179 | "d_a_b = grad(a, b, retain_graph=True)\n", 180 | "d_a_u = grad(a, u, retain_graph=True)\n", 181 | "d_a_v = grad(a, v, retain_graph=True)\n", 182 | "d_a_w = grad(a, w, retain_graph=True)\n", 183 | "d_a_x = grad(a, x)\n", 184 | " \n", 185 | "\n", 186 | "# use a loop variable other than `grad` to avoid shadowing the imported function\n", 187 | "for name, g in zip(\"xwbuv\", (d_a_x, d_a_w, d_a_b, d_a_u, d_a_v)):\n", 187 | " print('d_a_%s:' % name, g)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "As suggested by Adam Paszke, this can be rewritten more efficiently by passing a tuple to the `grad` function, so that it can reuse intermediate results and requires only one pass over the graph:" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 4, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "d_a_x: tensor([2.])\n", 207 | "d_a_w: tensor([3.])\n", 208 | "d_a_b: tensor([1.])\n", 209 | "d_a_u: tensor([1.])\n", 210 | "d_a_v: tensor([1.])\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "import torch\n", 216 | "import torch.nn.functional as F\n", 217 | "from torch.autograd import grad\n", 218 | "\n", 219 | "\n", 220 | "x = torch.tensor([3.], requires_grad=True)\n", 221 | "w = torch.tensor([2.], requires_grad=True)\n", 222 | "b = torch.tensor([1.], requires_grad=True)\n", 223 | "\n", 224 | "u = x * w\n", 225 | "v = u + b\n", 226 | "a = F.relu(v)\n", 227 | "\n", 228 | "partial_derivatives = grad(a, (x, w, b, u, v))\n", 229 | "\n", 230 | "for name, g in zip(\"xwbuv\", partial_derivatives):\n", 231 | " print('d_a_%s:' % name, g)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Intermediate Gradients in PyTorch via `retain_grad`" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "In PyTorch, we most often use the `backward()` method on an output variable to compute its partial derivative (or gradient) with respect to its inputs (typically, the weights and bias units of a neural network). By default, PyTorch only stores the gradients of the leaf variables (e.g., the weights and biases) via their `grad` attribute to save memory. 
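The following small sketch (not part of the original notebook) shows this default behavior in isolation: after calling `backward()`, the leaf tensors have populated `.grad` attributes, whereas the intermediate (non-leaf) tensors do not:

```python
import torch
import torch.nn.functional as F

x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

u = x * w          # intermediate (non-leaf) tensor
a = F.relu(u + b)
a.backward()

print(w.grad)  # tensor([3.]) -- leaf variable, gradient is stored
print(u.grad)  # None -- the intermediate gradient was not retained
               # (recent PyTorch versions also emit a warning here)
```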
So, if we are interested in the intermediate results in a computational graph, we can use the `retain_grad` method to store the gradients of non-leaf variables as follows:" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 5, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "d_a_x: tensor([2.])\n", 258 | "d_a_w: tensor([3.])\n", 259 | "d_a_b: tensor([1.])\n", 260 | "d_a_u: tensor([1.])\n", 261 | "d_a_v: tensor([1.])\n" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "import torch\n", 267 | "import torch.nn.functional as F\n", 268 | "\n", 269 | "\n", 270 | "\n", 271 | "x = torch.tensor([3.], requires_grad=True)\n", 272 | "w = torch.tensor([2.], requires_grad=True)\n", 273 | "b = torch.tensor([1.], requires_grad=True)\n", 274 | "\n", 275 | "u = x * w\n", 276 | "v = u + b\n", 277 | "a = F.relu(v)\n", 278 | "\n", 279 | "u.retain_grad()\n", 280 | "v.retain_grad()\n", 281 | "\n", 282 | "a.backward()\n", 283 | "\n", 284 | "for name, var in zip(\"xwbuv\", (x, w, b, u, v)):\n", 285 | " print('d_a_%s:' % name, var.grad)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Intermediate Gradients in PyTorch Using Hooks" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Finally, as a workaround that is generally not recommended, we can use hooks to obtain intermediate gradients. While the two other approaches explained above should be preferred, this approach highlights the use of hooks, which may come in handy in certain situations.\n", 300 | "\n", 301 | "> The hook will be called every time a gradient with respect to the variable is computed. (http://pytorch.org/docs/master/autograd.html#torch.autograd.Variable.register_hook)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "Based on the suggestion by Adam Paszke (https://discuss.pytorch.org/t/why-cant-i-see-grad-of-an-intermediate-variable/94/7?u=rasbt), we can use these hooks in combination with a little helper function, `save_grad`, and a `hook` closure that writes the partial derivatives or gradients to a global variable, `grads`. 
So, if we invoke the `backward` method on the output node `a`, all the intermediate results will be collected in `grads`, as illustrated below:" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 6, 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/plain": [ 319 | "{'d_a_v': tensor([1.]),\n", 320 | " 'd_a_b': tensor([1.]),\n", 321 | " 'd_a_u': tensor([1.]),\n", 322 | " 'd_a_x': tensor([2.]),\n", 323 | " 'd_a_w': tensor([3.])}" 324 | ] 325 | }, 326 | "execution_count": 6, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "import torch\n", 333 | "import torch.nn.functional as F\n", 334 | "\n", 335 | "\n", 336 | "grads = {}\n", 337 | "def save_grad(name):\n", 338 | " def hook(grad):\n", 339 | " grads[name] = grad\n", 340 | " return hook\n", 341 | "\n", 342 | "\n", 343 | "x = torch.tensor([3.], requires_grad=True)\n", 344 | "w = torch.tensor([2.], requires_grad=True)\n", 345 | "b = torch.tensor([1.], requires_grad=True)\n", 346 | "\n", 347 | "u = x * w\n", 348 | "v = u + b\n", 349 | "\n", 350 | "x.register_hook(save_grad('d_a_x'))\n", 351 | "w.register_hook(save_grad('d_a_w'))\n", 352 | "b.register_hook(save_grad('d_a_b'))\n", 353 | "u.register_hook(save_grad('d_a_u'))\n", 354 | "v.register_hook(save_grad('d_a_v'))\n", 355 | "\n", 356 | "a = F.relu(v)\n", 357 | "\n", 358 | "a.backward()\n", 359 | "\n", 360 | "grads" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "tensorflow 1.12.0\n", 373 | "torch 1.0.0\n", 374 | "\n" 375 | ] 376 | } 377 | ], 378 | "source": [ 379 | "%watermark -iv" 380 | ] 381 | } 382 | ], 383 | "metadata": { 384 | "kernelspec": { 385 | "display_name": "Python 3", 386 | "language": "python", 387 | "name": "python3" 388 | }, 389 | "language_info": { 390 | "codemirror_mode": { 391 | "name": "ipython", 392 | "version": 3 393 | }, 394 | "file_extension": ".py", 395 | "mimetype": "text/x-python", 396 | "name": "python", 397 | "nbconvert_exporter": "python", 398 | "pygments_lexer": "ipython3", 399 | "version": "3.7.1" 400 | }, 401 | "toc": { 402 | "nav_menu": {}, 403 | "number_sections": true, 404 | "sideBar": true, 405 | "skip_h1_title": false, 406 | "title_cell": "Table of Contents", 407 | "title_sidebar": "Contents", 408 | "toc_cell": false, 409 | "toc_position": {}, 410 | "toc_section_display": true, 411 | "toc_window_display": false 412 | } 413 | }, 414 | "nbformat": 4, 415 | "nbformat_minor": 2 416 | } 417 | -------------------------------------------------------------------------------- /pytorch_ipynb/mechanics/validation-splits.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Generating Validation Set Splits" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Often, we obtain datasets for which only training and test splits are provided, and validation splits are missing. 
Using a validation set for repeated model tuning and evaluation is recommended so that we do not overfit to the test set. \n", 24 | "\n", 25 | "Since we sometimes want to rotate the validation set, or merge the training and validation sets at a later stage to obtain more training data, defining a fixed, separate validation set is not always practical; instead, it can be more convenient to split a validation portion off the training set if and when we need it." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Suppose we load the MNIST dataset as follows -- note that there is no validation set pre-specified for MNIST, and the same is true for CIFAR-10/100." 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## A Typical Dataset (here: MNIST)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import torch\n", 49 | "from torchvision import datasets\n", 50 | "from torchvision import transforms\n", 51 | "from torch.utils.data import DataLoader\n", 52 | "\n", 53 | "BATCH_SIZE = 64" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stderr", 63 | "output_type": "stream", 64 | "text": [ 65 | "0it [00:00, ?it/s]" 66 | ] 67 | }, 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz\n" 73 | ] 74 | }, 75 | { 76 | "name": "stderr", 77 | "output_type": "stream", 78 | "text": [ 79 | "9920512it [00:02, 4390618.70it/s] \n" 80 | ] 81 | }, 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "Extracting data/MNIST/raw/train-images-idx3-ubyte.gz\n" 87 | ] 88 | }, 89 | { 90 | "name": "stderr", 91 | "output_type": "stream", 92 | "text": [ 93 | "32768it [00:00, 293812.98it/s] \n", 94 | "0it [00:00, ?it/s]" 95 | ] 96 | }, 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz\n", 102 | "Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz\n", 103 | "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz\n" 104 | ] 105 | }, 106 | { 107 | "name": "stderr", 108 | "output_type": "stream", 109 | "text": [ 110 | "1654784it [00:00, 2762205.03it/s] \n", 111 | "8192it [00:00, 124866.40it/s]\n" 112 | ] 113 | }, 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz\n", 119 | "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz\n", 120 | "Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz\n", 121 | "Processing...\n", 122 | "Done!\n", 123 | "Image batch dimensions: torch.Size([64, 1, 28, 28])\n", 124 | "Image label dimensions: torch.Size([64])\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "##########################\n", 130 | "### MNIST DATASET\n", 131 | "##########################\n", 132 | "\n", 133 | "# Note transforms.ToTensor() scales input images\n", 134 | "# to 0-1 range\n", 135 | "train_dataset = datasets.MNIST(root='data', \n", 136 | " train=True, \n", 137 | " transform=transforms.ToTensor(),\n", 138 | " download=True)\n", 139 | 
"\n", 140 | "test_dataset = datasets.MNIST(root='data', \n", 141 | " train=False, \n", 142 | " transform=transforms.ToTensor())\n", 143 | "\n", 144 | "\n", 145 | "train_loader = DataLoader(dataset=train_dataset, \n", 146 | " batch_size=BATCH_SIZE,\n", 147 | " num_workers=4,\n", 148 | " shuffle=True)\n", 149 | "\n", 150 | "test_loader = DataLoader(dataset=test_dataset, \n", 151 | " batch_size=BATCH_SIZE,\n", 152 | " num_workers=4,\n", 153 | " shuffle=False)\n", 154 | "\n", 155 | "# Checking the dataset\n", 156 | "for images, labels in train_loader: \n", 157 | " print('Image batch dimensions:', images.shape)\n", 158 | " print('Image label dimensions:', labels.shape)\n", 159 | " break" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 3, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "Total number of training examples: 60000\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "print(f'Total number of training examples: {len(train_dataset)}')" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Subset Method" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "Most of the time, a convenient method for splitting a training set into a training subset and validation subset is the `Subset` method . However, note that we have to use the same `transform` methodology for both training and test sets (which may not be desired in all cases; for instance, if we want to perform random cropping or rotation for training set augmentation).\n", 191 | "\n", 192 | "Concretely, we will reserve the first 1000 training examples for validation and use the remaining 59000 examples for the new training set. Note that the `Subset` method will automatically shuffle the data prior to each epoch." 
193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 4, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "from torch.utils.data.dataset import Subset" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 5, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "valid_indices = torch.arange(0, 1000)\n", 211 | "train_indices = torch.arange(1000, 60000)\n", 212 | "\n", 213 | "\n", 214 | "train_and_valid = datasets.MNIST(root='data', \n", 215 | " train=True, \n", 216 | " transform=transforms.ToTensor(),\n", 217 | " download=True)\n", 218 | "\n", 219 | "train_dataset = Subset(train_and_valid, train_indices)\n", 220 | "valid_dataset = Subset(train_and_valid, valid_indices)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 6, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "train_loader = DataLoader(dataset=train_dataset, \n", 230 | " batch_size=BATCH_SIZE,\n", 231 | " num_workers=4,\n", 232 | " shuffle=True)\n", 233 | "\n", 234 | "valid_loader = DataLoader(dataset=valid_dataset, \n", 235 | " batch_size=BATCH_SIZE,\n", 236 | " num_workers=4,\n", 237 | " shuffle=False)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 7, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Image batch dimensions: torch.Size([64, 1, 28, 28])\n", 250 | "Image label dimensions: torch.Size([64])\n" 251 | ] 252 | } 253 | ], 254 | "source": [ 255 | "# Checking the dataset\n", 256 | "for images, labels in train_loader: \n", 257 | " print('Image batch dimensions:', images.shape)\n", 258 | " print('Image label dimensions:', labels.shape)\n", 259 | " break" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 8, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "tensor([1, 7, 2, 4, 7, 7, 8, 4, 0, 5])\n", 272 | "tensor([5, 5, 6, 4, 2, 3, 8, 0, 7, 5])\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "# Check that shuffling works properly\n", 278 | "# i.e., label indices should be in random order.\n", 279 | "# Also, the label order should be different in the second\n", 280 | "# epoch.\n", 281 | "\n", 282 | "for images, labels in train_loader: \n", 283 | " pass\n", 284 | "print(labels[:10])\n", 285 | "\n", 286 | "for images, labels in train_loader: \n", 287 | " pass\n", 288 | "print(labels[:10])" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 9, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "name": "stdout", 298 | "output_type": "stream", 299 | "text": [ 300 | "tensor([1, 0, 3, 7, 0, 7, 5, 6, 8, 3])\n", 301 | "tensor([1, 0, 3, 7, 0, 7, 5, 6, 8, 3])\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "# Check that shuffling works properly.\n", 307 | "# i.e., label indices should be in random order.\n", 308 | "# Via the fixed random seed, both epochs should return\n", 309 | "# the same label sequence.\n", 310 | "\n", 311 | "torch.manual_seed(123)\n", 312 | "for images, labels in train_loader: \n", 313 | " pass\n", 314 | "print(labels[:10])\n", 315 | "\n", 316 | "torch.manual_seed(123)\n", 317 | "for images, labels in train_loader: \n", 318 | " pass\n", 319 | "print(labels[:10])" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": {}, 325 | "source": [ 326 | "## SubsetRandomSampler Method" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 
331 | "metadata": {}, 332 | "source": [ 333 | "Compared to the `Subset` method, the `SubsetRandomSampler` is a more convenient solution if we want to assign different transformation methods to the training and validation subsets. Similar to the `Subset` example, we will use the first 1000 examples for the validation set and the remaining 59000 examples for training." 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 10, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "from torch.utils.data import SubsetRandomSampler" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 11, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "train_indices = torch.arange(1000, 60000)\n", 352 | "valid_indices = torch.arange(0, 1000)\n", 353 | "\n", 354 | "\n", 355 | "train_sampler = SubsetRandomSampler(train_indices)\n", 356 | "valid_sampler = SubsetRandomSampler(valid_indices)\n", 357 | "\n", 358 | "\n", 359 | "training_transform = transforms.Compose([transforms.Resize((32, 32)),\n", 360 | " transforms.RandomCrop((28, 28)),\n", 361 | " transforms.ToTensor()])\n", 362 | "\n", 363 | "valid_transform = transforms.Compose([transforms.Resize((32, 32)),\n", 364 | " transforms.CenterCrop((28, 28)),\n", 365 | " transforms.ToTensor()])\n", 366 | "\n", 367 | "\n", 368 | "\n", 369 | "train_dataset = datasets.MNIST(root='data', \n", 370 | " train=True, \n", 371 | " transform=training_transform,\n", 372 | " download=True)\n", 373 | "\n", 374 | "# note that this is the same dataset as \"train_dataset\" above\n", 375 | "# however, we can now choose a different transform method\n", 376 | "valid_dataset = datasets.MNIST(root='data', \n", 377 | " train=True, \n", 378 | " transform=valid_transform,\n", 379 | " download=False)\n", 380 | "\n", 381 | "test_dataset = datasets.MNIST(root='data', \n", 382 | " train=False, \n", 383 | " transform=valid_transform,\n", 384 | " download=False)\n", 385 | "\n", 386 | "train_loader = DataLoader(train_dataset,\n", 387 | " batch_size=BATCH_SIZE,\n", 388 | " num_workers=4,\n", 389 | " sampler=train_sampler)\n", 390 | "\n", 391 | "valid_loader = DataLoader(valid_dataset,\n", 392 | " batch_size=BATCH_SIZE,\n", 393 | " num_workers=4,\n", 394 | " sampler=valid_sampler)\n", 395 | "\n", 396 | "test_loader = DataLoader(dataset=test_dataset, \n", 397 | " batch_size=BATCH_SIZE,\n", 398 | " num_workers=4,\n", 399 | " shuffle=False)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 12, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "Image batch dimensions: torch.Size([64, 1, 28, 28])\n", 412 | "Image label dimensions: torch.Size([64])\n" 413 | ] 414 | } 415 | ], 416 | "source": [ 417 | "# Checking the dataset\n", 418 | "for images, labels in train_loader: \n", 419 | " print('Image batch dimensions:', images.shape)\n", 420 | " print('Image label dimensions:', labels.shape)\n", 421 | " break" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 13, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "tensor([5, 7, 4, 9, 1, 7, 4, 1, 6, 7])\n", 434 | "tensor([8, 2, 0, 7, 1, 3, 2, 6, 0, 4])\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "# Check that shuffling works properly\n", 440 | "# i.e., label indices should be in random order.\n", 441 | "# Also, the label order should be different in the second\n", 442 | "# epoch.\n", 443
| "\n", 444 | "for images, labels in train_loader: \n", 445 | " pass\n", 446 | "print(labels[:10])\n", 447 | "\n", 448 | "for images, labels in train_loader: \n", 449 | " pass\n", 450 | "print(labels[:10])" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 14, 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "name": "stdout", 460 | "output_type": "stream", 461 | "text": [ 462 | "tensor([1, 0, 3, 7, 0, 7, 5, 6, 8, 3])\n", 463 | "tensor([1, 0, 3, 7, 0, 7, 5, 6, 8, 3])\n" 464 | ] 465 | } 466 | ], 467 | "source": [ 468 | "# Check that shuffling works properly.\n", 469 | "# i.e., label indices should be in random order.\n", 470 | "# Via the fixed random seed, both epochs should return\n", 471 | "# the same label sequence.\n", 472 | "\n", 473 | "torch.manual_seed(123)\n", 474 | "for images, labels in train_loader: \n", 475 | " pass\n", 476 | "print(labels[:10])\n", 477 | "\n", 478 | "torch.manual_seed(123)\n", 479 | "for images, labels in train_loader: \n", 480 | " pass\n", 481 | "print(labels[:10])" 482 | ] 483 | } 484 | ], 485 | "metadata": { 486 | "kernelspec": { 487 | "display_name": "Python 3", 488 | "language": "python", 489 | "name": "python3" 490 | }, 491 | "language_info": { 492 | "codemirror_mode": { 493 | "name": "ipython", 494 | "version": 3 495 | }, 496 | "file_extension": ".py", 497 | "mimetype": "text/x-python", 498 | "name": "python", 499 | "nbconvert_exporter": "python", 500 | "pygments_lexer": "ipython3", 501 | "version": "3.6.8" 502 | } 503 | }, 504 | "nbformat": 4, 505 | "nbformat_minor": 2 506 | } 507 | -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/datautils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | from torch.utils.data import DataLoader 5 | from PIL import Image 6 | 7 | 8 | class CatsDogsDataset(Dataset): 9 | """Custom Dataset for loading CelebA face images""" 10 | 11 | def __init__(self, img_dir, transform=None): 12 | 13 | self.img_dir = img_dir 14 | 15 | self.img_names = [i for i in 16 | os.listdir(img_dir) 17 | if i.endswith('.jpg')] 18 | 19 | self.y = [] 20 | for i in self.img_names: 21 | if i.split('.')[0] == 'cat': 22 | self.y.append(0) 23 | else: 24 | self.y.append(1) 25 | 26 | self.transform = transform 27 | 28 | def __getitem__(self, index): 29 | img = Image.open(os.path.join(self.img_dir, 30 | self.img_names[index])) 31 | 32 | if self.transform is not None: 33 | img = self.transform(img) 34 | 35 | label = self.y[index] 36 | return img, label 37 | 38 | def __len__(self): 39 | return len(self.y) 40 | 41 | 42 | 43 | 44 | 45 | def create_cats_and_dogs_dataloaders(batch_size, data_transforms, train_path, valid_path, test_path): 46 | train_dataset = CatsDogsDataset(img_dir=train_path, 47 | transform=data_transforms['train']) 48 | 49 | train_loader = DataLoader(dataset=train_dataset, 50 | batch_size=batch_size, 51 | drop_last=True, 52 | num_workers=4, 53 | shuffle=True) 54 | 55 | valid_dataset = CatsDogsDataset(img_dir=valid_path, 56 | transform=data_transforms['valid']) 57 | 58 | valid_loader = DataLoader(dataset=valid_dataset, 59 | batch_size=batch_size, 60 | num_workers=4, 61 | shuffle=False) 62 | 63 | test_dataset = CatsDogsDataset(img_dir=test_path, 64 | transform=data_transforms['valid']) 65 | 66 | test_loader = DataLoader(dataset=test_dataset, 67 | batch_size=batch_size, 68 | num_workers=4, 69 | shuffle=False) 70 | 71 | return 
train_loader, valid_loader, test_loader 72 | 73 | 74 | class UnNormalize(object): 75 | def __init__(self, mean, std): 76 | self.mean = mean 77 | self.std = std 78 | 79 | def __call__(self, tensor): 80 | """ 81 | Parameters: 82 | ------------ 83 | tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 84 | 85 | Returns: 86 | ------------ 87 | Tensor: Normalized image. 88 | 89 | """ 90 | for t, m, s in zip(tensor, self.mean, self.std): 91 | t.mul_(s).add_(m) 92 | return tensor 93 | 94 | 95 | def convert_rgb_to_grayscale(im_as_arr): 96 | """ 97 | Converts RGB image to grayscale 98 | Expects and returns CHW format. 99 | """ 100 | grayscale_im = np.sum(np.abs(im_as_arr), axis=0) 101 | im_max = np.percentile(grayscale_im, 99) 102 | im_min = np.min(grayscale_im) 103 | grayscale_im = (np.clip((grayscale_im - im_min) / (im_max - im_min), 0, 1)) 104 | grayscale_im = np.expand_dims(grayscale_im, axis=0) 105 | return grayscale_im 106 | 107 | -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/viz/cnns/cats-and-dogs/gradient.png -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/images/cats-and-dogs-download-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/josephmisiti/deeplearning-models/e66ac3b8aabaa9e21de871c9bc814d1a5af2a7e2/pytorch_ipynb/viz/cnns/cats-and-dogs/images/cats-and-dogs-download-all.png -------------------------------------------------------------------------------- /pytorch_ipynb/viz/cnns/cats-and-dogs/vgg16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | ########################## 7 | ### MODEL 8 | ########################## 9 | 10 | class VGG16(torch.nn.Module): 11 | 12 | def __init__(self, num_classes): 13 | super(VGG16, self).__init__() 14 | 15 | # calculate same padding: 16 | # (w - k + 2*p)/s + 1 = o 17 | # => p = (s(o-1) - w + k)/2 18 | 19 | self.block_1 = nn.Sequential( 20 | nn.Conv2d(in_channels=3, 21 | out_channels=64, 22 | kernel_size=(3, 3), 23 | stride=(1, 1), 24 | # (1(32-1)- 32 + 3)/2 = 1 25 | padding=1), 26 | nn.ReLU(), 27 | nn.Conv2d(in_channels=64, 28 | out_channels=64, 29 | kernel_size=(3, 3), 30 | stride=(1, 1), 31 | padding=1), 32 | nn.ReLU(), 33 | nn.MaxPool2d(kernel_size=(2, 2), 34 | stride=(2, 2)) 35 | ) 36 | 37 | self.block_2 = nn.Sequential( 38 | nn.Conv2d(in_channels=64, 39 | out_channels=128, 40 | kernel_size=(3, 3), 41 | stride=(1, 1), 42 | padding=1), 43 | nn.ReLU(), 44 | nn.Conv2d(in_channels=128, 45 | out_channels=128, 46 | kernel_size=(3, 3), 47 | stride=(1, 1), 48 | padding=1), 49 | nn.ReLU(), 50 | nn.MaxPool2d(kernel_size=(2, 2), 51 | stride=(2, 2)) 52 | ) 53 | 54 | self.block_3 = nn.Sequential( 55 | nn.Conv2d(in_channels=128, 56 | out_channels=256, 57 | kernel_size=(3, 3), 58 | stride=(1, 1), 59 | padding=1), 60 | nn.ReLU(), 61 | nn.Conv2d(in_channels=256, 62 | out_channels=256, 63 | kernel_size=(3, 3), 64 | stride=(1, 1), 65 | padding=1), 66 | nn.ReLU(), 67 | nn.Conv2d(in_channels=256, 68 | out_channels=256, 69 | kernel_size=(3, 3), 70 | stride=(1, 1), 71 | padding=1), 72 | nn.ReLU(), 73 | 
nn.Conv2d(in_channels=256, 74 | out_channels=256, 75 | kernel_size=(3, 3), 76 | stride=(1, 1), 77 | padding=1), 78 | nn.ReLU(), 79 | nn.MaxPool2d(kernel_size=(2, 2), 80 | stride=(2, 2)) 81 | ) 82 | 83 | 84 | self.block_4 = nn.Sequential( 85 | nn.Conv2d(in_channels=256, 86 | out_channels=512, 87 | kernel_size=(3, 3), 88 | stride=(1, 1), 89 | padding=1), 90 | nn.ReLU(), 91 | nn.Conv2d(in_channels=512, 92 | out_channels=512, 93 | kernel_size=(3, 3), 94 | stride=(1, 1), 95 | padding=1), 96 | nn.ReLU(), 97 | nn.Conv2d(in_channels=512, 98 | out_channels=512, 99 | kernel_size=(3, 3), 100 | stride=(1, 1), 101 | padding=1), 102 | nn.ReLU(), 103 | nn.Conv2d(in_channels=512, 104 | out_channels=512, 105 | kernel_size=(3, 3), 106 | stride=(1, 1), 107 | padding=1), 108 | nn.ReLU(), 109 | nn.MaxPool2d(kernel_size=(2, 2), 110 | stride=(2, 2)) 111 | ) 112 | 113 | self.block_5 = nn.Sequential( 114 | nn.Conv2d(in_channels=512, 115 | out_channels=512, 116 | kernel_size=(3, 3), 117 | stride=(1, 1), 118 | padding=1), 119 | nn.ReLU(), 120 | nn.Conv2d(in_channels=512, 121 | out_channels=512, 122 | kernel_size=(3, 3), 123 | stride=(1, 1), 124 | padding=1), 125 | nn.ReLU(), 126 | nn.Conv2d(in_channels=512, 127 | out_channels=512, 128 | kernel_size=(3, 3), 129 | stride=(1, 1), 130 | padding=1), 131 | nn.ReLU(), 132 | nn.Conv2d(in_channels=512, 133 | out_channels=512, 134 | kernel_size=(3, 3), 135 | stride=(1, 1), 136 | padding=1), 137 | nn.ReLU(), 138 | nn.MaxPool2d(kernel_size=(2, 2), 139 | stride=(2, 2)) 140 | ) 141 | 142 | self.classifier = nn.Sequential( 143 | nn.Linear(512*2*2, 1024), # changed 4096 to 1024 144 | nn.ReLU(), 145 | nn.Linear(1024, 1024), # changed 4096 to 1024 146 | nn.ReLU(), 147 | nn.Linear(1024, num_classes) # changed 4096 to 1024 148 | ) 149 | 150 | for m in self.modules(): 151 | if isinstance(m, torch.nn.Conv2d): 152 | #n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 153 | #m.weight.data.normal_(0, np.sqrt(2. 
/ n)) 154 | m.weight.detach().normal_(0, 0.05) 155 | if m.bias is not None: 156 | m.bias.detach().zero_() 157 | elif isinstance(m, torch.nn.Linear): 158 | m.weight.detach().normal_(0, 0.05) 159 | m.bias.detach().zero_() 160 | 161 | def forward(self, x): 162 | 163 | x = self.block_1(x) 164 | x = self.block_2(x) 165 | x = self.block_3(x) 166 | x = self.block_4(x) 167 | x = self.block_5(x) 168 | 169 | logits = self.classifier(x.view(-1, 512*2*2)) 170 | probas = F.softmax(logits, dim=1) 171 | 172 | return logits, probas 173 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/basic-ml/logistic-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Logistic Regression" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Implementation of *classic* logistic regression for binary class labels."
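For reference, the cost minimized by the graph below is the binary cross-entropy, summed over the (mini)batch, $\mathcal{L} = -\sum_i \big[ y^{(i)} \log p^{(i)} + (1 - y^{(i)}) \log(1 - p^{(i)}) \big]$ with $p^{(i)} = \sigma(\mathbf{x}^{(i)} \mathbf{w} + b)$. The following is a small NumPy sketch of the same forward pass and cost (illustrative only; it is independent of the TensorFlow graph defined later in this notebook):

```python
import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

def predict_proba(X, weights, bias):
    # linear combination followed by the logistic sigmoid
    return sigmoid(X.dot(weights) + bias)

def cost(X, y, weights, bias):
    p = predict_proba(X, weights, bias)
    # binary cross-entropy, summed over the (mini)batch
    return np.sum(-y * np.log(p) - (1. - y) * np.log(1. - p))
```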
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "image/png": "[base64-encoded PNG output omitted -- scatter plots of the two standardized Iris features for the training (left) and test (right) splits]", 57 | "text/plain": [ 58 | "" 59 | ] 60 | }, 61 | "metadata": {}, 62 | "output_type": "display_data" 63 | } 64 | ], 65 | "source": [ 66 | "%matplotlib inline\n", 67 | "import matplotlib.pyplot as plt\n", 68 | "import numpy as np\n", 69 | "from io import BytesIO\n", 70 | "\n", 71 | "##########################\n", 72 | "### DATASET\n", 73 | "##########################\n", 74 | "\n", 75 | "ds = np.lib.DataSource()\n", 76 | "fp = ds.open('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data')\n", 77 | "\n", 78 | "x = np.genfromtxt(BytesIO(fp.read().encode()), delimiter=',', usecols=range(2), max_rows=100)\n", 79 | "y = np.zeros(100)\n", 80 | "y[50:] = 1\n", 81 | "\n", 82 | "np.random.seed(1)\n", 83 | "idx = np.arange(y.shape[0])\n", 84 | "np.random.shuffle(idx)\n", 85 | "x_test, y_test = x[idx[:25]], y[idx[:25]]\n", 86 | "x_train, y_train = x[idx[25:]], y[idx[25:]]\n", 87 | "mu, std = np.mean(x_train, axis=0), np.std(x_train, axis=0)\n", 88 | "x_train, x_test = (x_train - mu) / std, (x_test - mu) / std\n", 89 | "\n", 90 | "fig, ax = plt.subplots(1, 2, figsize=(7, 2.5))\n", 91 | "ax[0].scatter(x_train[y_train == 1, 0], x_train[y_train == 1, 1])\n", 92 | "ax[0].scatter(x_train[y_train == 0, 0], x_train[y_train == 0, 1])\n", 93 | "ax[1].scatter(x_test[y_test == 1, 0], x_test[y_test == 1, 1])\n", 94 | "ax[1].scatter(x_test[y_test == 0, 0], x_test[y_test == 0, 1])\n", 95 | "plt.show()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "metadata": { 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "##########################\n", 107 | "### HELPER FUNCTIONS\n", 108 | "##########################\n", 109 | "\n", 110 | "def iterate_minibatches(arrays, batch_size, shuffle=False, seed=None):\n", 111 | " rgen = np.random.RandomState(seed)\n", 112 | " indices = np.arange(arrays[0].shape[0])\n", 113 | "\n", 114 | " if shuffle:\n", 115 | " rgen.shuffle(indices)\n", 116 | "\n", 117 | " for start_idx in range(0, indices.shape[0] - batch_size + 1, batch_size):\n", 118 | " index_slice = indices[start_idx:start_idx + batch_size]\n", 119 | "\n", 120 | " yield (ary[index_slice] for ary in arrays)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 4, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "Epoch: 000 | AvgCost: nan | Train/Valid ACC: 0.53/0.40\n", 133 | "Epoch: 001 | AvgCost: 4.221 | Train/Valid ACC: 1.00/1.00\n", 134 | "Epoch: 002 | AvgCost: 1.225 | Train/Valid ACC: 1.00/1.00\n", 135 | "Epoch: 003 | AvgCost: 0.610 | Train/Valid ACC: 1.00/1.00\n", 136 | "Epoch: 004 | AvgCost: 0.376 | 
Train/Valid ACC: 1.00/1.00\n", 137 | "Epoch: 005 | AvgCost: 0.259 | Train/Valid ACC: 1.00/1.00\n", 138 | "Epoch: 006 | AvgCost: 0.191 | Train/Valid ACC: 1.00/1.00\n", 139 | "Epoch: 007 | AvgCost: 0.148 | Train/Valid ACC: 1.00/1.00\n", 140 | "Epoch: 008 | AvgCost: 0.119 | Train/Valid ACC: 1.00/1.00\n", 141 | "Epoch: 009 | AvgCost: 0.098 | Train/Valid ACC: 1.00/1.00\n", 142 | "Epoch: 010 | AvgCost: 0.082 | Train/Valid ACC: 1.00/1.00\n", 143 | "Epoch: 011 | AvgCost: 0.070 | Train/Valid ACC: 1.00/1.00\n", 144 | "Epoch: 012 | AvgCost: 0.061 | Train/Valid ACC: 1.00/1.00\n", 145 | "Epoch: 013 | AvgCost: 0.053 | Train/Valid ACC: 1.00/1.00\n", 146 | "Epoch: 014 | AvgCost: 0.047 | Train/Valid ACC: 1.00/1.00\n", 147 | "\n", 148 | "Weights:\n", 149 | " [[ 3.31176686]\n", 150 | " [-2.40808702]]\n", 151 | "\n", 152 | "Bias:\n", 153 | " [[-0.01001291]]\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "import tensorflow as tf\n", 159 | "\n", 160 | "\n", 161 | "##########################\n", 162 | "### SETTINGS\n", 163 | "##########################\n", 164 | "\n", 165 | "n_features = x.shape[1]\n", 166 | "n_samples = x.shape[0]\n", 167 | "learning_rate = 0.05\n", 168 | "training_epochs = 15\n", 169 | "batch_size = 10\n", 170 | "\n", 171 | "\n", 172 | "##########################\n", 173 | "### GRAPH DEFINITION\n", 174 | "##########################\n", 175 | "\n", 176 | "g = tf.Graph()\n", 177 | "with g.as_default() as g:\n", 178 | "\n", 179 | " # Input data\n", 180 | " tf_x = tf.placeholder(dtype=tf.float32,\n", 181 | " shape=[None, n_features], name='inputs')\n", 182 | " tf_y = tf.placeholder(dtype=tf.float32,\n", 183 | " shape=[None], name='targets')\n", 184 | " \n", 185 | " # Model parameters\n", 186 | " params = {\n", 187 | " 'weights': tf.Variable(tf.zeros(shape=[n_features, 1],\n", 188 | " dtype=tf.float32), name='weights'),\n", 189 | " 'bias': tf.Variable([[0.]], dtype=tf.float32, name='bias')}\n", 190 | "\n", 191 | " # Logistic Regression\n", 192 | " linear = tf.matmul(tf_x, params['weights']) + params['bias']\n", 193 | " pred_proba = tf.sigmoid(linear, name='predict_probas')\n", 194 | "\n", 195 | " # Loss and optimizer\n", 196 | " r = tf.reshape(pred_proba, [-1])\n", 197 | " cost = tf.reduce_mean(tf.reduce_sum((-tf_y * tf.log(r)) - \n", 198 | " ((1. - tf_y) * tf.log(1. 
- r))), name='cost')\n", 199 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 200 | " train = optimizer.minimize(cost, name='train')\n", 201 | " \n", 202 | " # Class prediction\n", 203 | " pred_labels = tf.round(tf.reshape(pred_proba, [-1]), name='predict_labels')\n", 204 | " correct_prediction = tf.equal(tf_y, pred_labels)\n", 205 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 206 | "\n", 207 | "\n", 208 | "##########################\n", 209 | "### TRAINING & EVALUATION\n", 210 | "##########################\n", 211 | " \n", 212 | "with tf.Session(graph=g) as sess:\n", 213 | " sess.run(tf.global_variables_initializer())\n", 214 | " \n", 215 | " avg_cost = np.nan\n", 216 | " count = 1\n", 217 | " \n", 218 | " for epoch in range(training_epochs):\n", 219 | "\n", 220 | " train_acc = sess.run('accuracy:0', feed_dict={tf_x: x_train,\n", 221 | " tf_y: y_train})\n", 222 | " valid_acc = sess.run('accuracy:0', feed_dict={tf_x: x_test,\n", 223 | " tf_y: y_test}) \n", 224 | "\n", 225 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch, avg_cost / count), end=\"\")\n", 226 | " print(\" | Train/Valid ACC: %.2f/%.2f\" % (train_acc, valid_acc))\n", 227 | " \n", 228 | " avg_cost = 0.\n", 229 | " for x_batch, y_batch in iterate_minibatches(arrays=[x_train, y_train],\n", 230 | " batch_size=batch_size, \n", 231 | " shuffle=True, seed=123):\n", 232 | " \n", 233 | " feed_dict = {'inputs:0': x_batch,\n", 234 | " 'targets:0': y_batch}\n", 235 | " _, c = sess.run(['train', 'cost:0'], feed_dict=feed_dict)\n", 236 | "\n", 237 | " avg_cost += c\n", 238 | " count += 1\n", 239 | "\n", 240 | " weights, bias = sess.run(['weights:0', 'bias:0'])\n", 241 | " print('\\nWeights:\\n', weights)\n", 242 | " print('\\nBias:\\n', bias)" 243 | ] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python 3", 249 | "language": "python", 250 | "name": "python3" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.7.1" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 2 267 | } 268 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/basic-ml/softmax-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Softmax Regression" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 
| "source": [ 46 | "Implementation of softmax regression (multinomial logistic regression)." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Extracting ./train-images-idx3-ubyte.gz\n", 59 | "Extracting ./train-labels-idx1-ubyte.gz\n", 60 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 61 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 62 | "Epoch: 001 | AvgCost: 0.476 | Train/Valid ACC: 0.903/0.909\n", 63 | "Epoch: 002 | AvgCost: 0.339 | Train/Valid ACC: 0.911/0.918\n", 64 | "Epoch: 003 | AvgCost: 0.320 | Train/Valid ACC: 0.915/0.922\n", 65 | "Epoch: 004 | AvgCost: 0.309 | Train/Valid ACC: 0.918/0.923\n", 66 | "Epoch: 005 | AvgCost: 0.301 | Train/Valid ACC: 0.918/0.922\n", 67 | "Epoch: 006 | AvgCost: 0.296 | Train/Valid ACC: 0.919/0.922\n", 68 | "Epoch: 007 | AvgCost: 0.291 | Train/Valid ACC: 0.921/0.925\n", 69 | "Epoch: 008 | AvgCost: 0.287 | Train/Valid ACC: 0.922/0.925\n", 70 | "Epoch: 009 | AvgCost: 0.286 | Train/Valid ACC: 0.922/0.926\n", 71 | "Epoch: 010 | AvgCost: 0.283 | Train/Valid ACC: 0.923/0.926\n", 72 | "Epoch: 011 | AvgCost: 0.282 | Train/Valid ACC: 0.923/0.924\n", 73 | "Epoch: 012 | AvgCost: 0.278 | Train/Valid ACC: 0.925/0.927\n", 74 | "Epoch: 013 | AvgCost: 0.278 | Train/Valid ACC: 0.925/0.928\n", 75 | "Epoch: 014 | AvgCost: 0.276 | Train/Valid ACC: 0.925/0.925\n", 76 | "Epoch: 015 | AvgCost: 0.276 | Train/Valid ACC: 0.926/0.928\n", 77 | "Epoch: 016 | AvgCost: 0.274 | Train/Valid ACC: 0.927/0.927\n", 78 | "Epoch: 017 | AvgCost: 0.270 | Train/Valid ACC: 0.927/0.925\n", 79 | "Epoch: 018 | AvgCost: 0.273 | Train/Valid ACC: 0.927/0.930\n", 80 | "Epoch: 019 | AvgCost: 0.270 | Train/Valid ACC: 0.927/0.929\n", 81 | "Epoch: 020 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.927\n", 82 | "Epoch: 021 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.926\n", 83 | "Epoch: 022 | AvgCost: 0.270 | Train/Valid ACC: 0.928/0.926\n", 84 | "Epoch: 023 | AvgCost: 0.268 | Train/Valid ACC: 0.927/0.926\n", 85 | "Epoch: 024 | AvgCost: 0.266 | Train/Valid ACC: 0.929/0.926\n", 86 | "Epoch: 025 | AvgCost: 0.261 | Train/Valid ACC: 0.927/0.926\n", 87 | "Epoch: 026 | AvgCost: 0.269 | Train/Valid ACC: 0.929/0.927\n", 88 | "Epoch: 027 | AvgCost: 0.265 | Train/Valid ACC: 0.928/0.928\n", 89 | "Epoch: 028 | AvgCost: 0.261 | Train/Valid ACC: 0.929/0.928\n", 90 | "Epoch: 029 | AvgCost: 0.266 | Train/Valid ACC: 0.930/0.926\n", 91 | "Epoch: 030 | AvgCost: 0.261 | Train/Valid ACC: 0.929/0.924\n", 92 | "Test ACC: 0.925\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "import tensorflow as tf\n", 98 | "from tensorflow.examples.tutorials.mnist import input_data\n", 99 | "\n", 100 | "\n", 101 | "##########################\n", 102 | "### DATASET\n", 103 | "##########################\n", 104 | "\n", 105 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 106 | "\n", 107 | "\n", 108 | "##########################\n", 109 | "### SETTINGS\n", 110 | "##########################\n", 111 | "\n", 112 | "# Hyperparameters\n", 113 | "learning_rate = 0.5\n", 114 | "training_epochs = 30\n", 115 | "batch_size = 256\n", 116 | "\n", 117 | "# Architecture\n", 118 | "n_features = 784\n", 119 | "n_classes = 10\n", 120 | "\n", 121 | "\n", 122 | "##########################\n", 123 | "### GRAPH DEFINITION\n", 124 | "##########################\n", 125 | "\n", 126 | "g = tf.Graph()\n", 127 | "with g.as_default():\n", 128 | "\n", 129 | " # Input data\n", 130 | " tf_x = tf.placeholder(tf.float32, [None, 
n_features])\n",
131 |     "    tf_y = tf.placeholder(tf.float32, [None, n_classes])\n",
132 |     "\n",
133 |     "    # Model parameters\n",
134 |     "    params = {\n",
135 |     "        'weights': tf.Variable(tf.zeros(shape=[n_features, n_classes],\n",
136 |     "                                        dtype=tf.float32), name='weights'),\n",
137 |     "        'bias': tf.Variable(tf.zeros(shape=[n_classes]), dtype=tf.float32, name='bias')}\n",
138 |     "\n",
139 |     "    # Softmax regression\n",
140 |     "    linear = tf.matmul(tf_x, params['weights']) + params['bias']\n",
141 |     "    pred_proba = tf.nn.softmax(linear, name='predict_probas')\n",
142 |     "    \n",
143 |     "    # Loss and optimizer\n",
144 |     "    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n",
145 |     "        logits=linear, labels=tf_y), name='cost')\n",
146 |     "    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
147 |     "    train = optimizer.minimize(cost, name='train')\n",
148 |     "\n",
149 |     "    # Class prediction\n",
150 |     "    pred_labels = tf.argmax(pred_proba, 1, name='predict_labels')\n",
151 |     "    correct_prediction = tf.equal(tf.argmax(tf_y, 1), pred_labels)\n",
152 |     "    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n",
153 |     "\n",
154 |     "    \n",
155 |     "##########################\n",
156 |     "### TRAINING & EVALUATION\n",
157 |     "##########################\n",
158 |     "\n",
159 |     "with tf.Session(graph=g) as sess:\n",
160 |     "    sess.run(tf.global_variables_initializer())\n",
161 |     "\n",
162 |     "    for epoch in range(training_epochs):\n",
163 |     "        avg_cost = 0.\n",
164 |     "        total_batch = mnist.train.num_examples // batch_size\n",
165 |     "\n",
166 |     "        for i in range(total_batch):\n",
167 |     "            batch_x, batch_y = mnist.train.next_batch(batch_size)\n",
168 |     "            _, c = sess.run(['train', 'cost:0'], feed_dict={tf_x: batch_x,\n",
169 |     "                                                            tf_y: batch_y})\n",
170 |     "            avg_cost += c\n",
171 |     "        \n",
172 |     "        train_acc = sess.run('accuracy:0', feed_dict={tf_x: mnist.train.images,\n",
173 |     "                                                      tf_y: mnist.train.labels})\n",
174 |     "        valid_acc = sess.run('accuracy:0', feed_dict={tf_x: mnist.validation.images,\n",
175 |     "                                                      tf_y: mnist.validation.labels}) \n",
176 |     "        \n",
177 |     "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n",
178 |     "        print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n",
179 |     "        \n",
180 |     "    test_acc = sess.run(accuracy, feed_dict={tf_x: mnist.test.images,\n",
181 |     "                                             tf_y: mnist.test.labels})\n",
182 |     "    print('Test ACC: %.3f' % test_acc)"
183 |    ]
184 |   }
185 |  ],
186 |  "metadata": {
187 |   "kernelspec": {
188 |    "display_name": "Python 3",
189 |    "language": "python",
190 |    "name": "python3"
191 |   },
192 |   "language_info": {
193 |    "codemirror_mode": {
194 |     "name": "ipython",
195 |     "version": 3
196 |    },
197 |    "file_extension": ".py",
198 |    "mimetype": "text/x-python",
199 |    "name": "python",
200 |    "nbconvert_exporter": "python",
201 |    "pygments_lexer": "ipython3",
202 |    "version": "3.7.1"
203 |   }
204 |  },
205 |  "nbformat": 4,
206 |  "nbformat_minor": 2
207 | }
208 | 
--------------------------------------------------------------------------------
/tensorflow1_ipynb/cnn/cnn-basic.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n",
8 |     "- Author: Sebastian Raschka\n",
9 |     "- GitHub Repository: https://github.com/rasbt/deeplearning-models"
10 |    ]
11 |   },
12 |   {
13 |
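"cell_type": "markdown",
   "metadata": {},
   "source": [
    "Implementation of a basic convolutional neural network on MNIST: two 3x3 convolutional layers with ReLU activations and max-pooling, followed by a fully connected output layer."
   ]
  },
  {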
"cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.8\n", 24 | "IPython 7.2.0\n", 25 | "\n", 26 | "tensorflow 1.12.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Convolutional Neural Network" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Low-level Implementation" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "WARNING:tensorflow:From :10: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 59 | "Instructions for updating:\n", 60 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n", 61 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n", 62 | "Instructions for updating:\n", 63 | "Please write your own downloading logic.\n", 64 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 65 | "Instructions for updating:\n", 66 | "Please use tf.data to implement this functionality.\n", 67 | "Extracting ./train-images-idx3-ubyte.gz\n", 68 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 69 | "Instructions for updating:\n", 70 | "Please use tf.data to implement this functionality.\n", 71 | "Extracting ./train-labels-idx1-ubyte.gz\n", 72 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 73 | "Instructions for updating:\n", 74 | "Please use tf.one_hot on tensors.\n", 75 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 76 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 77 | "WARNING:tensorflow:From /home/raschka/miniconda3/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n", 78 | "Instructions for updating:\n", 79 | "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n" 80 | ] 81 | } 82 | ], 83 | "source": [ 84 | "import tensorflow as tf\n", 85 | "from functools import reduce\n", 86 | "from tensorflow.examples.tutorials.mnist import input_data\n", 87 | "\n", 88 | "\n", 89 | "##########################\n", 90 | "### DATASET\n", 91 | "##########################\n", 
92 | "\n", 93 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 94 | "\n", 95 | "\n", 96 | "##########################\n", 97 | "### SETTINGS\n", 98 | "##########################\n", 99 | "\n", 100 | "# Hyperparameters\n", 101 | "learning_rate = 0.1\n", 102 | "dropout_keep_proba = 0.5\n", 103 | "epochs = 3\n", 104 | "batch_size = 32\n", 105 | "\n", 106 | "# Architecture\n", 107 | "input_size = 784\n", 108 | "image_width, image_height = 28, 28\n", 109 | "n_classes = 10\n", 110 | "\n", 111 | "# Other\n", 112 | "print_interval = 500\n", 113 | "random_seed = 123\n", 114 | "\n", 115 | "\n", 116 | "##########################\n", 117 | "### WRAPPER FUNCTIONS\n", 118 | "##########################\n", 119 | "\n", 120 | "def conv2d(input_tensor, output_channels,\n", 121 | " kernel_size=(5, 5), strides=(1, 1, 1, 1),\n", 122 | " padding='SAME', activation=None, seed=None,\n", 123 | " name='conv2d'):\n", 124 | "\n", 125 | " with tf.name_scope(name):\n", 126 | " input_channels = input_tensor.get_shape().as_list()[-1]\n", 127 | " weights_shape = (kernel_size[0], kernel_size[1],\n", 128 | " input_channels, output_channels)\n", 129 | "\n", 130 | " weights = tf.Variable(tf.truncated_normal(shape=weights_shape,\n", 131 | " mean=0.0,\n", 132 | " stddev=0.01,\n", 133 | " dtype=tf.float32,\n", 134 | " seed=seed),\n", 135 | " name='weights')\n", 136 | " biases = tf.Variable(tf.zeros(shape=(output_channels,)), name='biases')\n", 137 | " conv = tf.nn.conv2d(input=input_tensor,\n", 138 | " filter=weights,\n", 139 | " strides=strides,\n", 140 | " padding=padding)\n", 141 | "\n", 142 | " act = conv + biases\n", 143 | " if activation is not None:\n", 144 | " act = activation(conv + biases)\n", 145 | " return act\n", 146 | "\n", 147 | "\n", 148 | "def fully_connected(input_tensor, output_nodes,\n", 149 | " activation=None, seed=None,\n", 150 | " name='fully_connected'):\n", 151 | "\n", 152 | " with tf.name_scope(name):\n", 153 | " input_nodes = input_tensor.get_shape().as_list()[1]\n", 154 | " weights = tf.Variable(tf.truncated_normal(shape=(input_nodes,\n", 155 | " output_nodes),\n", 156 | " mean=0.0,\n", 157 | " stddev=0.01,\n", 158 | " dtype=tf.float32,\n", 159 | " seed=seed),\n", 160 | " name='weights')\n", 161 | " biases = tf.Variable(tf.zeros(shape=[output_nodes]), name='biases')\n", 162 | "\n", 163 | " act = tf.matmul(input_tensor, weights) + biases\n", 164 | " if activation is not None:\n", 165 | " act = activation(act)\n", 166 | " return act\n", 167 | "\n", 168 | " \n", 169 | "##########################\n", 170 | "### GRAPH DEFINITION\n", 171 | "##########################\n", 172 | "\n", 173 | "g = tf.Graph()\n", 174 | "with g.as_default():\n", 175 | " \n", 176 | " tf.set_random_seed(random_seed)\n", 177 | "\n", 178 | " # Input data\n", 179 | " tf_x = tf.placeholder(tf.float32, [None, input_size, 1], name='inputs')\n", 180 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 181 | " \n", 182 | " keep_proba = tf.placeholder(tf.float32, shape=None, name='keep_proba')\n", 183 | "\n", 184 | " # Convolutional Neural Network:\n", 185 | " # 2 convolutional layers with maxpool and ReLU activation\n", 186 | " input_layer = tf.reshape(tf_x, shape=[-1, image_width, image_height, 1])\n", 187 | " \n", 188 | " conv1 = conv2d(input_tensor=input_layer,\n", 189 | " output_channels=8,\n", 190 | " kernel_size=(3, 3),\n", 191 | " strides=(1, 1, 1, 1),\n", 192 | " activation=tf.nn.relu,\n", 193 | " name='conv1')\n", 194 | " \n", 195 | " pool1 = tf.nn.max_pool(conv1,\n", 196 | " ksize=(1, 2, 2, 
1), \n", 197 | " strides=(1, 2, 2, 1),\n", 198 | " padding='SAME',\n", 199 | " name='maxpool1')\n", 200 | " \n", 201 | " conv2 = conv2d(input_tensor=pool1,\n", 202 | " output_channels=16,\n", 203 | " kernel_size=(3, 3),\n", 204 | " strides=(1, 1, 1, 1),\n", 205 | " activation=tf.nn.relu,\n", 206 | " name='conv2')\n", 207 | " \n", 208 | " pool2 = tf.nn.max_pool(conv2,\n", 209 | " ksize=(1, 2, 2, 1), \n", 210 | " strides=(1, 2, 2, 1),\n", 211 | " padding='SAME',\n", 212 | " name='maxpool2')\n", 213 | " \n", 214 | " dims = pool2.get_shape().as_list()[1:]\n", 215 | " dims = reduce(lambda x, y: x * y, dims, 1)\n", 216 | " flat = tf.reshape(pool2, shape=(-1, dims))\n", 217 | " \n", 218 | " out_layer = fully_connected(flat, n_classes, activation=None, \n", 219 | " name='logits')\n", 220 | "\n", 221 | " # Loss and optimizer\n", 222 | " loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=out_layer, labels=tf_y)\n", 223 | " cost = tf.reduce_mean(loss, name='cost')\n", 224 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 225 | " train = optimizer.minimize(cost, name='train')\n", 226 | "\n", 227 | " # Prediction\n", 228 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), \n", 229 | " tf.argmax(out_layer, 1), \n", 230 | " name='correct_prediction')\n", 231 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, \n", 232 | " tf.float32), \n", 233 | " name='accuracy')" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 3, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "Minibatch: 001 | Cost: 2.303\n", 246 | "Minibatch: 501 | Cost: 0.225\n", 247 | "Minibatch: 1001 | Cost: 0.106\n", 248 | "Minibatch: 1501 | Cost: 0.039\n", 249 | "Epoch: 001 | AvgCost: 0.530 | Train/Valid ACC: 0.966/0.964\n", 250 | "Minibatch: 001 | Cost: 0.051\n", 251 | "Minibatch: 501 | Cost: 0.035\n", 252 | "Minibatch: 1001 | Cost: 0.043\n", 253 | "Minibatch: 1501 | Cost: 0.058\n", 254 | "Epoch: 002 | AvgCost: 0.102 | Train/Valid ACC: 0.967/0.968\n", 255 | "Minibatch: 001 | Cost: 0.019\n", 256 | "Minibatch: 501 | Cost: 0.132\n", 257 | "Minibatch: 1001 | Cost: 0.064\n", 258 | "Minibatch: 1501 | Cost: 0.011\n", 259 | "Epoch: 003 | AvgCost: 0.076 | Train/Valid ACC: 0.978/0.978\n", 260 | "Test ACC: 0.980\n" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "import numpy as np\n", 266 | "\n", 267 | "##########################\n", 268 | "### TRAINING & EVALUATION\n", 269 | "##########################\n", 270 | "\n", 271 | "with tf.Session(graph=g) as sess:\n", 272 | " sess.run(tf.global_variables_initializer())\n", 273 | "\n", 274 | " np.random.seed(random_seed) # random seed for mnist iterator\n", 275 | " for epoch in range(1, epochs + 1):\n", 276 | " avg_cost = 0.\n", 277 | " total_batch = mnist.train.num_examples // batch_size\n", 278 | "\n", 279 | " for i in range(total_batch):\n", 280 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 281 | " batch_x = batch_x[:, :, None] # add \"missing\" color channel\n", 282 | " \n", 283 | " _, c = sess.run(['train', 'cost:0'], \n", 284 | " feed_dict={'inputs:0': batch_x,\n", 285 | " 'targets:0': batch_y,\n", 286 | " 'keep_proba:0': dropout_keep_proba})\n", 287 | " avg_cost += c\n", 288 | " if not i % print_interval:\n", 289 | " print(\"Minibatch: %03d | Cost: %.3f\" % (i + 1, c))\n", 290 | " \n", 291 | " train_acc = sess.run('accuracy:0', \n", 292 | " feed_dict={'inputs:0': mnist.train.images[:, :, None],\n", 293 | " 'targets:0': 
mnist.train.labels,\n", 294 | " 'keep_proba:0': 1.0})\n", 295 | " valid_acc = sess.run('accuracy:0', \n", 296 | " feed_dict={'inputs:0': mnist.validation.images[:, :, None],\n", 297 | " 'targets:0': mnist.validation.labels,\n", 298 | " 'keep_proba:0': 1.0})\n", 299 | " \n", 300 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch, avg_cost / (i + 1)), end=\"\")\n", 301 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 302 | " \n", 303 | " test_acc = sess.run('accuracy:0', \n", 304 | " feed_dict={'inputs:0': mnist.test.images[:, :, None],\n", 305 | " 'targets:0': mnist.test.labels,\n", 306 | " 'keep_proba:0': 1.0})\n", 307 | " \n", 308 | " print('Test ACC: %.3f' % test_acc)" 309 | ] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 3", 315 | "language": "python", 316 | "name": "python3" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 3 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython3", 328 | "version": "3.6.8" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 2 333 | } 334 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/data/perceptron_toydata.txt: -------------------------------------------------------------------------------- 1 | 0.77 -1.14 0 2 | -0.33 1.44 0 3 | 0.91 -3.07 0 4 | -0.37 -1.91 0 5 | -1.84 -1.13 0 6 | -1.50 0.34 0 7 | -0.63 -1.53 0 8 | -1.08 -1.23 0 9 | 0.39 -1.99 0 10 | -1.26 -2.90 0 11 | -5.27 -0.78 0 12 | -0.49 -2.74 0 13 | 1.48 -3.74 0 14 | -1.64 -1.96 0 15 | 0.45 0.36 0 16 | -1.48 -1.17 0 17 | -2.94 -4.47 0 18 | -2.19 -1.48 0 19 | 0.02 -0.02 0 20 | -2.24 -2.12 0 21 | -3.17 -3.69 0 22 | -4.09 1.03 0 23 | -2.41 -2.31 0 24 | -3.45 -0.61 0 25 | -3.96 -2.00 0 26 | -2.95 -1.16 0 27 | -2.42 -3.35 0 28 | -1.74 -1.10 0 29 | -1.61 -1.28 0 30 | -2.59 -2.21 0 31 | -2.64 -2.20 0 32 | -2.84 -4.12 0 33 | -1.45 -2.26 0 34 | -3.98 -1.05 0 35 | -2.97 -1.63 0 36 | -0.68 -1.52 0 37 | -0.10 -3.43 0 38 | -1.14 -2.66 0 39 | -2.92 -2.51 0 40 | -2.14 -1.62 0 41 | -3.33 -0.44 0 42 | -1.05 -3.85 0 43 | 0.38 0.95 0 44 | -0.05 -1.95 0 45 | -3.20 -0.22 0 46 | -2.26 0.01 0 47 | -1.41 -0.33 0 48 | -1.20 -0.71 0 49 | -1.69 0.80 0 50 | -1.52 -1.14 0 51 | 3.88 0.65 1 52 | 0.73 2.97 1 53 | 0.83 3.94 1 54 | 1.59 1.25 1 55 | 3.92 3.48 1 56 | 3.87 2.91 1 57 | 1.14 3.91 1 58 | 1.73 2.80 1 59 | 2.95 1.84 1 60 | 2.61 2.92 1 61 | 2.38 0.90 1 62 | 2.30 3.33 1 63 | 1.31 1.85 1 64 | 1.56 3.85 1 65 | 2.67 2.41 1 66 | 1.23 2.54 1 67 | 1.33 2.03 1 68 | 1.36 2.68 1 69 | 2.58 1.79 1 70 | 2.40 0.91 1 71 | 0.51 2.44 1 72 | 2.17 2.64 1 73 | 4.38 2.94 1 74 | 1.09 3.12 1 75 | 0.68 1.54 1 76 | 1.93 3.71 1 77 | 1.26 1.17 1 78 | 1.90 1.34 1 79 | 3.13 0.92 1 80 | 0.85 1.56 1 81 | 1.50 3.93 1 82 | 2.95 2.09 1 83 | 0.77 2.84 1 84 | 1.00 0.46 1 85 | 3.19 2.32 1 86 | 2.92 2.32 1 87 | 2.86 1.35 1 88 | 0.97 2.68 1 89 | 1.20 1.31 1 90 | 1.54 2.02 1 91 | 1.65 0.63 1 92 | 1.36 -0.22 1 93 | 2.63 0.40 1 94 | 0.90 2.05 1 95 | 1.26 3.54 1 96 | 0.71 2.27 1 97 | 1.96 0.83 1 98 | 2.52 1.83 1 99 | 2.77 2.82 1 100 | 4.16 3.34 1 101 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/helper.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2016-2017 2 | # 3 | # Supporting code for the book 4 | # "Introduction to Artificial Neural Networks and 
Deep Learning: 5 | # A Practical Guide with Applications in Python" 6 | # 7 | # Source: https://github.com/rasbt/deep-learning-book 8 | # Author: Sebastian Raschka 9 | # License: MIT 10 | 11 | 12 | from urllib.request import urlretrieve 13 | import shutil 14 | import glob 15 | import tarfile 16 | import os 17 | import sys 18 | import pickle 19 | import numpy as np 20 | import scipy.misc 21 | from tensorflow.examples.tutorials.mnist import input_data 22 | 23 | 24 | def download_and_extract_cifar(target_dir, 25 | cifar_url='http://www.cs.toronto.edu/' 26 | '~kriz/cifar-10-python.tar.gz'): 27 | 28 | if not os.path.exists(target_dir): 29 | os.mkdir(target_dir) 30 | 31 | fbase = os.path.basename(cifar_url) 32 | fpath = os.path.join(target_dir, fbase) 33 | 34 | if not os.path.exists(fpath): 35 | def get_progress(count, block_size, total_size): 36 | sys.stdout.write('\rDownloading ... %s %d%%' % (fbase, 37 | float(count * block_size) / 38 | float(total_size) * 100.0)) 39 | sys.stdout.flush() 40 | local_filename, headers = urlretrieve(cifar_url, 41 | fpath, 42 | reporthook=get_progress) 43 | sys.stdout.write('\nDownloaded') 44 | 45 | else: 46 | sys.stdout.write('Found existing') 47 | 48 | statinfo = os.stat(fpath) 49 | file_size = statinfo.st_size / 1024**2 50 | sys.stdout.write(' %s (%.1f Mb)\n' % (fbase, file_size)) 51 | sys.stdout.write('Extracting %s ...\n' % fbase) 52 | sys.stdout.flush() 53 | 54 | with tarfile.open(fpath, 'r:gz') as t: 55 | t.extractall(target_dir) 56 | 57 | return fpath.replace('cifar-10-python.tar.gz', 'cifar-10-batches-py') 58 | 59 | 60 | def unpickle_cifar(fpath): 61 | with open(fpath, 'rb') as f: 62 | dct = pickle.load(f, encoding='bytes') 63 | return dct 64 | 65 | 66 | class Cifar10Loader(): 67 | def __init__(self, cifar_path, normalize=False, 68 | channel_mean_center=False, zero_center=False): 69 | self.cifar_path = cifar_path 70 | self.batchnames = [os.path.join(self.cifar_path, f) 71 | for f in os.listdir(self.cifar_path) 72 | if f.startswith('data_batch')] 73 | self.testname = os.path.join(self.cifar_path, 'test_batch') 74 | self.num_train = self.count_train() 75 | self.num_test = self.count_test() 76 | self.normalize = normalize 77 | self.channel_mean_center = channel_mean_center 78 | self.zero_center = zero_center 79 | self.train_mean = None 80 | 81 | def _compute_train_mean(self): 82 | 83 | cum_mean = np.zeros((1, 1, 1, 3)) 84 | 85 | for batch in self.batchnames: 86 | dct = unpickle_cifar(batch) 87 | dct[b'labels'] = np.array(dct[b'labels'], dtype=int) 88 | dct[b'data'] = dct[b'data'].reshape( 89 | dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1) 90 | mean = dct[b'data'].mean(axis=(0, 1, 2), keepdims=True) 91 | cum_mean += mean 92 | 93 | self.train_mean = cum_mean / len(self.batchnames) 94 | 95 | return None 96 | 97 | def load_test(self, onehot=True): 98 | dct = unpickle_cifar(self.testname) 99 | dct[b'labels'] = np.array(dct[b'labels'], dtype=int) 100 | 101 | dct[b'data'] = dct[b'data'].reshape( 102 | dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1) 103 | 104 | if onehot: 105 | dct[b'labels'] = (np.arange(10) == 106 | dct[b'labels'][:, None]).astype(int) 107 | 108 | if self.normalize: 109 | dct[b'data'] = dct[b'data'].astype(np.float32) 110 | dct[b'data'] = dct[b'data'] / 255.0 111 | 112 | if self.channel_mean_center: 113 | if self.train_mean is None: 114 | self._compute_train_mean() 115 | dct[b'data'] -= self.train_mean 116 | 117 | if self.zero_center: 118 | if self.normalize: 119 | dct[b'data'] -= .5 120 | else: 121 | dct[b'data'] -= 127.5 122 | 
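        # Returned shapes: images (10000, 32, 32, 3); labels are integer
        # class indices, or a (10000, 10) one-hot array if onehot=True.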
123 |         return dct[b'data'], dct[b'labels']
124 | 
125 |     def load_train_epoch(self, batch_size=50, onehot=True,
126 |                          shuffle=False, seed=None):
127 | 
128 |         rgen = np.random.RandomState(seed)
129 | 
130 |         for batch in self.batchnames:
131 |             dct = unpickle_cifar(batch)
132 |             dct[b'labels'] = np.array(dct[b'labels'], dtype=int)
133 |             dct[b'data'] = dct[b'data'].reshape(
134 |                 dct[b'data'].shape[0], 3, 32, 32).transpose(0, 2, 3, 1)
135 | 
136 |             if onehot:
137 |                 dct[b'labels'] = (np.arange(10) ==
138 |                                   dct[b'labels'][:, None]).astype(int)
139 | 
140 |             if self.normalize:
141 |                 dct[b'data'] = dct[b'data'].astype(np.float32)
142 |                 dct[b'data'] = dct[b'data'] / 255.0
143 | 
144 |             if self.channel_mean_center:
145 |                 if self.train_mean is None:
146 |                     self._compute_train_mean()
147 |                 dct[b'data'] -= self.train_mean
148 | 
149 |             if self.zero_center:
150 |                 if self.normalize:
151 |                     dct[b'data'] -= .5
152 |                 else:
153 |                     dct[b'data'] -= 127.5
154 | 
155 |             arrays = [dct[b'data'], dct[b'labels']]
156 |             del dct
157 |             indices = np.arange(arrays[0].shape[0])
158 | 
159 |             if shuffle:
160 |                 rgen.shuffle(indices)
161 | 
162 |             for start_idx in range(0, indices.shape[0] - batch_size + 1,
163 |                                    batch_size):
164 |                 index_slice = indices[start_idx:start_idx + batch_size]
165 |                 yield tuple(ary[index_slice] for ary in arrays)  # tuple, not a one-shot generator
166 | 
167 |     def count_train(self):
168 |         cnt = 0
169 |         for f in self.batchnames:
170 |             dct = unpickle_cifar(f)
171 |             cnt += len(dct[b'labels'])
172 |         return cnt
173 | 
174 |     def count_test(self):
175 |         dct = unpickle_cifar(self.testname)
176 |         return len(dct[b'labels'])
177 | 
178 | 
179 | def mnist_export_to_jpg(path='./'):
180 | 
181 |     mnist = input_data.read_data_sets("./", one_hot=False)
182 | 
183 |     batch_x, batch_y = mnist.train.next_batch(50000)
184 |     cnt = -1
185 | 
186 |     def remove_incomplete_existing(path_prefix, expect_files):
187 |         dir_path = os.path.join(path, 'mnist_%s' % path_prefix)
188 | 
189 |         is_empty = False
190 |         if not os.path.exists(dir_path):
191 |             for i in range(10):
192 |                 outpath = os.path.join(path, dir_path, str(i))
193 |                 if not os.path.exists(outpath):
194 |                     os.makedirs(outpath)
195 |             is_empty = True
196 |         else:
197 |             num_existing_files = len(glob.glob('%s/*/*.jpg' % dir_path))
198 |             if num_existing_files > 0 and num_existing_files < expect_files:
199 |                 shutil.rmtree(dir_path)
200 |                 is_empty = True
201 |                 for i in range(10):
202 |                     outpath = os.path.join(path, dir_path, str(i))
203 |                     if not os.path.exists(outpath):
204 |                         os.makedirs(outpath)
205 |         return is_empty
206 | 
207 |     is_empty = remove_incomplete_existing(path_prefix='train',
208 |                                           expect_files=45000)
209 |     if is_empty:
210 |         for data, label in zip(batch_x[:45000], batch_y[:45000]):
211 |             cnt += 1
212 |             outpath = os.path.join(path, 'mnist_train/%d/%05d.jpg' %
213 |                                    (label, cnt))
214 |             scipy.misc.imsave(outpath, (data*255).reshape(28, 28))
215 | 
216 |     is_empty = remove_incomplete_existing(path_prefix='valid',
217 |                                           expect_files=5000)
218 |     if is_empty:
219 |         for data, label in zip(batch_x[45000:], batch_y[45000:]):
220 |             cnt += 1
221 |             outpath = os.path.join(path, 'mnist_valid/%d/%05d.jpg' %
222 |                                    (label, cnt))
223 |             scipy.misc.imsave(outpath, (data*255).reshape(28, 28))
224 | 
225 |     is_empty = remove_incomplete_existing(path_prefix='test',
226 |                                           expect_files=10000)
227 |     if is_empty:
228 |         batch_x, batch_y = mnist.test.next_batch(10000)
229 |         cnt = -1
230 |         for data, label in zip(batch_x, batch_y):
231 |             cnt += 1
232 |             outpath = os.path.join(path, 'mnist_test/%d/%05d.jpg' % (label, cnt))
233 |             scipy.misc.imsave(outpath, 
(data*255).reshape(28, 28))
234 | 
--------------------------------------------------------------------------------
/tensorflow1_ipynb/mechanics/saving-and-reloading-models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n",
8 |     "- Author: Sebastian Raschka\n",
9 |     "- GitHub Repository: https://github.com/rasbt/deeplearning-models"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": 1,
15 |    "metadata": {},
16 |    "outputs": [
17 |     {
18 |      "name": "stdout",
19 |      "output_type": "stream",
20 |      "text": [
21 |       "Sebastian Raschka \n",
22 |       "\n",
23 |       "CPython 3.6.0\n",
24 |       "IPython 6.0.0\n",
25 |       "\n",
26 |       "tensorflow 1.1.0\n"
27 |      ]
28 |     }
29 |    ],
30 |    "source": [
31 |     "%load_ext watermark\n",
32 |     "%watermark -a 'Sebastian Raschka' -v -p tensorflow"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {},
38 |    "source": [
39 |     "# Model Zoo -- Saving and Loading Trained Models \n",
40 |     "\n",
41 |     "## from TensorFlow Checkpoint Files and NumPy NPZ Archives"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "markdown",
46 |    "metadata": {},
47 |    "source": [
48 |     "This notebook demonstrates different strategies on how to export and import trained TensorFlow models based on a simple 2-hidden-layer multilayer perceptron. These include\n",
49 |     "\n",
50 |     "- Using regular TensorFlow meta and checkpoint files\n",
51 |     "- Loading variables from NumPy archive (.npz) files\n",
52 |     "\n",
53 |     "Note that the graph def is set up in a way that it constructs a \"rigid,\" non-trainable TensorFlow classifier if .npz files are provided. This is on purpose, since it may come in handy in certain use cases, but the code can easily be modified to make the model trainable if NumPy .npz files are provided -- for example, by wrapping the `tf.constant` calls in `fc_layer` in a `tf.Variable` constructor like so:\n",
54 |     "\n",
55 |     "```python\n",
56 |     "...\n",
57 |     "if weight_params is not None:\n",
58 |     "    weights = tf.Variable(tf.constant(weight_params, name='weights',\n",
59 |     "                                      dtype=tf.float32))\n",
60 |     "...\n",
61 |     "```\n",
62 |     "\n",
63 |     "instead of \n",
64 |     "\n",
65 |     "```python\n",
66 |     "...\n",
67 |     "if weight_params is not None:\n",
68 |     "    weights = tf.constant(weight_params, name='weights',\n",
69 |     "                          dtype=tf.float32)\n",
70 |     "...\n",
71 |     "```"
72 |    ]
73 |   },
74 |   {
75 |    "cell_type": "markdown",
76 |    "metadata": {},
77 |    "source": [
78 |     "## Define Multilayer Perceptron Graph"
79 |    ]
80 |   },
81 |   {
82 |    "cell_type": "markdown",
83 |    "metadata": {},
84 |    "source": [
85 |     "The following code cell defines wrapper functions for our convenience; they save us some re-typing later when we set up the TensorFlow multilayer perceptron graphs for the trainable and non-trainable models."
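,
    "\n",
    "For example, a single fully connected layer could later be created with a call like the following (a minimal sketch for illustration only; the layer size and scope name are arbitrary):\n",
    "\n",
    "```python\n",
    "fc1 = fc_layer(input_tensor=tf_x, n_output_units=128,\n",
    "               name='fc1', activation_fn=tf.nn.relu)\n",
    "```"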
86 |    ]
87 |   },
88 |   {
89 |    "cell_type": "code",
90 |    "execution_count": 2,
91 |    "metadata": {
92 |     "collapsed": true
93 |    },
94 |    "outputs": [],
95 |    "source": [
96 |     "import tensorflow as tf\n",
97 |     "\n",
98 |     "\n",
99 |     "##########################\n",
100 |     "### WRAPPER FUNCTIONS\n",
101 |     "##########################\n",
102 |     "\n",
103 |     "\n",
104 |     "def fc_layer(input_tensor, n_output_units, name,\n",
105 |     "             activation_fn=None, seed=None,\n",
106 |     "             weight_params=None, bias_params=None):\n",
107 |     "\n",
108 |     "    with tf.variable_scope(name):\n",
109 |     "\n",
110 |     "        if weight_params is not None:\n",
111 |     "            weights = tf.constant(weight_params, name='weights',\n",
112 |     "                                  dtype=tf.float32)\n",
113 |     "        else:\n",
114 |     "            weights = tf.Variable(tf.truncated_normal(\n",
115 |     "                shape=[input_tensor.get_shape().as_list()[-1], n_output_units],\n",
116 |     "                mean=0.0,\n",
117 |     "                stddev=0.1,\n",
118 |     "                dtype=tf.float32,\n",
119 |     "                seed=seed),\n",
120 |     "                name='weights')\n",
121 |     "\n",
122 |     "        if bias_params is not None:\n",
123 |     "            biases = tf.constant(bias_params, name='biases', \n",
124 |     "                                 dtype=tf.float32)\n",
125 |     "\n",
126 |     "        else:\n",
127 |     "            biases = tf.Variable(tf.zeros(shape=[n_output_units]),\n",
128 |     "                                 name='biases', \n",
129 |     "                                 dtype=tf.float32)\n",
130 |     "\n",
131 |     "        act = tf.matmul(input_tensor, weights) + biases\n",
132 |     "\n",
133 |     "        if activation_fn is not None:\n",
134 |     "            act = activation_fn(act)\n",
135 |     "\n",
136 |     "        return act\n",
137 |     "\n",
138 |     "\n",
139 |     "def mlp_graph(n_input=784, n_classes=10, n_hidden_1=128, n_hidden_2=256,\n",
140 |     "              learning_rate=0.1,\n",
141 |     "              fixed_params=None):\n",
142 |     "    \n",
143 |     "    # fixed_params to allow loading weights & biases\n",
144 |     "    # from NumPy npz archives and defining a fixed, non-trainable\n",
145 |     "    # TensorFlow classifier\n",
146 |     "    if not fixed_params:\n",
147 |     "        var_names = ['fc1/weights:0', 'fc1/biases:0',\n",
148 |     "                     'fc2/weights:0', 'fc2/biases:0',\n",
149 |     "                     'logits/weights:0', 'logits/biases:0',]\n",
150 |     "        \n",
151 |     "        fixed_params = {v: None for v in var_names}\n",
152 |     "        found_params = False\n",
153 |     "    else:\n",
154 |     "        found_params = True\n",
155 |     "    \n",
156 |     "    # Input data\n",
157 |     "    tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n",
158 |     "    tf_y = tf.placeholder(tf.int32, [None], name='targets')\n",
159 |     "    tf_y_onehot = tf.one_hot(tf_y, depth=n_classes, name='onehot_targets')\n",
160 |     "\n",
161 |     "    # Multilayer perceptron\n",
162 |     "    fc1 = fc_layer(input_tensor=tf_x, \n",
163 |     "                   n_output_units=n_hidden_1, \n",
164 |     "                   name='fc1',\n",
165 |     "                   weight_params=fixed_params['fc1/weights:0'], \n",
166 |     "                   bias_params=fixed_params['fc1/biases:0'],\n",
167 |     "                   activation_fn=tf.nn.relu)\n",
168 |     "\n",
169 |     "    fc2 = fc_layer(input_tensor=fc1, \n",
170 |     "                   n_output_units=n_hidden_2, \n",
171 |     "                   name='fc2',\n",
172 |     "                   weight_params=fixed_params['fc2/weights:0'], \n",
173 |     "                   bias_params=fixed_params['fc2/biases:0'],\n",
174 |     "                   activation_fn=tf.nn.relu)\n",
175 |     "    \n",
176 |     "    logits = fc_layer(input_tensor=fc2, \n",
177 |     "                      n_output_units=n_classes, \n",
178 |     "                      name='logits',\n",
179 |     "                      weight_params=fixed_params['logits/weights:0'], \n",
180 |     "                      bias_params=fixed_params['logits/biases:0'],\n",
181 |     "                      activation_fn=None)  # logits stay linear for the softmax loss\n",
182 |     "    \n",
183 |     "    # Loss and optimizer\n",
184 |     "    ### Only necessary if no existing params are found\n",
185 |     "    ### and a trainable graph has to be initialized\n",
186 |     "    if not found_params:\n",
187 |     "        loss = 
tf.nn.softmax_cross_entropy_with_logits(\n", 188 | " logits=logits, labels=tf_y_onehot)\n", 189 | " cost = tf.reduce_mean(loss, name='cost')\n", 190 | " optimizer = tf.train.GradientDescentOptimizer(\n", 191 | " learning_rate=learning_rate)\n", 192 | " train = optimizer.minimize(cost, name='train')\n", 193 | "\n", 194 | " # Prediction\n", 195 | " probabilities = tf.nn.softmax(logits, name='probabilities')\n", 196 | " labels = tf.cast(tf.argmax(logits, 1), tf.int32, name='labels')\n", 197 | " \n", 198 | " correct_prediction = tf.equal(labels, \n", 199 | " tf_y, name='correct_predictions')\n", 200 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),\n", 201 | " name='accuracy')" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Train and Save Multilayer Perceptron" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 3, 214 | "metadata": { 215 | "scrolled": true 216 | }, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Extracting ./train-images-idx3-ubyte.gz\n", 223 | "Extracting ./train-labels-idx1-ubyte.gz\n", 224 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 225 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 226 | "Epoch: 001 | AvgCost: 0.366 | Train/Valid ACC: 0.944/0.948\n", 227 | "Epoch: 002 | AvgCost: 0.163 | Train/Valid ACC: 0.965/0.963\n", 228 | "Epoch: 003 | AvgCost: 0.118 | Train/Valid ACC: 0.972/0.969\n", 229 | "Epoch: 004 | AvgCost: 0.093 | Train/Valid ACC: 0.979/0.974\n", 230 | "Epoch: 005 | AvgCost: 0.076 | Train/Valid ACC: 0.984/0.977\n", 231 | "Epoch: 006 | AvgCost: 0.062 | Train/Valid ACC: 0.986/0.974\n", 232 | "Epoch: 007 | AvgCost: 0.052 | Train/Valid ACC: 0.990/0.977\n", 233 | "Epoch: 008 | AvgCost: 0.044 | Train/Valid ACC: 0.988/0.975\n", 234 | "Epoch: 009 | AvgCost: 0.037 | Train/Valid ACC: 0.991/0.978\n", 235 | "Epoch: 010 | AvgCost: 0.032 | Train/Valid ACC: 0.994/0.979\n", 236 | "Test ACC: 0.976\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "from tensorflow.examples.tutorials.mnist import input_data\n", 242 | "\n", 243 | "##########################\n", 244 | "### SETTINGS\n", 245 | "##########################\n", 246 | "\n", 247 | "# Hyperparameters\n", 248 | "learning_rate = 0.1\n", 249 | "training_epochs = 10\n", 250 | "batch_size = 64\n", 251 | "\n", 252 | "##########################\n", 253 | "### GRAPH DEFINITION\n", 254 | "##########################\n", 255 | "\n", 256 | "g = tf.Graph()\n", 257 | "with g.as_default():\n", 258 | " mlp_graph()\n", 259 | "\n", 260 | "##########################\n", 261 | "### DATASET\n", 262 | "##########################\n", 263 | "\n", 264 | "mnist = input_data.read_data_sets(\"./\", one_hot=False)\n", 265 | "\n", 266 | "##########################\n", 267 | "### TRAINING & EVALUATION\n", 268 | "##########################\n", 269 | "\n", 270 | "with tf.Session(graph=g) as sess:\n", 271 | " sess.run(tf.global_variables_initializer())\n", 272 | " saver0 = tf.train.Saver()\n", 273 | " \n", 274 | " for epoch in range(training_epochs):\n", 275 | " avg_cost = 0.\n", 276 | " total_batch = mnist.train.num_examples // batch_size\n", 277 | "\n", 278 | " for i in range(total_batch):\n", 279 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 280 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 281 | " 'targets:0': batch_y})\n", 282 | " avg_cost += c\n", 283 | " \n", 284 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': 
mnist.train.images,\n",
285 |     "                                                      'targets:0': mnist.train.labels})\n",
286 |     "        valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n",
287 |     "                                                      'targets:0': mnist.validation.labels}) \n",
288 |     "        \n",
289 |     "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n",
290 |     "        print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n",
291 |     "    \n",
292 |     "    test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n",
293 |     "                                                 'targets:0': mnist.test.labels})\n",
294 |     "    print('Test ACC: %.3f' % test_acc)\n",
295 |     "    \n",
296 |     "    ##########################\n",
297 |     "    ### SAVE TRAINED MODEL\n",
298 |     "    ##########################\n",
299 |     "    saver0.save(sess, save_path='./mlp')"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "markdown",
304 |    "metadata": {},
305 |    "source": [
306 |     "## Reload Model from Meta and Checkpoint Files"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "markdown",
311 |    "metadata": {},
312 |    "source": [
313 |     "**You can restart the notebook, and the following code cells should execute without any additional code dependencies.**"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": 4,
319 |    "metadata": {},
320 |    "outputs": [
321 |     {
322 |      "name": "stdout",
323 |      "output_type": "stream",
324 |      "text": [
325 |       "Extracting ./train-images-idx3-ubyte.gz\n",
326 |       "Extracting ./train-labels-idx1-ubyte.gz\n",
327 |       "Extracting ./t10k-images-idx3-ubyte.gz\n",
328 |       "Extracting ./t10k-labels-idx1-ubyte.gz\n",
329 |       "INFO:tensorflow:Restoring parameters from ./mlp\n",
330 |       "Test ACC: 0.976\n"
331 |      ]
332 |     }
333 |    ],
334 |    "source": [
335 |     "import tensorflow as tf\n",
336 |     "from tensorflow.examples.tutorials.mnist import input_data\n",
337 |     "\n",
338 |     "mnist = input_data.read_data_sets(\"./\", one_hot=False)\n",
339 |     "\n",
340 |     "with tf.Session() as sess:\n",
341 |     "    \n",
342 |     "    saver1 = tf.train.import_meta_graph('./mlp.meta')\n",
343 |     "    saver1.restore(sess, save_path='./mlp')\n",
344 |     "    \n",
345 |     "    test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n",
346 |     "                                                 'targets:0': mnist.test.labels})\n",
347 |     "    print('Test ACC: %.3f' % test_acc)"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "## Working with NumPy Archive Files and Creating Non-Trainable Graphs"
355 |    ]
356 |   },
357 |   {
358 |    "cell_type": "markdown",
359 |    "metadata": {},
360 |    "source": [
361 |     "### Export Model Parameters to NumPy NPZ files"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "code",
366 |    "execution_count": 5,
367 |    "metadata": {},
368 |    "outputs": [
369 |     {
370 |      "name": "stdout",
371 |      "output_type": "stream",
372 |      "text": [
373 |       "INFO:tensorflow:Restoring parameters from ./mlp\n",
374 |       "Found variables:\n",
375 |       "fc1/weights:0\n",
376 |       "fc1/biases:0\n",
377 |       "fc2/weights:0\n",
378 |       "fc2/biases:0\n",
379 |       "logits/weights:0\n",
380 |       "logits/biases:0\n"
381 |      ]
382 |     }
383 |    ],
384 |    "source": [
385 |     "import tensorflow as tf\n",
386 |     "import numpy as np\n",
387 |     "\n",
388 |     "tf.reset_default_graph()\n",
389 |     "with tf.Session() as sess:\n",
390 |     "\n",
391 |     "    saver1 = tf.train.import_meta_graph('./mlp.meta')\n",
392 |     "    saver1.restore(sess, save_path='./mlp')\n",
393 |     "    \n",
394 |     "    var_names = [v.name for v in \n",
395 |     "                 tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]\n",
396 |     "    \n",
397 |     "    params = {}\n",
398 |     "    print('Found variables:')\n",
399 |     "    for v in var_names:\n",
400 |     "        print(v)\n",
401 |     "        \n",
402 |     "        ary = sess.run(v)\n",
403 |     "        params[v] = ary\n",
404 |     "    \n",
405 |     "    np.savez('mlp', **params)"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "markdown",
410 |    "metadata": {},
411 |    "source": [
412 |     "### Load NumPy .npz files into the `mlp_graph`"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "markdown",
417 |    "metadata": {},
418 |    "source": [
419 |     "Note that the graph def was set up in a way that it constructs a \"rigid,\" non-trainable TensorFlow classifier if .npz files are provided. This is on purpose, since it may come in handy in certain use cases, but the code can easily be modified to make the model trainable if NumPy .npz files are provided (e.g., by wrapping the `tf.constant` calls in `fc_layer` in a `tf.Variable` constructor)."
420 |    ]
421 |   },
422 |   {
423 |    "cell_type": "markdown",
424 |    "metadata": {},
425 |    "source": [
426 |     "**Note: Provided you have defined the `fc_layer` and `mlp_graph` wrapper functions in *Define Multilayer Perceptron Graph*, the following code cell is independent and has no other code dependencies.**"
427 |    ]
428 |   },
429 |   {
430 |    "cell_type": "code",
431 |    "execution_count": 6,
432 |    "metadata": {},
433 |    "outputs": [
434 |     {
435 |      "name": "stdout",
436 |      "output_type": "stream",
437 |      "text": [
438 |       "Extracting ./train-images-idx3-ubyte.gz\n",
439 |       "Extracting ./train-labels-idx1-ubyte.gz\n",
440 |       "Extracting ./t10k-images-idx3-ubyte.gz\n",
441 |       "Extracting ./t10k-labels-idx1-ubyte.gz\n",
442 |       "Test ACC: 0.976\n"
443 |      ]
444 |     }
445 |    ],
446 |    "source": [
447 |     "import numpy as np\n",
448 |     "import tensorflow as tf\n",
449 |     "from tensorflow.examples.tutorials.mnist import input_data\n",
450 |     "\n",
451 |     "###########################\n",
452 |     "### LOAD DATA AND PARAMS\n",
453 |     "###########################\n",
454 |     "\n",
455 |     "mnist = input_data.read_data_sets(\"./\", one_hot=False)\n",
456 |     "param_dict = np.load('mlp.npz')\n",
457 |     "\n",
458 |     "##########################\n",
459 |     "### GRAPH DEFINITION\n",
460 |     "##########################\n",
461 |     "\n",
462 |     "\n",
463 |     "g = tf.Graph()\n",
464 |     "with g.as_default():\n",
465 |     "    \n",
466 |     "    # here: constructs a non-trainable graph\n",
467 |     "    # due to the provided fixed_params argument\n",
468 |     "    mlp_graph(fixed_params=param_dict)\n",
469 |     "\n",
470 |     "with tf.Session(graph=g) as sess:\n",
471 |     "    \n",
472 |     "    test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n",
473 |     "                                                 'targets:0': mnist.test.labels})\n",
474 |     "    print('Test ACC: %.3f' % test_acc)"
475 |    ]
476 |   }
477 |  ],
478 |  "metadata": {
479 |   "kernelspec": {
480 |    "display_name": "Python 3",
481 |    "language": "python",
482 |    "name": "python3"
483 |   },
484 |   "language_info": {
485 |    "codemirror_mode": {
486 |     "name": "ipython",
487 |     "version": 3
488 |    },
489 |    "file_extension": ".py",
490 |    "mimetype": "text/x-python",
491 |    "name": "python",
492 |    "nbconvert_exporter": "python",
493 |    "pygments_lexer": "ipython3",
494 |    "version": "3.7.1"
495 |   }
496 |  },
497 |  "nbformat": 4,
498 |  "nbformat_minor": 2
499 | }
500 | 
--------------------------------------------------------------------------------
/tensorflow1_ipynb/metric/siamese-1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n",
8 |     "- Author: 
Sebastian Raschka\n",
9 |     "- GitHub Repository: https://github.com/rasbt/deeplearning-models"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": 1,
15 |    "metadata": {},
16 |    "outputs": [
17 |     {
18 |      "name": "stdout",
19 |      "output_type": "stream",
20 |      "text": [
21 |       "Sebastian Raschka \n",
22 |       "\n",
23 |       "CPython 3.6.1\n",
24 |       "IPython 6.0.0\n",
25 |       "\n",
26 |       "tensorflow 1.2.0\n"
27 |      ]
28 |     }
29 |    ],
30 |    "source": [
31 |     "%load_ext watermark\n",
32 |     "%watermark -a 'Sebastian Raschka' -v -p tensorflow"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {},
38 |    "source": [
39 |     "# Model Zoo -- Siamese Network with Multilayer Perceptrons"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": 2,
45 |    "metadata": {
46 |     "scrolled": true
47 |    },
48 |    "outputs": [
49 |     {
50 |      "name": "stdout",
51 |      "output_type": "stream",
52 |      "text": [
53 |       "Extracting ./train-images-idx3-ubyte.gz\n",
54 |       "Extracting ./train-labels-idx1-ubyte.gz\n",
55 |       "Extracting ./t10k-images-idx3-ubyte.gz\n",
56 |       "Extracting ./t10k-labels-idx1-ubyte.gz\n",
57 |       "Initializing variables:\n",
58 |       "\n",
59 |       "\n",
60 |       "\n",
61 |       "\n",
62 |       "\n",
63 |       "\n",
64 |       "Epoch: 001 | AvgCost: 0.472\n",
65 |       "Epoch: 002 | AvgCost: 0.258\n",
66 |       "Epoch: 003 | AvgCost: 0.250\n",
67 |       "Epoch: 004 | AvgCost: 0.250\n",
68 |       "Epoch: 005 | AvgCost: 0.250\n"
69 |      ]
70 |     }
71 |    ],
72 |    "source": [
73 |     "import numpy as np\n",
74 |     "import tensorflow as tf\n",
75 |     "from tensorflow.examples.tutorials.mnist import input_data\n",
76 |     "\n",
77 |     "\n",
78 |     "##########################\n",
79 |     "### SETTINGS\n",
80 |     "##########################\n",
81 |     "\n",
82 |     "# General settings\n",
83 |     "\n",
84 |     "random_seed = 0\n",
85 |     "\n",
86 |     "# Hyperparameters\n",
87 |     "learning_rate = 0.001\n",
88 |     "training_epochs = 5\n",
89 |     "batch_size = 100\n",
90 |     "margin = 1.0\n",
91 |     "\n",
92 |     "# Architecture\n",
93 |     "n_hidden_1 = 256\n",
94 |     "n_hidden_2 = 256\n",
95 |     "n_input = 784\n",
96 |     "n_classes = 1 # for 'true' and 'false' matches\n",
97 |     "\n",
98 |     "\n",
99 |     "def fully_connected(inputs, output_nodes, activation=None, seed=None):\n",
100 |     "\n",
101 |     "    input_nodes = inputs.get_shape().as_list()[1]\n",
102 |     "    weights = tf.get_variable(name='weights', \n",
103 |     "                              shape=(input_nodes, output_nodes),\n",
104 |     "                              initializer=tf.truncated_normal_initializer(\n",
105 |     "                                  mean=0.0,\n",
106 |     "                                  stddev=0.001,\n",
107 |     "                                  dtype=tf.float32,\n",
108 |     "                                  seed=seed))\n",
109 |     "\n",
110 |     "    biases = tf.get_variable(name='biases', \n",
111 |     "                             shape=(output_nodes,),\n",
112 |     "                             initializer=tf.constant_initializer(\n",
113 |     "                                 value=0.0, \n",
114 |     "                                 dtype=tf.float32))\n",
115 |     "    \n",
116 |     "    act = tf.matmul(inputs, weights) + biases\n",
117 |     "    if activation is not None:\n",
118 |     "        act = activation(act)\n",
119 |     "    return act\n",
120 |     "\n",
121 |     "\n",
122 |     "def euclidean_distance(x_1, x_2):\n",
123 |     "    return tf.sqrt(tf.maximum(tf.reduce_sum(\n",
124 |     "        tf.square(x_1 - x_2), axis=1), 1e-06))\n",
125 |     "\n",
126 |     "def contrastive_loss(label, distance, margin=1.0):\n",
127 |     "    return (label * tf.square(distance) +\n",
128 |     "            (1.0 - label) * tf.square(tf.maximum(margin - distance, 0.)))\n",
129 |     "\n",
130 |     "\n",
131 |     "##########################\n",
132 |     "### GRAPH DEFINITION\n",
133 |     "##########################\n",
134 |     "\n",
135 |     "g = tf.Graph()\n",
136 |     "with g.as_default():\n",
137 |     "    \n",
138 |     "    tf.set_random_seed(random_seed)\n",
139 |     "\n",
140 |     "    # Input data\n",
141 |     "    tf_x_1 = tf.placeholder(tf.float32, [None, n_input], name='inputs_1')\n",
142 |     "    tf_x_2 = tf.placeholder(tf.float32, [None, n_input], name='inputs_2')\n",
143 |     "    tf_y = tf.placeholder(tf.float32, [None], \n",
144 |     "                          name='targets') # here: 'true' or 'false' values\n",
145 |     "\n",
146 |     "    # Siamese Network\n",
147 |     "    def build_mlp(inputs):\n",
148 |     "        with tf.variable_scope('fc_1'):\n",
149 |     "            layer_1 = fully_connected(inputs, n_hidden_1, \n",
150 |     "                                      activation=tf.nn.relu)\n",
151 |     "        with tf.variable_scope('fc_2'):\n",
152 |     "            layer_2 = fully_connected(layer_1, n_hidden_2, \n",
153 |     "                                      activation=tf.nn.relu)\n",
154 |     "        with tf.variable_scope('fc_3'):\n",
155 |     "            out_layer = fully_connected(layer_2, n_classes, \n",
156 |     "                                        activation=tf.nn.relu)\n",
157 |     "\n",
158 |     "        return out_layer\n",
159 |     "    \n",
160 |     "    \n",
161 |     "    with tf.variable_scope('siamese_net', reuse=False):\n",
162 |     "        pred_left = build_mlp(tf_x_1)\n",
163 |     "    with tf.variable_scope('siamese_net', reuse=True):\n",
164 |     "        pred_right = build_mlp(tf_x_2)\n",
165 |     "    \n",
166 |     "    # Loss and optimizer\n",
167 |     "    loss = contrastive_loss(tf_y, euclidean_distance(pred_left, pred_right), margin=margin)\n",
168 |     "    cost = tf.reduce_mean(loss, name='cost')\n",
169 |     "    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
170 |     "    train = optimizer.minimize(cost, name='train')\n",
171 |     "    \n",
172 |     "##########################\n",
173 |     "### TRAINING & EVALUATION\n",
174 |     "##########################\n",
175 |     "\n",
176 |     "np.random.seed(random_seed) # set seed for mnist shuffling\n",
177 |     "mnist = input_data.read_data_sets(\"./\", one_hot=False)\n",
178 |     "\n",
179 |     "with tf.Session(graph=g) as sess:\n",
180 |     "    \n",
181 |     "    print('Initializing variables:')\n",
182 |     "    sess.run(tf.global_variables_initializer())\n",
183 |     "    for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,\n",
184 |     "                               scope='siamese_net'):\n",
185 |     "        print(i)\n",
186 |     "\n",
187 |     "    for epoch in range(training_epochs):\n",
188 |     "        avg_cost = 0.\n",
189 |     "        \n",
190 |     "        total_batch = mnist.train.num_examples // batch_size // 2\n",
191 |     "\n",
192 |     "        for i in range(total_batch):\n",
193 |     "            \n",
194 |     "            batch_x_1, batch_y_1 = mnist.train.next_batch(batch_size)\n",
195 |     "            batch_x_2, batch_y_2 = mnist.train.next_batch(batch_size)\n",
196 |     "            batch_y = (batch_y_1 == batch_y_2).astype('float32')\n",
197 |     "            \n",
198 |     "            _, c = sess.run(['train', 'cost:0'], feed_dict={'inputs_1:0': batch_x_1,\n",
199 |     "                                                            'inputs_2:0': batch_x_2,\n",
200 |     "                                                            'targets:0': batch_y})\n",
201 |     "            avg_cost += c\n",
202 |     "\n",
203 |     "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)))"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "- Todo: add embedding visualization"
211 |    ]
212 |   }
213 |  ],
214 |  "metadata": {
215 |   "kernelspec": {
216 |    "display_name": "Python 3",
217 |    "language": "python",
218 |    "name": "python3"
219 |   },
220 |   "language_info": {
221 |    "codemirror_mode": {
222 |     "name": "ipython",
223 |     "version": 3
224 |    },
225 |    "file_extension": ".py",
226 |    "mimetype": "text/x-python",
227 |    "name": "python",
228 |    "nbconvert_exporter": "python",
229 |    "pygments_lexer": "ipython3",
230 |    "version": "3.7.1"
231 |   }
232 |  },
233 |  "nbformat": 4,
234 |  "nbformat_minor": 2
235 | }
236 | 
--------------------------------------------------------------------------------
/tensorflow1_ipynb/mlp/mlp-basic.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.0\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.1.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Multilayer Perceptron" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### Low-level Implementation" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Extracting ./train-images-idx3-ubyte.gz\n", 59 | "Extracting ./train-labels-idx1-ubyte.gz\n", 60 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 61 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 62 | "Epoch: 001 | AvgCost: 0.349 | Train/Valid ACC: 0.945/0.944\n", 63 | "Epoch: 002 | AvgCost: 0.164 | Train/Valid ACC: 0.962/0.961\n", 64 | "Epoch: 003 | AvgCost: 0.118 | Train/Valid ACC: 0.973/0.969\n", 65 | "Epoch: 004 | AvgCost: 0.092 | Train/Valid ACC: 0.979/0.971\n", 66 | "Epoch: 005 | AvgCost: 0.075 | Train/Valid ACC: 0.983/0.974\n", 67 | "Epoch: 006 | AvgCost: 0.061 | Train/Valid ACC: 0.985/0.976\n", 68 | "Epoch: 007 | AvgCost: 0.052 | Train/Valid ACC: 0.988/0.976\n", 69 | "Epoch: 008 | AvgCost: 0.043 | Train/Valid ACC: 0.991/0.978\n", 70 | "Epoch: 009 | AvgCost: 0.037 | Train/Valid ACC: 0.993/0.980\n", 71 | "Epoch: 010 | AvgCost: 0.030 | Train/Valid ACC: 0.994/0.979\n", 72 | "Test ACC: 0.975\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "import tensorflow as tf\n", 78 | "from tensorflow.examples.tutorials.mnist import input_data\n", 79 | "\n", 80 | "\n", 81 | "##########################\n", 82 | "### DATASET\n", 83 | "##########################\n", 84 | "\n", 85 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 86 | "\n", 87 | "\n", 88 | "##########################\n", 89 | "### SETTINGS\n", 90 | "##########################\n", 91 | "\n", 92 | "# Hyperparameters\n", 93 | "learning_rate = 0.1\n", 94 | "training_epochs = 10\n", 95 | "batch_size = 64\n", 96 | "\n", 97 | "# Architecture\n", 98 | "n_hidden_1 = 128\n", 99 | "n_hidden_2 = 256\n", 100 | "n_input = 784\n", 101 | "n_classes = 10\n", 102 | "\n", 103 | "\n", 104 | "##########################\n", 105 | "### GRAPH DEFINITION\n", 106 | "##########################\n", 107 | "\n", 108 | "g = tf.Graph()\n", 109 | "with g.as_default():\n", 110 | "\n", 111 | " # Input data\n", 112 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 113 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 114 | "\n", 115 | " # Model parameters\n", 116 | " weights = {\n", 117 | " 'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1)),\n", 118 | " 'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1)),\n", 119 | " 'out': tf.Variable(tf.truncated_normal([n_hidden_2, 
n_classes], stddev=0.1))\n", 120 | " }\n", 121 | " biases = {\n", 122 | " 'b1': tf.Variable(tf.zeros([n_hidden_1])),\n", 123 | " 'b2': tf.Variable(tf.zeros([n_hidden_2])),\n", 124 | " 'out': tf.Variable(tf.zeros([n_classes]))\n", 125 | " }\n", 126 | "\n", 127 | " # Multilayer perceptron\n", 128 | " layer_1 = tf.add(tf.matmul(tf_x, weights['h1']), biases['b1'])\n", 129 | " layer_1 = tf.nn.relu(layer_1)\n", 130 | " layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n", 131 | " layer_2 = tf.nn.relu(layer_2)\n", 132 | " out_layer = tf.matmul(layer_2, weights['out']) + biases['out']\n", 133 | "\n", 134 | " # Loss and optimizer\n", 135 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 136 | " cost = tf.reduce_mean(loss, name='cost')\n", 137 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 138 | " train = optimizer.minimize(cost, name='train')\n", 139 | "\n", 140 | " # Prediction\n", 141 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 142 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 143 | "\n", 144 | " \n", 145 | "##########################\n", 146 | "### TRAINING & EVALUATION\n", 147 | "##########################\n", 148 | "\n", 149 | "with tf.Session(graph=g) as sess:\n", 150 | " sess.run(tf.global_variables_initializer())\n", 151 | "\n", 152 | " for epoch in range(training_epochs):\n", 153 | " avg_cost = 0.\n", 154 | " total_batch = mnist.train.num_examples // batch_size\n", 155 | "\n", 156 | " for i in range(total_batch):\n", 157 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 158 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 159 | " 'targets:0': batch_y})\n", 160 | " avg_cost += c\n", 161 | " \n", 162 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 163 | " 'targets:0': mnist.train.labels})\n", 164 | " valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 165 | " 'targets:0': mnist.validation.labels}) \n", 166 | " \n", 167 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 168 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 169 | " \n", 170 | " test_acc = sess.run(accuracy, feed_dict={'features:0': mnist.test.images,\n", 171 | " 'targets:0': mnist.test.labels})\n", 172 | " print('Test ACC: %.3f' % test_acc)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### tensorflow.layers Abstraction" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 3, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "Extracting ./train-images-idx3-ubyte.gz\n", 192 | "Extracting ./train-labels-idx1-ubyte.gz\n", 193 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 194 | "Extracting ./t10k-labels-idx1-ubyte.gz\n", 195 | "Epoch: 001 | AvgCost: 0.344 | Train/Valid ACC: 0.946/0.946\n", 196 | "Epoch: 002 | AvgCost: 0.159 | Train/Valid ACC: 0.965/0.965\n", 197 | "Epoch: 003 | AvgCost: 0.115 | Train/Valid ACC: 0.973/0.969\n", 198 | "Epoch: 004 | AvgCost: 0.090 | Train/Valid ACC: 0.979/0.973\n", 199 | "Epoch: 005 | AvgCost: 0.073 | Train/Valid ACC: 0.978/0.971\n", 200 | "Epoch: 006 | AvgCost: 0.062 | Train/Valid ACC: 0.985/0.975\n", 201 | "Epoch: 007 | AvgCost: 0.051 | Train/Valid ACC: 0.990/0.977\n", 202 | "Epoch: 008 | 
AvgCost: 0.043 | Train/Valid ACC: 0.992/0.979\n", 203 | "Epoch: 009 | AvgCost: 0.036 | Train/Valid ACC: 0.993/0.978\n", 204 | "Epoch: 010 | AvgCost: 0.030 | Train/Valid ACC: 0.991/0.975\n", 205 | "Test ACC: 0.975\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "import tensorflow as tf\n", 211 | "from tensorflow.examples.tutorials.mnist import input_data\n", 212 | "\n", 213 | "\n", 214 | "##########################\n", 215 | "### DATASET\n", 216 | "##########################\n", 217 | "\n", 218 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 219 | "\n", 220 | "\n", 221 | "##########################\n", 222 | "### SETTINGS\n", 223 | "##########################\n", 224 | "\n", 225 | "# Hyperparameters\n", 226 | "learning_rate = 0.1\n", 227 | "training_epochs = 10\n", 228 | "batch_size = 64\n", 229 | "\n", 230 | "# Architecture\n", 231 | "n_hidden_1 = 128\n", 232 | "n_hidden_2 = 256\n", 233 | "n_input = 784\n", 234 | "n_classes = 10\n", 235 | "\n", 236 | "\n", 237 | "##########################\n", 238 | "### GRAPH DEFINITION\n", 239 | "##########################\n", 240 | "\n", 241 | "g = tf.Graph()\n", 242 | "with g.as_default():\n", 243 | "\n", 244 | " # Input data\n", 245 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 246 | " tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 247 | "\n", 248 | " # Multilayer perceptron\n", 249 | " layer_1 = tf.layers.dense(tf_x, n_hidden_1, activation=tf.nn.relu, \n", 250 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 251 | " layer_2 = tf.layers.dense(layer_1, n_hidden_2, activation=tf.nn.relu,\n", 252 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 253 | " out_layer = tf.layers.dense(layer_2, n_classes, activation=None)\n", 254 | "\n", 255 | " # Loss and optimizer\n", 256 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 257 | " cost = tf.reduce_mean(loss, name='cost')\n", 258 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 259 | " train = optimizer.minimize(cost, name='train')\n", 260 | "\n", 261 | " # Prediction\n", 262 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 263 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')\n", 264 | "\n", 265 | "\n", 266 | "##########################\n", 267 | "### TRAINING & EVALUATION\n", 268 | "##########################\n", 269 | " \n", 270 | "with tf.Session(graph=g) as sess:\n", 271 | " sess.run(tf.global_variables_initializer())\n", 272 | "\n", 273 | " for epoch in range(training_epochs):\n", 274 | " avg_cost = 0.\n", 275 | " total_batch = mnist.train.num_examples // batch_size\n", 276 | "\n", 277 | " for i in range(total_batch):\n", 278 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 279 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 280 | " 'targets:0': batch_y})\n", 281 | " avg_cost += c\n", 282 | " \n", 283 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 284 | " 'targets:0': mnist.train.labels})\n", 285 | " valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 286 | " 'targets:0': mnist.validation.labels}) \n", 287 | " \n", 288 | " print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 289 | " print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 290 | " \n", 291 | " test_acc = 
sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n", 292 | " 'targets:0': mnist.test.labels})\n", 293 | " print('Test ACC: %.3f' % test_acc)" 294 | ] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "Python 3", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.7.1" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 2 318 | } 319 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/mlp/mlp-batchnorm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Multilayer Perceptron with Batch Normalization" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Extracting ./train-images-idx3-ubyte.gz\n", 52 | "Extracting ./train-labels-idx1-ubyte.gz\n", 53 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 54 | "Extracting ./t10k-labels-idx1-ubyte.gz\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "import tensorflow as tf\n", 60 | "from tensorflow.examples.tutorials.mnist import input_data\n", 61 | "\n", 62 | "\n", 63 | "##########################\n", 64 | "### DATASET\n", 65 | "##########################\n", 66 | "\n", 67 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 68 | "\n", 69 | "\n", 70 | "##########################\n", 71 | "### SETTINGS\n", 72 | "##########################\n", 73 | "\n", 74 | "# Hyperparameters\n", 75 | "learning_rate = 0.1\n", 76 | "training_epochs = 10\n", 77 | "batch_size = 64\n", 78 | "\n", 79 | "# Architecture\n", 80 | "n_hidden_1 = 128\n", 81 | "n_hidden_2 = 256\n", 82 | "n_input = 784\n", 83 | "n_classes = 10\n", 84 | "\n", 85 | "# Other\n", 86 | "random_seed = 123\n", 87 | "\n", 88 | "\n", 89 | "##########################\n", 90 | "### GRAPH DEFINITION\n", 91 | "##########################\n", 92 | "\n", 93 | "g = tf.Graph()\n", 94 | "with g.as_default():\n", 95 | " \n", 96 | " tf.set_random_seed(random_seed)\n", 97 | " \n", 98 | " # Batchnorm settings\n", 99 | " training_phase = tf.placeholder(tf.bool, None, name='training_phase')\n", 100 | "\n", 101 | " # Input data\n", 102 | " tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 103 | " tf_y = tf.placeholder(tf.float32, 
[None, n_classes], name='targets')\n", 104 | "\n", 105 | " # Multilayer perceptron\n", 106 | " layer_1 = tf.layers.dense(tf_x, n_hidden_1, \n", 107 | " activation=None, # Batchnorm comes before nonlinear activation\n", 108 | " use_bias=False, # Note that no bias unit is used in batchnorm\n", 109 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 110 | " \n", 111 | " layer_1 = tf.layers.batch_normalization(layer_1, training=training_phase)\n", 112 | " layer_1 = tf.nn.relu(layer_1)\n", 113 | " \n", 114 | " layer_2 = tf.layers.dense(layer_1, n_hidden_2, \n", 115 | " activation=None,\n", 116 | " use_bias=False,\n", 117 | " kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 118 | " layer_2 = tf.layers.batch_normalization(layer_2, training=training_phase)\n", 119 | " layer_2 = tf.nn.relu(layer_2)\n", 120 | " \n", 121 | " out_layer = tf.layers.dense(layer_2, n_classes, activation=None, name='logits')\n", 122 | "\n", 123 | " # Loss and optimizer\n", 124 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 125 | " cost = tf.reduce_mean(loss, name='cost')\n", 126 | " \n", 127 | " # control dependency to ensure that batchnorm parameters are also updated\n", 128 | " with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):\n", 129 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 130 | " train = optimizer.minimize(cost, name='train')\n", 131 | "\n", 132 | " # Prediction\n", 133 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 134 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 3, 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "Epoch: 001 | AvgCost: 0.280 | Train/Valid ACC: 0.962/0.960\n", 147 | "Epoch: 002 | AvgCost: 0.131 | Train/Valid ACC: 0.978/0.972\n", 148 | "Epoch: 003 | AvgCost: 0.095 | Train/Valid ACC: 0.984/0.973\n", 149 | "Epoch: 004 | AvgCost: 0.074 | Train/Valid ACC: 0.988/0.976\n", 150 | "Epoch: 005 | AvgCost: 0.059 | Train/Valid ACC: 0.992/0.980\n", 151 | "Epoch: 006 | AvgCost: 0.049 | Train/Valid ACC: 0.995/0.980\n", 152 | "Epoch: 007 | AvgCost: 0.039 | Train/Valid ACC: 0.996/0.979\n", 153 | "Epoch: 008 | AvgCost: 0.033 | Train/Valid ACC: 0.997/0.981\n", 154 | "Epoch: 009 | AvgCost: 0.030 | Train/Valid ACC: 0.997/0.977\n", 155 | "Epoch: 010 | AvgCost: 0.024 | Train/Valid ACC: 0.998/0.979\n", 156 | "Test ACC: 0.977\n" 157 | ] 158 | } 159 | ], 160 | "source": [ 161 | "import numpy as np\n", 162 | "\n", 163 | "##########################\n", 164 | "### TRAINING & EVALUATION\n", 165 | "##########################\n", 166 | " \n", 167 | "with tf.Session(graph=g) as sess:\n", 168 | " sess.run(tf.global_variables_initializer())\n", 169 | "\n", 170 | " np.random.seed(random_seed) # random seed for mnist iterator\n", 171 | " for epoch in range(training_epochs):\n", 172 | " avg_cost = 0.\n", 173 | " total_batch = mnist.train.num_examples // batch_size\n", 174 | "\n", 175 | " for i in range(total_batch):\n", 176 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 177 | " _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 178 | " 'targets:0': batch_y,\n", 179 | " 'training_phase:0': True})\n", 180 | " avg_cost += c\n", 181 | " \n", 182 | " train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 
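# --- Editor's aside (not part of the notebook): what batch_normalization computes ---
# A minimal NumPy sketch of the training-time computation, assuming a small batch of
# pre-activations z; gamma/beta are the learnable scale and shift (beta is why the
# preceding dense layer can use use_bias=False). The epsilon matches TF's default.
import numpy as np

z = np.array([[1., 2.], [3., 4.], [5., 12.]])    # batch of 3 examples, 2 units
gamma, beta, eps = 1.0, 0.0, 1e-3

batch_mean = z.mean(axis=0)                      # per-unit mean over the batch
batch_var = z.var(axis=0)                        # per-unit variance over the batch
z_norm = (z - batch_mean) / np.sqrt(batch_var + eps)
out = gamma * z_norm + beta                      # scale and shift

# At inference (training_phase=False), TF instead uses moving averages of the
# mean/variance; those averages are refreshed by the UPDATE_OPS that the control
# dependency in the graph above forces to run with every training step.
# --- end aside ---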
183 | "                                                      'targets:0': mnist.train.labels,\n", 184 | "                                                      'training_phase:0': False})\n", 185 | "        valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 186 | "                                                      'targets:0': mnist.validation.labels,\n", 187 | "                                                      'training_phase:0': False}) \n", 188 | "        \n", 189 | "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 190 | "        print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 191 | "        \n", 192 | "    test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n", 193 | "                                                 'targets:0': mnist.test.labels,\n", 194 | "                                                 'training_phase:0': False})\n", 195 | "    print('Test ACC: %.3f' % test_acc)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": true 203 | }, 204 | "outputs": [], 205 | "source": [] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.7.1" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | -------------------------------------------------------------------------------- /tensorflow1_ipynb/mlp/mlp-dropout.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.\n", 8 | "- Author: Sebastian Raschka\n", 9 | "- GitHub Repository: https://github.com/rasbt/deeplearning-models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "Sebastian Raschka \n", 22 | "\n", 23 | "CPython 3.6.1\n", 24 | "IPython 6.0.0\n", 25 | "\n", 26 | "tensorflow 1.2.0\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "%load_ext watermark\n", 32 | "%watermark -a 'Sebastian Raschka' -v -p tensorflow" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Model Zoo -- Multilayer Perceptron with Dropout" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Typically, dropout is applied after the non-linear activation function (a). However, when using rectified linear units (ReLUs), it might make sense to apply dropout before the non-linear activation (b) for reasons of computational efficiency, depending on the particular code implementation.\n", 47 | "\n", 48 | "> (a): Fully connected, linear activation -> ReLU -> Dropout -> ... \n", 49 | "> (b): Fully connected, linear activation -> Dropout -> ReLU -> ...\n", 50 | "\n", 51 | "Why do (a) and (b) produce the same results in the case of ReLU? Let's answer this question with a simple example, starting with the following *logits* (outputs of the linear activation of the fully connected layer):\n", 52 | "\n", 53 | "> `[-1, -2, -3, 4, 5, 6]`\n", 54 | "\n", 55 | "Let's walk through scenario (a), applying the ReLU activation first. 
The output of the non-linear ReLU function is as follows:\n", 56 | "\n", 57 | "> `[0, 0, 0, 4, 5, 6]`\n", 58 | "\n", 59 | "Remember, the ReLU activation function is defined as $f(x) = max(0, x)$; thus, all negative values are set to zero. Now, applying dropout with a probability of 50%, let's assume that the units being deactivated are units 2, 4, and 6:\n", 60 | "\n", 61 | "\n", 62 | "> `[0*2, 0, 0*2, 0, 5*2, 0] = [0, 0, 0, 0, 10, 0]`\n", 63 | "\n", 64 | "\n", 65 | "Note that in dropout, units are deactivated randomly by default. In the preceding example, we assumed that the 2nd, 4th, and 6th units were deactivated during the training iteration. Also, because we applied dropout with a 50% dropout probability, we scaled the remaining units by a factor of 2.\n", 66 | "\n", 67 | "Now, let's take a look at scenario (b). Again, we assume a 50% dropout rate and that units 2, 4, and 6 are deactivated:\n", 68 | "\n", 69 | "> `[-1, -2, -3, 4, 5, 6] -> [-1*2, 0, -3*2, 0, 5*2, 0]`\n", 70 | "\n", 71 | "\n", 72 | "Now, if we pass this array to the ReLU function, the resulting array will look exactly like the one in scenario (a):\n", 73 | "\n", 74 | "\n", 75 | "> `[-2, 0, -6, 0, 10, 0] -> [0, 0, 0, 0, 10, 0]`" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "### Low-level Implementation" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 2, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "Extracting ./train-images-idx3-ubyte.gz\n", 95 | "Extracting ./train-labels-idx1-ubyte.gz\n", 96 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 97 | "Extracting ./t10k-labels-idx1-ubyte.gz\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "import tensorflow as tf\n", 103 | "from tensorflow.examples.tutorials.mnist import input_data\n", 104 | "\n", 105 | "\n", 106 | "##########################\n", 107 | "### DATASET\n", 108 | "##########################\n", 109 | "\n", 110 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 111 | "\n", 112 | "\n", 113 | "##########################\n", 114 | "### SETTINGS\n", 115 | "##########################\n", 116 | "\n", 117 | "# Hyperparameters\n", 118 | "learning_rate = 0.1\n", 119 | "training_epochs = 20\n", 120 | "batch_size = 64\n", 121 | "dropout_keep_proba = 0.5\n", 122 | "\n", 123 | "# Architecture\n", 124 | "n_hidden_1 = 128\n", 125 | "n_hidden_2 = 256\n", 126 | "n_input = 784\n", 127 | "n_classes = 10\n", 128 | "\n", 129 | "# Other\n", 130 | "random_seed = 123\n", 131 | "\n", 132 | "\n", 133 | "##########################\n", 134 | "### GRAPH DEFINITION\n", 135 | "##########################\n", 136 | "\n", 137 | "g = tf.Graph()\n", 138 | "with g.as_default():\n", 139 | "    \n", 140 | "    tf.set_random_seed(random_seed)\n", 141 | "\n", 142 | "    # Dropout settings\n", 143 | "    keep_proba = tf.placeholder(tf.float32, None, name='keep_proba')\n", 144 | "    \n", 145 | "    # Input data\n", 146 | "    tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 147 | "    tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 148 | "\n", 149 | "    # Model parameters\n", 150 | "    weights = {\n", 151 | "        'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden_1], stddev=0.1)),\n", 152 | "        'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2], stddev=0.1)),\n", 153 | "        'out': tf.Variable(tf.truncated_normal([n_hidden_2, n_classes], stddev=0.1))\n", 154 | "    }\n", 155 | "    biases = {\n", 156 | "        'b1': 
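# --- Editor's aside (not part of the notebook): verifying the (a)/(b) walkthrough ---
# A minimal NumPy sketch with a fixed dropout mask, using the same logits and the
# same deactivated units (2, 4, and 6) as the markdown example above.
import numpy as np

logits = np.array([-1., -2., -3., 4., 5., 6.])
mask = np.array([1., 0., 1., 0., 1., 0.])        # units 2, 4, 6 dropped
scale = 2.0                                      # 1 / keep_probability for p = 0.5
relu = lambda v: np.maximum(0., v)

a = relu(logits) * mask * scale                  # (a) ReLU first, then dropout
b = relu(logits * mask * scale)                  # (b) dropout first, then ReLU

print(a)                                         # [ 0.  0.  0.  0. 10.  0.]
print(np.allclose(a, b))                         # True: the order does not matter
# --- end aside ---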
tf.Variable(tf.zeros([n_hidden_1])),\n", 157 | " 'b2': tf.Variable(tf.zeros([n_hidden_2])),\n", 158 | " 'out': tf.Variable(tf.zeros([n_classes]))\n", 159 | " }\n", 160 | "\n", 161 | " # Multilayer perceptron\n", 162 | " layer_1 = tf.add(tf.matmul(tf_x, weights['h1']), biases['b1'])\n", 163 | " layer_1 = tf.nn.relu(layer_1)\n", 164 | " layer_1 = tf.nn.dropout(layer_1, keep_prob=keep_proba)\n", 165 | " \n", 166 | " layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n", 167 | " layer_2 = tf.nn.relu(layer_2)\n", 168 | " layer_2 = tf.nn.dropout(layer_2, keep_prob=keep_proba)\n", 169 | " \n", 170 | " out_layer = tf.add(tf.matmul(layer_2, weights['out']), biases['out'], name='logits')\n", 171 | "\n", 172 | " # Loss and optimizer\n", 173 | " loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 174 | " cost = tf.reduce_mean(loss, name='cost')\n", 175 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 176 | " train = optimizer.minimize(cost, name='train')\n", 177 | "\n", 178 | " # Prediction\n", 179 | " correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 180 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 3, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "name": "stdout", 190 | "output_type": "stream", 191 | "text": [ 192 | "Epoch: 001 | AvgCost: 0.669 | Train/Valid ACC: 0.927/0.935\n", 193 | "Epoch: 002 | AvgCost: 0.372 | Train/Valid ACC: 0.944/0.953\n", 194 | "Epoch: 003 | AvgCost: 0.308 | Train/Valid ACC: 0.952/0.956\n", 195 | "Epoch: 004 | AvgCost: 0.271 | Train/Valid ACC: 0.962/0.961\n", 196 | "Epoch: 005 | AvgCost: 0.251 | Train/Valid ACC: 0.964/0.966\n", 197 | "Epoch: 006 | AvgCost: 0.231 | Train/Valid ACC: 0.968/0.966\n", 198 | "Epoch: 007 | AvgCost: 0.219 | Train/Valid ACC: 0.970/0.970\n", 199 | "Epoch: 008 | AvgCost: 0.204 | Train/Valid ACC: 0.972/0.971\n", 200 | "Epoch: 009 | AvgCost: 0.194 | Train/Valid ACC: 0.974/0.970\n", 201 | "Epoch: 010 | AvgCost: 0.187 | Train/Valid ACC: 0.976/0.970\n", 202 | "Epoch: 011 | AvgCost: 0.178 | Train/Valid ACC: 0.977/0.972\n", 203 | "Epoch: 012 | AvgCost: 0.175 | Train/Valid ACC: 0.978/0.972\n", 204 | "Epoch: 013 | AvgCost: 0.170 | Train/Valid ACC: 0.979/0.973\n", 205 | "Epoch: 014 | AvgCost: 0.162 | Train/Valid ACC: 0.980/0.975\n", 206 | "Epoch: 015 | AvgCost: 0.157 | Train/Valid ACC: 0.980/0.974\n", 207 | "Epoch: 016 | AvgCost: 0.153 | Train/Valid ACC: 0.982/0.976\n", 208 | "Epoch: 017 | AvgCost: 0.151 | Train/Valid ACC: 0.982/0.976\n", 209 | "Epoch: 018 | AvgCost: 0.147 | Train/Valid ACC: 0.983/0.973\n", 210 | "Epoch: 019 | AvgCost: 0.144 | Train/Valid ACC: 0.984/0.974\n", 211 | "Epoch: 020 | AvgCost: 0.143 | Train/Valid ACC: 0.985/0.975\n", 212 | "Test ACC: 0.974\n" 213 | ] 214 | } 215 | ], 216 | "source": [ 217 | "from numpy.random import seed\n", 218 | "\n", 219 | "##########################\n", 220 | "### TRAINING & EVALUATION\n", 221 | "##########################\n", 222 | "\n", 223 | "with tf.Session(graph=g) as sess:\n", 224 | " sess.run(tf.global_variables_initializer())\n", 225 | "\n", 226 | " seed(random_seed) # random seed for mnist iterator\n", 227 | " for epoch in range(training_epochs):\n", 228 | " avg_cost = 0.\n", 229 | " total_batch = mnist.train.num_examples // batch_size\n", 230 | "\n", 231 | " for i in range(total_batch):\n", 232 | " batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 233 | " _, c = 
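# --- Editor's aside (not part of the notebook): fetching graph elements by name ---
# A small runnable sketch of the string-based fetches used in these training loops:
# naming a tensor 'cost' makes it addressable as 'cost:0' (operation name plus output
# index), and fetching the 'train' op by name returns None, hence the `_` in the
# loops. The graph and names here (g_demo, 'doubled') are illustrative, not from
# the notebook.
import tensorflow as tf

g_demo = tf.Graph()
with g_demo.as_default():
    a = tf.constant(3.0, name='a')
    doubled = tf.multiply(a, 2.0, name='doubled')

with tf.Session(graph=g_demo) as s:
    print(s.run(doubled))       # fetch by Python reference -> 6.0
    print(s.run('doubled:0'))   # fetch by name string      -> 6.0
# --- end aside ---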
sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 234 | "                                                            'targets:0': batch_y,\n", 235 | "                                                            'keep_proba:0': dropout_keep_proba})\n", 236 | "            avg_cost += c\n", 237 | "        \n", 238 | "        train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 239 | "                                                      'targets:0': mnist.train.labels,\n", 240 | "                                                      'keep_proba:0': 1.0})\n", 241 | "        valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 242 | "                                                      'targets:0': mnist.validation.labels,\n", 243 | "                                                      'keep_proba:0': 1.0})\n", 244 | "        \n", 245 | "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 246 | "        print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 247 | "        \n", 248 | "    test_acc = sess.run(accuracy, feed_dict={'features:0': mnist.test.images,\n", 249 | "                                             'targets:0': mnist.test.labels,\n", 250 | "                                             'keep_proba:0': 1.0}) \n", 251 | "    print('Test ACC: %.3f' % test_acc)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "### tensorflow.layers Abstraction" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "Note that we define the *dropout rate*, not the *keep probability*, when using dropout from `tf.layers`." 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 4, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "Extracting ./train-images-idx3-ubyte.gz\n", 278 | "Extracting ./train-labels-idx1-ubyte.gz\n", 279 | "Extracting ./t10k-images-idx3-ubyte.gz\n", 280 | "Extracting ./t10k-labels-idx1-ubyte.gz\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "import tensorflow as tf\n", 286 | "from tensorflow.examples.tutorials.mnist import input_data\n", 287 | "\n", 288 | "\n", 289 | "##########################\n", 290 | "### DATASET\n", 291 | "##########################\n", 292 | "\n", 293 | "mnist = input_data.read_data_sets(\"./\", one_hot=True)\n", 294 | "\n", 295 | "\n", 296 | "##########################\n", 297 | "### SETTINGS\n", 298 | "##########################\n", 299 | "\n", 300 | "# Hyperparameters\n", 301 | "learning_rate = 0.1\n", 302 | "training_epochs = 15\n", 303 | "batch_size = 64\n", 304 | "dropout_rate = 0.5 \n", 305 | "# note that we define the dropout rate, not\n", 306 | "# the \"keep probability\" when using\n", 307 | "# dropout from tf.layers\n", 308 | "\n", 309 | "# Architecture\n", 310 | "n_hidden_1 = 128\n", 311 | "n_hidden_2 = 256\n", 312 | "n_input = 784\n", 313 | "n_classes = 10\n", 314 | "\n", 315 | "# Other\n", 316 | "random_seed = 123\n", 317 | "\n", 318 | "\n", 319 | "##########################\n", 320 | "### GRAPH DEFINITION\n", 321 | "##########################\n", 322 | "\n", 323 | "g = tf.Graph()\n", 324 | "with g.as_default():\n", 325 | "    \n", 326 | "    tf.set_random_seed(random_seed)\n", 327 | "\n", 328 | "    # Dropout settings\n", 329 | "    is_training = tf.placeholder(tf.bool, name='is_training')\n", 330 | "    \n", 331 | "    # Input data\n", 332 | "    tf_x = tf.placeholder(tf.float32, [None, n_input], name='features')\n", 333 | "    tf_y = tf.placeholder(tf.float32, [None, n_classes], name='targets')\n", 334 | "\n", 335 | "    # Multilayer perceptron\n", 336 | "    layer_1 = tf.layers.dense(tf_x, n_hidden_1, activation=tf.nn.relu, \n", 337 | "                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 338 | "    layer_1 = tf.layers.dropout(layer_1, rate=dropout_rate, 
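# --- Editor's aside (not part of the notebook): rate vs. keep_prob ---
# A hedged illustration of the convention change flagged in the note above: the
# low-level tf.nn.dropout expects the probability of *keeping* a unit, whereas
# tf.layers.dropout expects the probability of *dropping* one. With 50% dropout
# both values happen to be 0.5, which hides the difference; with 20% dropout:
#
#   hidden = tf.nn.dropout(hidden, keep_prob=0.8)                       # keep 80%
#   hidden = tf.layers.dropout(hidden, rate=0.2, training=is_training)  # drop 20%
#
# tf.layers.dropout is also an identity op when training=False, so no keep_prob=1.0
# needs to be fed at evaluation time, as the feed_dicts below demonstrate.
# --- end aside ---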
training=is_training)\n", 339 | "    \n", 340 | "    layer_2 = tf.layers.dense(layer_1, n_hidden_2, activation=tf.nn.relu,\n", 341 | "                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 342 | "    layer_2 = tf.layers.dropout(layer_2, rate=dropout_rate, training=is_training)\n", 343 | "    \n", 344 | "    out_layer = tf.layers.dense(layer_2, n_classes, activation=None, name='logits')\n", 345 | "\n", 346 | "    # Loss and optimizer\n", 347 | "    loss = tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y)\n", 348 | "    cost = tf.reduce_mean(loss, name='cost')\n", 349 | "    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n", 350 | "    train = optimizer.minimize(cost, name='train')\n", 351 | "\n", 352 | "    # Prediction\n", 353 | "    correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))\n", 354 | "    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 5, 360 | "metadata": {}, 361 | "outputs": [ 362 | { 363 | "name": "stdout", 364 | "output_type": "stream", 365 | "text": [ 366 | "Epoch: 001 | AvgCost: 0.814 | Train/Valid ACC: 0.917/0.925\n", 367 | "Epoch: 002 | AvgCost: 0.520 | Train/Valid ACC: 0.931/0.938\n", 368 | "Epoch: 003 | AvgCost: 0.457 | Train/Valid ACC: 0.940/0.945\n", 369 | "Epoch: 004 | AvgCost: 0.408 | Train/Valid ACC: 0.948/0.952\n", 370 | "Epoch: 005 | AvgCost: 0.393 | Train/Valid ACC: 0.952/0.956\n", 371 | "Epoch: 006 | AvgCost: 0.376 | Train/Valid ACC: 0.954/0.957\n", 372 | "Epoch: 007 | AvgCost: 0.355 | Train/Valid ACC: 0.956/0.958\n", 373 | "Epoch: 008 | AvgCost: 0.348 | Train/Valid ACC: 0.958/0.960\n", 374 | "Epoch: 009 | AvgCost: 0.338 | Train/Valid ACC: 0.961/0.964\n", 375 | "Epoch: 010 | AvgCost: 0.334 | Train/Valid ACC: 0.962/0.964\n", 376 | "Epoch: 011 | AvgCost: 0.324 | Train/Valid ACC: 0.963/0.965\n", 377 | "Epoch: 012 | AvgCost: 0.315 | Train/Valid ACC: 0.964/0.963\n", 378 | "Epoch: 013 | AvgCost: 0.310 | Train/Valid ACC: 0.965/0.965\n", 379 | "Epoch: 014 | AvgCost: 0.305 | Train/Valid ACC: 0.966/0.965\n", 380 | "Epoch: 015 | AvgCost: 0.305 | Train/Valid ACC: 0.967/0.965\n", 381 | "Test ACC: 0.961\n" 382 | ] 383 | } 384 | ], 385 | "source": [ 386 | "from numpy.random import seed\n", 387 | "\n", 388 | "##########################\n", 389 | "### TRAINING & EVALUATION\n", 390 | "##########################\n", 391 | "  \n", 392 | "with tf.Session(graph=g) as sess:\n", 393 | "    sess.run(tf.global_variables_initializer())\n", 394 | "\n", 395 | "    seed(random_seed) # random seed for mnist iterator\n", 396 | "    for epoch in range(training_epochs):\n", 397 | "        avg_cost = 0.\n", 398 | "        total_batch = mnist.train.num_examples // batch_size\n", 399 | "\n", 400 | "        for i in range(total_batch):\n", 401 | "            batch_x, batch_y = mnist.train.next_batch(batch_size)\n", 402 | "            _, c = sess.run(['train', 'cost:0'], feed_dict={'features:0': batch_x,\n", 403 | "                                                            'targets:0': batch_y,\n", 404 | "                                                            'is_training:0': True})\n", 405 | "            avg_cost += c\n", 406 | "        \n", 407 | "        train_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.train.images,\n", 408 | "                                                      'targets:0': mnist.train.labels,\n", 409 | "                                                      'is_training:0': False})\n", 410 | "        \n", 411 | "        valid_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.validation.images,\n", 412 | "                                                      'targets:0': mnist.validation.labels,\n", 413 | "                                                      'is_training:0': False})\n", 414 | "        \n", 415 | "        print(\"Epoch: %03d | AvgCost: %.3f\" % (epoch + 1, avg_cost / (i + 1)), end=\"\")\n", 416 | "        
print(\" | Train/Valid ACC: %.3f/%.3f\" % (train_acc, valid_acc))\n", 417 | " \n", 418 | " test_acc = sess.run('accuracy:0', feed_dict={'features:0': mnist.test.images,\n", 419 | " 'targets:0': mnist.test.labels,\n", 420 | " 'is_training:0': False})\n", 421 | " print('Test ACC: %.3f' % test_acc)" 422 | ] 423 | } 424 | ], 425 | "metadata": { 426 | "kernelspec": { 427 | "display_name": "Python 3", 428 | "language": "python", 429 | "name": "python3" 430 | }, 431 | "language_info": { 432 | "codemirror_mode": { 433 | "name": "ipython", 434 | "version": 3 435 | }, 436 | "file_extension": ".py", 437 | "mimetype": "text/x-python", 438 | "name": "python", 439 | "nbconvert_exporter": "python", 440 | "pygments_lexer": "ipython3", 441 | "version": "3.7.1" 442 | } 443 | }, 444 | "nbformat": 4, 445 | "nbformat_minor": 2 446 | } 447 | --------------------------------------------------------------------------------