├── .gitignore ├── LICENSE ├── README.md ├── checkpoint.pth ├── differential-privacy-intro └── intro_differential_privacy.ipynb ├── differential-privacy └── differential_privacy.ipynb ├── fashion-mnist-model ├── fashion_mnist_network.ipynb └── fashion_mnist_network_files │ ├── fashion_mnist_network_11_1.png │ ├── fashion_mnist_network_16_1.png │ └── fashion_mnist_network_4_1.png ├── fashion_mnist_network.md ├── federated-learning └── federated_learning.ipynb ├── intro-notebooks ├── loading_image_data.ipynb ├── memory_sharing_vs_copying.ipynb ├── mnist_neural_network.ipynb ├── mnist_neural_network_files │ ├── mnist_neural_network_24_1.png │ └── mnist_neural_network_2_0.png ├── multilayer_network.ipynb ├── pytorch_tensors.ipynb ├── reshaping_tensors.ipynb ├── reshaping_tensors.md ├── single_layer_network.ipynb ├── single_layer_network.md └── transfer_learning.ipynb ├── mnist_neural_network.md ├── multilayer_network.md └── raw_mnist_data ├── FASHION_MNIST_data └── FashionMNIST │ ├── processed │ ├── test.pt │ └── training.pt │ └── raw │ ├── t10k-images-idx3-ubyte │ ├── t10k-labels-idx1-ubyte │ ├── train-images-idx3-ubyte │ └── train-labels-idx1-ubyte ├── MNIST_data └── MNIST │ ├── processed │ ├── test.pt │ └── training.pt │ └── raw │ ├── t10k-images-idx3-ubyte │ ├── t10k-labels-idx1-ubyte │ ├── train-images-idx3-ubyte │ └── train-labels-idx1-ubyte └── feature_extractor.jpeg /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # DS Store 10 | .DS_Store 11 | 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jee Githinji Gikera 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Differential Privacy & Federated Learning. 2 | 3 | Curated notebooks on how to train neural networks using differential privacy and federated learning. 4 | 5 | 6 | ## Intro Notebooks 7 | 8 | Before you start learning about Differential Privacy and Federated Learning, 9 | it's important to understand tensors; the fundamental data structures for neural networks. 
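As a quick taste of what those notebooks cover, here is a minimal sketch of working with tensors (assuming PyTorch is installed; the variable names are only illustrative):

```python
import torch

# create a 2x3 tensor of random values
x = torch.rand(2, 3)

# reshape it into a flat vector of 6 elements (view shares memory with x)
flat = x.view(6)

# element-wise operations broadcast much like NumPy arrays
y = x * 2 + 1

print(x.shape, flat.shape, y.shape)
```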
10 | 11 | ### Learn about tensors: 12 | 13 | - [Pytorch Tensors](intro-notebooks/pytorch_tensors.ipynb) 14 | - [Reshaping Tensors](intro-notebooks/reshaping_tensors.ipynb) 15 | - [Memory Sharing vs Copying](intro-notebooks/memory_sharing_vs_copying.ipynb) 16 | 17 | 18 | ### Creating simple neural networks 19 | 20 | - [Single Layer Network](intro-notebooks/single_layer_network.ipynb) 21 | - [Multi Layer Network](intro-notebooks/multilayer_network.ipynb) 22 | - [Loading Image data](intro-notebooks/loading_image_data.ipynb) 23 | 24 | 25 | ### Creating Dense Networks with MNIST data 26 | 27 | - [MNIST Model](intro-notebooks/mnist_neural_network.ipynb) 28 | - [Fashion MNIST Model](fashion-mnist-model/fashion_mnist_network.ipynb) 29 | 30 | 31 | ### Transfer Learning 32 | 33 | Most of the time you won't want to train a whole convolutional network yourself. 34 | Training modern ConvNets on huge datasets like ImageNet takes weeks, even on multiple GPUs. 35 | [Transfer Learning](intro-notebooks/transfer_learning.ipynb) helps you solve this problem. 36 | 37 | ## What is Differential Privacy? 38 | 39 | Differential Privacy is a set of techniques for 40 | preventing a model from accidentally memorizing secrets present 41 | in a training dataset during the learning process. 42 | 43 | For it to work, we need to uphold the following: 44 | 45 | - Make a promise to a data subject that: You won’t be affected, 46 | adversely or otherwise, by allowing your data to be used in any analysis, 47 | no matter what studies, datasets, or information sources are available. 48 | - Ensure that models learning from sensitive data learn only what they are 49 | supposed to learn, without accidentally learning what they are not supposed to learn from that data. 50 | 51 | Here are some notebooks that explain the concept further: 52 | 53 | - [Differential Privacy Intro](differential-privacy-intro/intro_differential_privacy.ipynb) 54 | - [Differential Privacy](differential-privacy/differential_privacy.ipynb) 55 | 56 | 57 | ## Federated Learning 58 | Instead of bringing all the data to one place for training, 59 | federated learning works by bringing the model to the data. 60 | This allows a data owner to maintain the only copy of their information. 61 | 62 | This notebook on [federated learning](federated-learning/federated_learning.ipynb) explains 63 | this in more detail. 64 | -------------------------------------------------------------------------------- /checkpoint.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/checkpoint.pth -------------------------------------------------------------------------------- /differential-privacy/differential_privacy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Differential Privacy - Simple Database Queries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The database is going to be a VERY simple database with only one boolean column. Each row corresponds to a person. Each value corresponds to whether or not that person has a certain private attribute (such as whether they have a certain disease, or whether they are above/below a certain age).
We are then going to learn how to know whether a database query over such a small database is differentially private or not - and more importantly - what techniques we can employ to ensure various levels of privacy\n", 15 | "\n", 16 | "#### Create a Simple Database\n", 17 | "To do this, initialize a random list of 1s and 0s (which are the entries in our database). Note - the number of entries directly corresponds to the number of people in our database." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 30, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "tensor([1, 1, 1, ..., 1, 1, 1], dtype=torch.uint8)" 29 | ] 30 | }, 31 | "execution_count": 30, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "import torch\n", 38 | "# the number of entries in our DB / this of it as number of people in the DB\n", 39 | "num_entries = 5000\n", 40 | "db = torch.rand(num_entries) > 0.5\n", 41 | "db" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Generate Parallel Databases\n", 49 | "> \"When querying a database, if I removed someone from the database, would the output of the query change?\". \n", 50 | "\n", 51 | "In order to check for this, we create \"parallel databases\" which are simply databases with one entry removed. \n", 52 | "\n", 53 | "We'll create a list of every parallel database to the one currently contained in the \"db\" variable. Then, create a helper function which does the following:\n", 54 | "- creates the initial database (db)\n", 55 | "- creates all parallel databases" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 32, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "def create_parallel_db(db, remove_index):\n", 65 | " return torch.cat((db[0:remove_index], db[remove_index+1:]))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 33, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "def create_parallel_dbs(db):\n", 75 | " parallel_dbs = list()\n", 76 | " for i in range(len(db)):\n", 77 | " pdb = create_parallel_db(db, i)\n", 78 | " parallel_dbs.append(pdb)\n", 79 | " return parallel_dbs" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 34, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def create_db_and_parallels(num_entries):\n", 89 | " # generate dbs and parallel dbs on the fly\n", 90 | " db = torch.rand(num_entries) > 0.5\n", 91 | " pdbs = create_parallel_dbs(db)\n", 92 | " \n", 93 | " return db, pdbs" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 44, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "Real database: tensor([1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=torch.uint8)\n", 106 | "Size of real DB torch.Size([10])\n", 107 | "A sample parallel DB tensor([1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=torch.uint8)\n", 108 | "Size of parallel DB torch.Size([9])\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "db, pdbs = create_db_and_parallels(10)\n", 114 | "pdbs\n", 115 | "print(\"Real database:\", db)\n", 116 | "print(\"Size of real DB\", db.size())\n", 117 | "print(\"A sample parallel DB\", pdbs[0])\n", 118 | "print(\"Size of parallel DB\", pdbs[0].size())" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "# Towards Evaluating The Differential Privacy of a Function\n", 126 
| "\n", 127 | "Intuitively, we want to be able to query our database and evaluate whether or not the result of the query is leaking \"private\" information. \n", 128 | "> This is about evaluating whether the output of a query changes when we remove someone from the database. Specifically, we want to evaluate the *maximum* amount the query changes when someone is removed (maximum over all possible people who could be removed). \n", 129 | "\n", 130 | "To find how much privacy is leaked, we'll iterate over each person in the database and **measure** the difference in the output of the query relative to when we query the entire database. \n", 131 | "\n", 132 | "Just for the sake of argument, let's make our first \"database query\" a simple sum. Aka, we're going to count the number of 1s in the database." 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 53, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "db, pdbs = create_db_and_parallels(200)\n", 142 | "def query(db):\n", 143 | " return db.sum()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 60, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "tensor(106)" 155 | ] 156 | }, 157 | "execution_count": 60, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "query(db)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 61, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "tensor(106)" 175 | ] 176 | }, 177 | "execution_count": 61, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "# the output of the parallel dbs is different from the db query\n", 184 | "query(pdbs[1])\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 62, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "tensor(106)\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "full_db_result = query(db)\n", 202 | "print(full_db_result)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 67, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "sensitivity = 0\n", 212 | "sensitivity_scale = []\n", 213 | "for pdb in pdbs:\n", 214 | " pdb_result = query(pdb)\n", 215 | " db_distance = torch.abs(pdb_result - full_db_result)\n", 216 | " if(db_distance > sensitivity):\n", 217 | " sensitivity_scale.append(db_distance)\n", 218 | " sensitivity = db_distance" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 68, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "tensor(1)" 230 | ] 231 | }, 232 | "execution_count": 68, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "sensitivity" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "#### Sensitivity\n", 246 | "> The maximum amount the query changes when removing an individual from the DB.\n" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "# Evaluating the Privacy of a Function\n", 254 | "\n", 255 | "The difference between each parallel db's query result and the query result for the real database and its max value (which was 1) is called \"sensitivity\". 
It corresponds to the function we chose for the query. The \"sum\" query will always have a sensitivity of exactly 1. We can also calculate sensitivity for other functions as well.\n", 256 | "\n", 257 | "Let's calculate sensitivity for the \"mean\" function." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 75, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "def sensitivity(query, num_entries=1000):\n", 267 | " db, pdbs = create_db_and_parallels(num_entries)\n", 268 | " \n", 269 | " full_db_result = query(db)\n", 270 | " \n", 271 | " max_distance = 0\n", 272 | " for pdb in pdbs:\n", 273 | " # for each parallel db, execute the query (sum, or mean, ..., etc)\n", 274 | " pdb_result = query(pdb)\n", 275 | " db_distance = torch.abs(pdb_result - full_db_result)\n", 276 | " \n", 277 | " if (db_distance > max_distance):\n", 278 | " max_distance = db_distance\n", 279 | "\n", 280 | " return max_distance " 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 76, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# our query is now the mean\n", 290 | "def query(db):\n", 291 | " return db.float().mean()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 77, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/plain": [ 302 | "tensor(0.0005)" 303 | ] 304 | }, 305 | "execution_count": 77, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "\n", 312 | "sensitivity(query)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "Wow! That sensitivity is WAY lower. Note the intuition here. \n", 320 | ">\"Sensitivity\" is measuring how sensitive the output of the query is to a person being removed from the database. \n", 321 | "\n", 322 | "For a simple sum, this is always 1, but for the mean, removing a person is going to change the result of the query by roughly 1 divided by the size of the database. Thus, \"mean\" is a VASTLY less \"sensitive\" function (query) than SUM." 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "# Calculating L1 Sensitivity For Threshold\n", 330 | "\n", 331 | "To calculate the sensitivity for the \"threshold\" function: \n", 332 | "\n", 333 | "- First compute the sum over the database (i.e. sum(db)) and return whether that sum is greater than a certain threshold.\n", 334 | "- Then, create databases of size 10 and threshold of 5 and calculate the sensitivity of the function. \n", 335 | "- Finally, re-initialize the database 10 times and calculate the sensitivity each time."
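Before moving on to the threshold query implemented in the next cells, here is a quick justification of the "roughly 1 divided by the size of the database" claim above (a back-of-the-envelope check, assuming a binary database with $n$ entries of which $k$ are ones):

$$\left|\frac{k}{n}-\frac{k-1}{n-1}\right|=\frac{n-k}{n(n-1)}\ \text{(a 1 is removed)},\qquad \left|\frac{k}{n}-\frac{k}{n-1}\right|=\frac{k}{n(n-1)}\ \text{(a 0 is removed)}.$$

Both quantities are at most $\frac{1}{n-1}\approx\frac{1}{n}$; for $n=1000$ this is about $0.001$, which agrees in order of magnitude with the measured sensitivity of $0.0005$ above.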
336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 78, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "def query(db, threshold=5):\n", 345 | " \"\"\"\n", 346 | " Query that adds a threshold of 5, and returns whether sum is > threshold or not.\n", 347 | " \"\"\"\n", 348 | " return (db.sum() > threshold).float()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 86, 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "0\n", 361 | "tensor(1.)\n", 362 | "0\n", 363 | "0\n", 364 | "0\n", 365 | "0\n", 366 | "0\n", 367 | "0\n", 368 | "0\n", 369 | "tensor(1.)\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "for i in range(10):\n", 375 | " sens = sensitivity(query, num_entries=10)\n", 376 | " print(sens)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "# A Basic Differencing Attack\n", 384 | "\n", 385 | "Sadly none of the functions we've looked at so far are differentially private (despite them having varying levels of sensitivity). The most basic type of attack can be done as follows.\n", 386 | "\n", 387 | "Let's say we wanted to figure out a specific person's value in the database. All we would have to do is query for the sum of the entire database and then the sum of the entire database without that person!\n", 388 | "\n", 389 | "## Performing a Differencing Attack on Row 10 (How privacy can fail)\n", 390 | "\n", 391 | "We'll construct a database and then demonstrate how one can use two different sum queries to explose the value of the person represented by row 10 in the database (note, you'll need to use a database with at least 10 rows)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 91, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "db, _ = create_db_and_parallels(100)" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 92, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "data": { 410 | "text/plain": [ 411 | "tensor([0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,\n", 412 | " 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,\n", 413 | " 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,\n", 414 | " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", 415 | " 1, 0, 1, 0], dtype=torch.uint8)" 416 | ] 417 | }, 418 | "execution_count": 92, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "db" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 93, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "# create a parallel db with that person (index 10) removed\n", 434 | "pdb = create_parallel_db(db, remove_index=10)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 94, 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/plain": [ 445 | "tensor([0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", 446 | " 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,\n", 447 | " 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,\n", 448 | " 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,\n", 449 | " 0, 1, 0], dtype=torch.uint8)" 450 | ] 451 | }, 452 | "execution_count": 94, 453 | "metadata": {}, 454 | 
"output_type": "execute_result" 455 | } 456 | ], 457 | "source": [ 458 | "pdb" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 95, 464 | "metadata": {}, 465 | "outputs": [ 466 | { 467 | "data": { 468 | "text/plain": [ 469 | "tensor(1, dtype=torch.uint8)" 470 | ] 471 | }, 472 | "execution_count": 95, 473 | "metadata": {}, 474 | "output_type": "execute_result" 475 | } 476 | ], 477 | "source": [ 478 | "# differencing attack using sum query\n", 479 | "sum(db) - sum(pdb)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 96, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "tensor(0.0051)" 491 | ] 492 | }, 493 | "execution_count": 96, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "# a differencing attack using mean query\n", 500 | "sum(db).float() /len(db) - sum(pdb).float() / len(pdb)" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 102, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "data": { 510 | "text/plain": [ 511 | "tensor(1, dtype=torch.uint8)" 512 | ] 513 | }, 514 | "execution_count": 102, 515 | "metadata": {}, 516 | "output_type": "execute_result" 517 | } 518 | ], 519 | "source": [ 520 | "# differencing using a threshold\n", 521 | "(sum(db).float() > 50) - (sum(pdb).float() > 50)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": {}, 527 | "source": [ 528 | "# Local Differential Privacy\n", 529 | "\n", 530 | "Differential privacy always requires a form of randommess or noise added to the query to protect from things like Differencing Attacks.\n", 531 | "To explain this, let's look at Randomized Response.\n", 532 | "\n", 533 | "### Randomized Response (Local Differential Privacy)\n", 534 | "\n", 535 | "Let's say I have a group of people I wish to survey about a very taboo behavior which I think they will lie about (say, I want to know if they have ever committed a certain kind of crime). I'm not a policeman, I'm just trying to collect statistics to understand the higher level trend in society. So, how do we do this? One technique is to add randomness to each person's response by giving each person the following instructions (assuming I'm asking a simple yes/no question):\n", 536 | "\n", 537 | "- Flip a coin 2 times.\n", 538 | "- If the first coin flip is heads, answer honestly\n", 539 | "- If the first coin flip is tails, answer according to the second coin flip (heads for yes, tails for no)!\n", 540 | "\n", 541 | "Thus, each person is now protected with \"plausible deniability\". If they answer \"Yes\" to the question \"have you committed X crime?\", then it might becasue they actually did, or it might be because they are answering according to a random coin flip. Each person has a high degree of protection. Furthermore, we can recover the underlying statistics with some accuracy, as the \"true statistics\" are simply averaged with a 50% probability. Thus, if we collect a bunch of samples and it turns out that 60% of people answer yes, then we know that the TRUE distribution is actually centered around 70%, because 70% averaged with a 50% (a coin flip) is 60% which is the result we obtained. \n", 542 | "\n", 543 | "However, it should be noted that, especially when we only have a few samples, this comes at the cost of accuracy. This tradeoff exists across all of Differential Privacy. 
\n", 544 | "\n", 545 | "> NOTE: **The greater the privacy protection (plausible deniability) the less accurate the results. **\n", 546 | "\n", 547 | "Let's implement this local DP for our database before!\n", 548 | "\n", 549 | "The main goal is to: \n", 550 | "* Get the most accurate query with the **greatest** amount of privacy\n", 551 | "* Greatest fit with trust models in the actual world, (don't waste trust)\n", 552 | "\n", 553 | "Let's implement local differential privacy:" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 118, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "data": { 563 | "text/plain": [ 564 | "tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,\n", 565 | " 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,\n", 566 | " 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", 567 | " 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,\n", 568 | " 0, 1, 0, 0], dtype=torch.uint8)" 569 | ] 570 | }, 571 | "execution_count": 118, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | } 575 | ], 576 | "source": [ 577 | "db, pdbs = create_db_and_parallels(100)\n", 578 | "db" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 119, 584 | "metadata": {}, 585 | "outputs": [], 586 | "source": [ 587 | "def query(db):\n", 588 | " true_result = torch.mean(db.float())\n", 589 | " \n", 590 | " # local differential privacy is adding noise to data: replacing some \n", 591 | " # of the values with random values\n", 592 | " first_coin_flip = (torch.rand(len(db)) > 0.5).float()\n", 593 | " second_coin_flip = (torch.rand(len(db)) > 0.5).float()\n", 594 | " \n", 595 | " # differentially private DB ... 
\n", 596 | " augmented_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip\n", 597 | " \n", 598 | " # the result is skewed if we do:\n", 599 | " # torch.mean(augmented_db.float())\n", 600 | " # we remove the skewed average that was the result of the differential privacy\n", 601 | " dp_result = torch.mean(augmented_db.float()) * 2 - 0.5\n", 602 | " \n", 603 | " return dp_result, true_result" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 120, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "Without noise 0.7000000476837158\n", 616 | "With noise: 0.30000001192092896\n" 617 | ] 618 | } 619 | ], 620 | "source": [ 621 | "db, pdbs = create_db_and_parallels(10)\n", 622 | "private_result, true_result = query(db)\n", 623 | "print(f\"Without noise {private_result}\")\n", 624 | "print(f\"With noise: {true_result}\")" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 124, 630 | "metadata": {}, 631 | "outputs": [ 632 | { 633 | "name": "stdout", 634 | "output_type": "stream", 635 | "text": [ 636 | "Without noise 0.42000001668930054\n", 637 | "With noise: 0.4699999988079071\n" 638 | ] 639 | } 640 | ], 641 | "source": [ 642 | "# Increasing the size of the dateset\n", 643 | "db, pdbs = create_db_and_parallels(100)\n", 644 | "private_result, true_result = query(db)\n", 645 | "print(f\"Without noise {private_result}\")\n", 646 | "print(f\"With noise: {true_result}\")" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 126, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "name": "stdout", 656 | "output_type": "stream", 657 | "text": [ 658 | "Without noise 0.5099999904632568\n", 659 | "With noise: 0.5210000276565552\n" 660 | ] 661 | } 662 | ], 663 | "source": [ 664 | "# Increasing the size of the dateset even further\n", 665 | "db, pdbs = create_db_and_parallels(1000)\n", 666 | "private_result, true_result = query(db)\n", 667 | "print(f\"Without noise {private_result}\")\n", 668 | "print(f\"With noise: {true_result}\")" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "As we have seen,\n", 676 | "> The more data we have the more the noise will tend to not affect the output of the query" 677 | ] 678 | }, 679 | { 680 | "cell_type": "markdown", 681 | "metadata": {}, 682 | "source": [ 683 | "# Varying Amounts of Noise\n", 684 | "\n", 685 | "We are going to augment the randomized response query to allow for varying amounts of randomness to be added. To do this, we bias the coin flip to be higher or lower and then run the same experiment. 
\n", 686 | "\n", 687 | "We'll need to both adjust the likelihood of the first coin flip AND the de-skewing at the end (where we create the \"augmented_result\" variable).\n" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 130, 693 | "metadata": {}, 694 | "outputs": [ 695 | { 696 | "name": "stdout", 697 | "output_type": "stream", 698 | "text": [ 699 | "True result: tensor(0.5210)\n", 700 | "Skewed result: tensor(0.5140)\n", 701 | "De-skewed result: tensor(0.5280)\n" 702 | ] 703 | } 704 | ], 705 | "source": [ 706 | "# Noise < 0.5 sets the likelihood that the coin flip will be heads, and vice-versa.\n", 707 | "noise = 0.2\n", 708 | "\n", 709 | "true_result = torch.mean(db.float())\n", 710 | "# let's add the noise to data: replacing some of the values with random values\n", 711 | "first_coin_flip = (torch.rand(len(db)) > noise).float()\n", 712 | "second_coin_flip = (torch.rand(len(db)) > 0.5).float()\n", 713 | "\n", 714 | "# differentially private DB ... \n", 715 | "augmented_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip\n", 716 | "\n", 717 | "# since the result will be skewed if we do: torch.mean(augmented_db.float())\n", 718 | "# we'll remove the skewed average above by doing below:\n", 719 | "dp_result = torch.mean(augmented_db.float()) * 2 - 0.5\n", 720 | "\n", 721 | "sk_result = augmented_db.float().mean()\n", 722 | "print('True result:', true_result)\n", 723 | "print('Skewed result:', sk_result)\n", 724 | "print('De-skewed result:', dp_result)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 131, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "def query(db, noise=0.2):\n", 734 | " \"\"\"Default noise(0.2) above sets the likelihood that the coin flip will be heads\"\"\"\n", 735 | " true_result = torch.mean(db.float())\n", 736 | "\n", 737 | " # local diff privacy is adding noise to data: replacing some \n", 738 | " # of the values with random values\n", 739 | " first_coin_flip = (torch.rand(len(db)) > noise).float()\n", 740 | " second_coin_flip = (torch.rand(len(db)) > 0.5).float()\n", 741 | "\n", 742 | " # differentially private DB ... 
\n", 743 | " augmented_db = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip\n", 744 | "\n", 745 | " # the result is skewed if we do:\n", 746 | " # torch.mean(augmented_db.float())\n", 747 | " # we remove the skewed average that was the result of the differential privacy\n", 748 | " sk_result = augmented_db.float().mean()\n", 749 | " private_result = ((sk_result / noise ) - 0.5) * noise / (1 - noise)\n", 750 | "\n", 751 | " return private_result, true_result" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 132, 757 | "metadata": {}, 758 | "outputs": [ 759 | { 760 | "name": "stdout", 761 | "output_type": "stream", 762 | "text": [ 763 | "Without noise 0.25\n", 764 | "With noise: 0.30000001192092896\n" 765 | ] 766 | } 767 | ], 768 | "source": [ 769 | "# test varying noise\n", 770 | "db, pdbs = create_db_and_parallels(10)\n", 771 | "private_result, true_result = query(db, noise=0.2)\n", 772 | "print(f\"Without noise {private_result}\")\n", 773 | "print(f\"With noise: {true_result}\")\n" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": 133, 779 | "metadata": {}, 780 | "outputs": [ 781 | { 782 | "name": "stdout", 783 | "output_type": "stream", 784 | "text": [ 785 | "Without noise 0.7333332300186157\n", 786 | "With noise: 0.6399999856948853\n" 787 | ] 788 | } 789 | ], 790 | "source": [ 791 | "# Increasing the size of the dateset even further\n", 792 | "db, pdbs = create_db_and_parallels(100)\n", 793 | "private_result, true_result = query(db, noise=0.4)\n", 794 | "print(f\"Without noise {private_result}\")\n", 795 | "print(f\"With noise: {true_result}\")" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 134, 801 | "metadata": {}, 802 | "outputs": [ 803 | { 804 | "name": "stdout", 805 | "output_type": "stream", 806 | "text": [ 807 | "Without noise 0.5264999866485596\n", 808 | "With noise: 0.5004000067710876\n" 809 | ] 810 | } 811 | ], 812 | "source": [ 813 | "# Increasing the size of the dateset even further\n", 814 | "db, pdbs = create_db_and_parallels(10000)\n", 815 | "private_result, true_result = query(db, noise=0.8)\n", 816 | "print(f\"Without noise {private_result}\")\n", 817 | "print(f\"With noise: {true_result}\")\n" 818 | ] 819 | }, 820 | { 821 | "cell_type": "markdown", 822 | "metadata": {}, 823 | "source": [ 824 | "From the analysis above, with more data, its easier to protect privacy with noise. It becomes a lot easier to learn about general characteristics in the DB because the algorithm has more data points to look at and compare with each other." 825 | ] 826 | }, 827 | { 828 | "cell_type": "markdown", 829 | "metadata": {}, 830 | "source": [ 831 | "So differential privacy mechanisms has helped us filter out any information unique to individual data entities and try to let through information that is consistent across multiple different people in the dataset. \n", 832 | "> The larger the dataset, the easier it is to protect privacy. " 833 | ] 834 | }, 835 | { 836 | "cell_type": "markdown", 837 | "metadata": {}, 838 | "source": [ 839 | "# The Formal Definition of Differential Privacy\n", 840 | "\n", 841 | "The previous method of adding noise was called \"Local Differentail Privacy\" because we added noise to each datapoint individually. This is necessary for some situations wherein the data is SO sensitive that individuals do not trust noise to be added later. However, it comes at a very high cost in terms of accuracy. 
\n", 842 | "\n", 843 | "However, alternatively we can add noise AFTER data has been aggregated by a function. This kind of noise can allow for similar levels of protection with a lower affect on accuracy. However, participants must be able to trust that no-one looked at their datapoints _before_ the aggregation took place. In some situations this works out well, in others (such as an individual hand-surveying a group of people), this is less realistic.\n", 844 | "\n", 845 | "Nevertheless, global differential privacy is incredibly important because it allows us to perform differential privacy on smaller groups of individuals with lower amounts of noise. Let's revisit our sum functions." 846 | ] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "execution_count": 40, 851 | "metadata": {}, 852 | "outputs": [ 853 | { 854 | "data": { 855 | "text/plain": [ 856 | "tensor(40.)" 857 | ] 858 | }, 859 | "execution_count": 40, 860 | "metadata": {}, 861 | "output_type": "execute_result" 862 | } 863 | ], 864 | "source": [ 865 | "db, pdbs = create_db_and_parallels(100)\n", 866 | "\n", 867 | "def query(db):\n", 868 | " return torch.sum(db.float())\n", 869 | "\n", 870 | "def M(db):\n", 871 | " query(db) + noise\n", 872 | "\n", 873 | "query(db)" 874 | ] 875 | }, 876 | { 877 | "cell_type": "markdown", 878 | "metadata": {}, 879 | "source": [ 880 | "So the idea here is that we want to add noise to the output of our function. We actually have two different kinds of noise we can add - Laplacian Noise or Gaussian Noise. However, before we do so at this point we need to dive into the formal definition of Differential Privacy.\n", 881 | "\n", 882 | "![alt text](dp_formula.png \"Title\")" 883 | ] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": {}, 888 | "source": [ 889 | "_Image From: \"The Algorithmic Foundations of Differential Privacy\" - Cynthia Dwork and Aaron Roth - https://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf_" 890 | ] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 | "metadata": {}, 895 | "source": [ 896 | "This definition does not _create_ differential privacy, instead it is a measure of how much privacy is afforded by a query M. Specifically, it's a comparison between running the query M on a database (x) and a parallel database (y). As you remember, parallel databases are defined to be the same as a full database (x) with one entry/person removed.\n", 897 | "\n", 898 | "Thus, this definition says that FOR ALL parallel databases, the maximum distance between a query on database (x) and the same query on database (y) will be e^epsilon, but that occasionally this constraint won't hold with probability delta. Thus, this theorem is called \"epsilon delta\" differential privacy.\n", 899 | "\n", 900 | "# Epsilon\n", 901 | "\n", 902 | "Let's unpack the intuition of this for a moment. \n", 903 | "\n", 904 | "Epsilon Zero: If a query satisfied this inequality where epsilon was set to 0, then that would mean that the query for all parallel databases outputed the exact same value as the full database. As you may remember, when we calculated the \"threshold\" function, often the Sensitivity was 0. 
In that case, the epsilon also happened to be zero.\n", 905 | "\n", 906 | "Epsilon One: If a query satisfied this inequality with epsilon 1, then the maximum distance between all queries would be 1 - or more precisely - the maximum distance between the two random distributions M(x) and M(y) is 1 (because all these queries have some amount of randomness in them, just like we observed in the last section).\n", 907 | "\n", 908 | "# Delta\n", 909 | "\n", 910 | "Delta is basically the probability that epsilon breaks. Namely, sometimes the epsilon is different for some queries than it is for others. For example, you may remember when we were calculating the sensitivity of threshold, most of the time sensitivity was 0 but sometimes it was 1. Thus, we could calculate this as \"epsilon zero but non-zero delta\" which would say that epsilon is perfect except for some probability of the time when it's arbitrarily higher. Note that this expression doesn't represent the full tradeoff between epsilon and delta." 911 | ] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "# How To Add Noise for Global Differential Privacy\n", 918 | "\n", 919 | "Global Differential Privacy adds noise to the output of a query.\n", 920 | "We'll add noise to the output of our query so that it satisfies a certain epsilon-delta differential privacy threshold.\n", 921 | "\n", 922 | "There are two kinds of noise we can add \n", 923 | "- Gaussian Noise\n", 924 | "- Laplacian Noise. \n", 925 | "\n", 926 | "Generally speaking Laplacian is better, but both are still valid. Now to the hard question...\n", 927 | "\n", 928 | "### How much noise should we add?\n", 929 | "\n", 930 | "The amount of noise necessary to add to the output of a query is a function of four things:\n", 931 | "\n", 932 | "- the type of noise (Gaussian/Laplacian)\n", 933 | "- the sensitivity of the query/function\n", 934 | "- the desired epsilon (ε)\n", 935 | "- the desired delta (δ)\n", 936 | "\n", 937 | "Thus, for each type of noise we're adding, we have different way of calculating how much to add as a function of sensitivity, epsilon, and delta.\n", 938 | "\n", 939 | "Laplacian noise is increased/decreased according to a \"scale\" parameter b. We choose \"b\" based on the following formula.\n", 940 | "\n", 941 | "`b = sensitivity(query) / epsilon`\n", 942 | "\n", 943 | "In other words, if we set b to be this value, then we know that we will have a privacy leakage of <= epsilon. Furthermore, the nice thing about Laplace is that it guarantees this with delta == 0. There are some tunings where we can have very low epsilon where delta is non-zero, but we'll ignore them for now.\n", 944 | "\n", 945 | "### Querying Repeatedly\n", 946 | "\n", 947 | "- if we query the database multiple times - we can simply add the epsilons (Even if we change the amount of noise and their epsilons are not the same)." 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": null, 953 | "metadata": {}, 954 | "outputs": [], 955 | "source": [] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | "# Create a Differentially Private Query\n", 962 | "\n", 963 | "Let's create a query function which sums over the database and adds just the right amount of noise such that it satisfies an epsilon constraint. query will be for \"sum\" and for \"mean\". We'll use the correct sensitivity measures for both." 
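As a preview of the cells below, here is a minimal self-contained sketch with the sensitivities written out explicitly (for a binary database, removing one person changes a sum by at most 1 and a mean by roughly $1/n$). The function and variable names here are illustrative, not the notebook's own:

```python
import numpy as np
import torch

def laplace_mechanism(db, query, sensitivity, epsilon):
    # Laplace scale b = sensitivity / epsilon gives epsilon-DP (with delta = 0)
    b = sensitivity / epsilon
    return query(db) + np.random.laplace(loc=0, scale=b)

epsilon = 0.5                              # illustrative privacy budget
db = (torch.rand(1000) > 0.5).float()      # toy binary database

# sum query: sensitivity 1
noisy_sum = laplace_mechanism(db, lambda d: d.sum(), sensitivity=1, epsilon=epsilon)

# mean query: sensitivity roughly 1 / len(db)
noisy_mean = laplace_mechanism(db, lambda d: d.mean(), sensitivity=1 / len(db), epsilon=epsilon)

print(noisy_sum, noisy_mean)
```

The smaller the epsilon, the larger the noise scale, so a tighter privacy budget costs accuracy.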
964 | ] 965 | }, 966 | { 967 | "cell_type": "code", 968 | "execution_count": 108, 969 | "metadata": {}, 970 | "outputs": [], 971 | "source": [ 972 | "epsilon = 0.001" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 77, 978 | "metadata": {}, 979 | "outputs": [], 980 | "source": [ 981 | "import numpy as np" 982 | ] 983 | }, 984 | { 985 | "cell_type": "code", 986 | "execution_count": 78, 987 | "metadata": {}, 988 | "outputs": [], 989 | "source": [ 990 | "db, pdbs = create_db_and_parallels(100)" 991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": 79, 996 | "metadata": {}, 997 | "outputs": [ 998 | { 999 | "data": { 1000 | "text/plain": [ 1001 | "tensor([0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0,\n", 1002 | " 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1,\n", 1003 | " 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,\n", 1004 | " 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,\n", 1005 | " 0, 1, 0, 1], dtype=torch.uint8)" 1006 | ] 1007 | }, 1008 | "execution_count": 79, 1009 | "metadata": {}, 1010 | "output_type": "execute_result" 1011 | } 1012 | ], 1013 | "source": [ 1014 | "db" 1015 | ] 1016 | }, 1017 | { 1018 | "cell_type": "code", 1019 | "execution_count": 80, 1020 | "metadata": {}, 1021 | "outputs": [], 1022 | "source": [ 1023 | "def sum_query(db):\n", 1024 | " return db.sum()" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": 109, 1030 | "metadata": {}, 1031 | "outputs": [], 1032 | "source": [ 1033 | "def laplacian_mechanism(db, query, sensitivity):\n", 1034 | " beta = sensitivity / epsilon\n", 1035 | " noise = torch.tensor(np.random.laplace(0, beta, 1))\n", 1036 | " \n", 1037 | " return query(db) + noise" 1038 | ] 1039 | }, 1040 | { 1041 | "cell_type": "code", 1042 | "execution_count": 111, 1043 | "metadata": {}, 1044 | "outputs": [ 1045 | { 1046 | "data": { 1047 | "text/plain": [ 1048 | "tensor([46.2211], dtype=torch.float64)" 1049 | ] 1050 | }, 1051 | "execution_count": 111, 1052 | "metadata": {}, 1053 | "output_type": "execute_result" 1054 | } 1055 | ], 1056 | "source": [ 1057 | "laplacian_mechanism(db, sum_query, 0.01)" 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "code", 1062 | "execution_count": 112, 1063 | "metadata": {}, 1064 | "outputs": [], 1065 | "source": [ 1066 | "def mean_query(db):\n", 1067 | " return torch.mean(db.float())" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": 117, 1073 | "metadata": {}, 1074 | "outputs": [ 1075 | { 1076 | "data": { 1077 | "text/plain": [ 1078 | "tensor([413.9655], dtype=torch.float64)" 1079 | ] 1080 | }, 1081 | "execution_count": 117, 1082 | "metadata": {}, 1083 | "output_type": "execute_result" 1084 | } 1085 | ], 1086 | "source": [ 1087 | "laplacian_mechanism(db, mean_query, 1)" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "markdown", 1092 | "metadata": {}, 1093 | "source": [ 1094 | "# Differential Privacy for Deep Learning\n", 1095 | "\n", 1096 | "So what does all of this have to do with Deep Learning? Well, these mechanisms form the core primitives for how Differential Privacy provides guarantees in the context of Deep Learning. \n", 1097 | "\n", 1098 | "### Perfect Privacy\n", 1099 | "> \"a query to a database returns the same value even if we remove any person from the database\".\n", 1100 | "\n", 1101 | "In the context of Deep Learning, we have a similar standard. 
\n", 1102 | "\n", 1103 | "> Training a model on a dataset should return the same model even if we remove any person from the dataset.\n", 1104 | "\n", 1105 | "Thus, we've replaced \"querying a database\" with \"training a model on a dataset\". In essence, the training process is a kind of query. However, one should note that this adds two points of complexity which database queries did not have:\n", 1106 | "\n", 1107 | " 1. do we always know where \"people\" are referenced in the dataset?\n", 1108 | " 2. neural models rarely never train to the same output model, even on identical data\n", 1109 | "\n", 1110 | "The answer to (1) is to treat each training example as a single, separate person. Strictly speaking, this is often overly zealous as some training examples have no relevance to people and others may have multiple/partial (consider an image with multiple people contained within it). Thus, localizing exactly where \"people\" are referenced, and thus how much your model would change if people were removed, is challenging.\n", 1111 | "\n", 1112 | "The answer to (2) is also an open problem. To solve this, lets look at PATE.\n", 1113 | "\n", 1114 | "## Scenario: A Health Neural Network\n", 1115 | "\n", 1116 | "You work for a hospital and you have a large collection of images about your patients. However, you don't know what's in them. You would like to use these images to develop a neural network which can automatically classify them, however since your images aren't labeled, they aren't sufficient to train a classifier.\n", 1117 | "\n", 1118 | "However, being a cunning strategist, you realize that you can reach out to 10 partner hospitals which have annotated data. It is your hope to train your new classifier on their datasets so that you can automatically label your own. While these hospitals are interested in helping, they have privacy concerns regarding information about their patients. Thus, you will use the following technique to train a classifier which protects the privacy of patients in the other hospitals.\n", 1119 | "\n", 1120 | "- 1) You'll ask each of the 10 hospitals to train a model on their own datasets (All of which have the same kinds of labels)\n", 1121 | "- 2) You'll then use each of the 10 partner models to predict on your local dataset, generating 10 labels for each of your datapoints\n", 1122 | "- 3) Then, for each local data point (now with 10 labels), you will perform a DP query to generate the final true label. This query is a \"max\" function, where \"max\" is the most frequent label across the 10 labels. We will need to add laplacian noise to make this Differentially Private to a certain epsilon/delta constraint.\n", 1123 | "- 4) Finally, we will retrain a new model on our local dataset which now has labels. This will be our final \"DP\" model.\n", 1124 | "\n", 1125 | "So, let's walk through these steps. I will assume you're already familiar with how to train/predict a deep neural network, so we'll skip steps 1 and 2 and work with example data. We'll focus instead on step 3, namely how to perform the DP query for each example using toy data.\n", 1126 | "\n", 1127 | "So, let's say we have 10,000 training examples, and we've got 10 labels for each example (from our 10 \"teacher models\" which were trained directly on private data). Each label is chosen from a set of 10 possible labels (categories) for each image." 
1128 | ] 1129 | }, 1130 | { 1131 | "cell_type": "code", 1132 | "execution_count": 3, 1133 | "metadata": {}, 1134 | "outputs": [], 1135 | "source": [ 1136 | "import numpy as np" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 4, 1142 | "metadata": {}, 1143 | "outputs": [], 1144 | "source": [ 1145 | "num_teachers = 10 # we're working with 10 partner hospitals\n", 1146 | "num_examples = 10000 # the size of OUR dataset\n", 1147 | "num_labels = 10 # number of lablels for our classifier" 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "code", 1152 | "execution_count": 5, 1153 | "metadata": {}, 1154 | "outputs": [], 1155 | "source": [ 1156 | "# fake predictions\n", 1157 | "fake_preds = (\n", 1158 | " np.random.rand(\n", 1159 | " num_teachers, num_examples\n", 1160 | " ) * num_labels).astype(int).transpose(1,0)" 1161 | ] 1162 | }, 1163 | { 1164 | "cell_type": "code", 1165 | "execution_count": 6, 1166 | "metadata": {}, 1167 | "outputs": [ 1168 | { 1169 | "data": { 1170 | "text/plain": [ 1171 | "array([8, 1, 2, ..., 2, 2, 5])" 1172 | ] 1173 | }, 1174 | "execution_count": 6, 1175 | "metadata": {}, 1176 | "output_type": "execute_result" 1177 | } 1178 | ], 1179 | "source": [ 1180 | "fake_preds[:,0]\n" 1181 | ] 1182 | }, 1183 | { 1184 | "cell_type": "code", 1185 | "execution_count": 9, 1186 | "metadata": {}, 1187 | "outputs": [], 1188 | "source": [ 1189 | "# Step 3: Perform a DP query to generate the final true label/outputs,\n", 1190 | "# Use the argmax function to find the most frequent label across all 10 labels,\n", 1191 | "# Then finally add some noise to make it differentially private.\n", 1192 | "\n", 1193 | "new_labels = list()\n", 1194 | "for an_image in fake_preds:\n", 1195 | " # count the most frequent label the hospitals came up with\n", 1196 | " label_counts = np.bincount(an_image, minlength=num_labels)\n", 1197 | "\n", 1198 | " epsilon = 0.1\n", 1199 | " beta = 1 / epsilon\n", 1200 | "\n", 1201 | " for i in range(len(label_counts)):\n", 1202 | " # for each label, add some noise to the counts\n", 1203 | " label_counts[i] += np.random.laplace(0, beta, 1)\n", 1204 | "\n", 1205 | " new_label = np.argmax(label_counts)\n", 1206 | " \n", 1207 | " new_labels.append(new_label)" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": 10, 1213 | "metadata": {}, 1214 | "outputs": [ 1215 | { 1216 | "data": { 1217 | "text/plain": [ 1218 | "[2, 2, 2, 2, 0, 5, 4, 0, 0, 4]" 1219 | ] 1220 | }, 1221 | "execution_count": 10, 1222 | "metadata": {}, 1223 | "output_type": "execute_result" 1224 | } 1225 | ], 1226 | "source": [ 1227 | "# new_labels\n", 1228 | "new_labels[:10]" 1229 | ] 1230 | }, 1231 | { 1232 | "cell_type": "markdown", 1233 | "metadata": {}, 1234 | "source": [ 1235 | "# PATE Analysis" 1236 | ] 1237 | }, 1238 | { 1239 | "cell_type": "code", 1240 | "execution_count": 15, 1241 | "metadata": {}, 1242 | "outputs": [ 1243 | { 1244 | "name": "stdout", 1245 | "output_type": "stream", 1246 | "text": [ 1247 | "[0 0 1 1 0 0 1 0 1 6]\n" 1248 | ] 1249 | }, 1250 | { 1251 | "data": { 1252 | "text/plain": [ 1253 | "9" 1254 | ] 1255 | }, 1256 | "execution_count": 15, 1257 | "metadata": {}, 1258 | "output_type": "execute_result" 1259 | } 1260 | ], 1261 | "source": [ 1262 | "# lets say the hospitals came up with these outputs... 
9, 9, 3, 6 ..., 2\n", 1263 | "labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])\n", 1264 | "counts = np.bincount(labels, minlength=10)\n", 1265 | "print(counts)\n", 1266 | "query_result = np.argmax(counts)\n", 1267 | "query_result\n" 1268 | ] 1269 | }, 1270 | { 1271 | "cell_type": "markdown", 1272 | "metadata": {}, 1273 | "source": [ 1274 | "If every hospital says the result is 9, then we have very low sensitivity.\n", 1275 | "We could remove a person, from the dataset, and the query results still is 9,\n", 1276 | "then we have not leaked any information. \n", 1277 | "Core assumption: The same patient was not present at any of this two hospitals.\n", 1278 | "\n", 1279 | "Removing any one of this hospitals, acts as a proxy to removing one person, which means that if we do remove one hospital, the query result should not be different.\n", 1280 | "\n" 1281 | ] 1282 | }, 1283 | { 1284 | "cell_type": "code", 1285 | "execution_count": 16, 1286 | "metadata": {}, 1287 | "outputs": [], 1288 | "source": [ 1289 | "from syft.frameworks.torch.differential_privacy import pate" 1290 | ] 1291 | }, 1292 | { 1293 | "cell_type": "code", 1294 | "execution_count": 28, 1295 | "metadata": {}, 1296 | "outputs": [ 1297 | { 1298 | "name": "stdout", 1299 | "output_type": "stream", 1300 | "text": [ 1301 | "Warning: May not have used enough values of l. Increase 'moments' variable and run again.\n", 1302 | "Data Independent Epsilon 11.756462732485115\n", 1303 | "Data Dependent Epsilon 1.52655213289881\n" 1304 | ] 1305 | } 1306 | ], 1307 | "source": [ 1308 | "num_teachers, num_examples, num_labels = (100, 100, 10)\n", 1309 | "# generate fake predictions/labels\n", 1310 | "preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)\n", 1311 | "indices = (np.random.rand(num_examples) * num_labels).astype(int) # true answers\n", 1312 | "\n", 1313 | "preds[:,0:10] *= 0\n", 1314 | "\n", 1315 | "# perform PATE to find the data depended epsilon and data independent epsilon\n", 1316 | "data_dep_eps, data_ind_eps = pate.perform_analysis(\n", 1317 | " teacher_preds=preds, \n", 1318 | " indices=indices, \n", 1319 | " noise_eps=0.1, \n", 1320 | " delta=1e-5\n", 1321 | ")\n", 1322 | "print('Data Independent Epsilon', data_ind_eps)\n", 1323 | "print('Data Dependent Epsilon', data_dep_eps)\n", 1324 | "\n", 1325 | "assert data_dep_eps < data_ind_eps\n" 1326 | ] 1327 | }, 1328 | { 1329 | "cell_type": "code", 1330 | "execution_count": 29, 1331 | "metadata": {}, 1332 | "outputs": [ 1333 | { 1334 | "name": "stdout", 1335 | "output_type": "stream", 1336 | "text": [ 1337 | "Warning: May not have used enough values of l. 
Increase 'moments' variable and run again.\n", 1338 | "Data Independent Epsilon: 11.756462732485115\n", 1339 | "Data Dependent Epsilon: 1.52655213289881\n" 1340 | ] 1341 | } 1342 | ], 1343 | "source": [ 1344 | "data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)\n", 1345 | "print(\"Data Independent Epsilon:\", data_ind_eps)\n", 1346 | "print(\"Data Dependent Epsilon:\", data_dep_eps)" 1347 | ] 1348 | }, 1349 | { 1350 | "cell_type": "code", 1351 | "execution_count": 21, 1352 | "metadata": {}, 1353 | "outputs": [], 1354 | "source": [ 1355 | "preds[:,0:50] *= 0" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": 22, 1361 | "metadata": {}, 1362 | "outputs": [ 1363 | { 1364 | "name": "stdout", 1365 | "output_type": "stream", 1366 | "text": [ 1367 | "Data Independent Epsilon: 411.5129254649703\n", 1368 | "Data Dependent Epsilon: 9.219308825046408\n" 1369 | ] 1370 | } 1371 | ], 1372 | "source": [ 1373 | "data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5, moments=20)\n", 1374 | "print(\"Data Independent Epsilon:\", data_ind_eps)\n", 1375 | "print(\"Data Dependent Epsilon:\", data_dep_eps)" 1376 | ] 1377 | }, 1378 | { 1379 | "cell_type": "code", 1380 | "execution_count": null, 1381 | "metadata": {}, 1382 | "outputs": [], 1383 | "source": [] 1384 | }, 1385 | { 1386 | "cell_type": "markdown", 1387 | "metadata": {}, 1388 | "source": [ 1389 | "# Where to Go From Here\n", 1390 | "\n", 1391 | "\n", 1392 | "Read:\n", 1393 | " - Algorithmic Foundations of Differential Privacy: https://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf\n", 1394 | " - Deep Learning with Differential Privacy: https://arxiv.org/pdf/1607.00133.pdf\n", 1395 | " - The Ethical Algorithm: https://www.amazon.com/Ethical-Algorithm-Science-Socially-Design/dp/0190948205\n", 1396 | " \n", 1397 | "Topics:\n", 1398 | " - The Exponential Mechanism\n", 1399 | " - The Moment's Accountant\n", 1400 | " - Differentially Private Stochastic Gradient Descent\n", 1401 | "\n", 1402 | "Advice:\n", 1403 | " - For deployments - stick with public frameworks!\n", 1404 | " - Join the Differential Privacy Community\n", 1405 | " - Don't get ahead of yourself - DP is still in the early days" 1406 | ] 1407 | }, 1408 | { 1409 | "cell_type": "markdown", 1410 | "metadata": {}, 1411 | "source": [ 1412 | "# Application of DP in Private Federated Learning" 1413 | ] 1414 | }, 1415 | { 1416 | "cell_type": "markdown", 1417 | "metadata": {}, 1418 | "source": [ 1419 | "DP works by adding statistical noise either at the input level or output level of the model so that you can mask out individual user contribution, but at the same time gain insight into th overall population without sacrificing privacy.\n", 1420 | "\n", 1421 | "> Case: Figure out average money one has in their pockets.\n", 1422 | "We could go and ask someone how much they have in their wallet. They pick a random number between -100 and 100. Add that to the real value, say $20 and a picked number of 100. resulting in 120. 
That way, we have no way to know what the actual amount of money in their wallet is.\n", 1423 | "When sufficiently large numbers of people submit these results, if we take the average, the noise will cancel out and we'll start seeing the true average.\n", 1424 | "\n", 1425 | "\n", 1426 | "Apart from purely statistical use cases, we can apply DP in private federated learning.\n", 1427 | "\n", 1428 | "Suppose you want to train a model using distributed learning across a number of user devices. One way to do that is to get all the private data from the devices, but that's not very privacy friendly. \n", 1429 | "\n", 1430 | "Instead, we send the model from the server out to the devices. The devices then train the model\n", 1431 | "on their local user data, and only send privatized model updates back to the server.\n", 1432 | "The server then aggregates the updates to refine the overall model.\n", 1433 | "As you do more and more rounds, the model slowly converges as if it had been trained on the whole population's data, without \n", 1434 | "private user data ever having to leave the devices.\n", 1435 | "If you increase the level of privacy, the model converges a bit slower and vice versa.\n" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "markdown", 1440 | "metadata": {}, 1441 | "source": [ 1442 | "# Project:\n", 1443 | "\n", 1444 | "For the final project for this section, you're going to train a DP model using the PATE method on the MNIST dataset, provided below." 1445 | ] 1446 | }, 1447 | { 1448 | "cell_type": "code", 1449 | "execution_count": 23, 1450 | "metadata": {}, 1451 | "outputs": [ 1452 | { 1453 | "name": "stdout", 1454 | "output_type": "stream", 1455 | "text": [ 1456 | "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz\n" 1457 | ] 1458 | }, 1459 | { 1460 | "name": "stderr", 1461 | "output_type": "stream", 1462 | "text": [ 1463 | "100.1%" 1464 | ] 1465 | }, 1466 | { 1467 | "name": "stdout", 1468 | "output_type": "stream", 1469 | "text": [ 1470 | "Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz\n", 1471 | "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz\n" 1472 | ] 1473 | }, 1474 | { 1475 | "name": "stderr", 1476 | "output_type": "stream", 1477 | "text": [ 1478 | "113.5%" 1479 | ] 1480 | }, 1481 | { 1482 | "name": "stdout", 1483 | "output_type": "stream", 1484 | "text": [ 1485 | "Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz\n", 1486 | "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz\n" 1487 | ] 1488 | }, 1489 | { 1490 | "name": "stderr", 1491 | "output_type": "stream", 1492 | "text": [ 1493 | "100.4%" 1494 | ] 1495 | }, 1496 | { 1497 | "name": "stdout", 1498 | "output_type": "stream", 1499 | "text": [ 1500 | "Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz\n", 1501 | "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz\n" 1502 | ] 1503 | }, 1504 | { 1505 | "name": "stderr", 1506 | "output_type": "stream", 1507 | "text": [ 1508 | "180.4%" 1509 | ] 1510 | }, 1511 | { 1512 | "name": "stdout", 1513 | "output_type": "stream", 1514 | "text": [ 1515 | "Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz\n", 1516 | "Processing...\n", 1517 | "Done!\n" 1518 | ] 1519 | } 1520 | ], 1521 | "source": [ 1522 | "import torchvision.datasets as datasets\n", 1523 | "mnist_trainset = datasets.MNIST(root='./data', 
train=True, download=True, transform=None)" 1524 | ] 1525 | }, 1526 | { 1527 | "cell_type": "code", 1528 | "execution_count": 14, 1529 | "metadata": {}, 1530 | "outputs": [ 1531 | { 1532 | "name": "stderr", 1533 | "output_type": "stream", 1534 | "text": [ 1535 | "/Users/atrask/anaconda/lib/python3.6/site-packages/torchvision/datasets/mnist.py:53: UserWarning: train_data has been renamed data\n", 1536 | " warnings.warn(\"train_data has been renamed data\")\n", 1537 | "/Users/atrask/anaconda/lib/python3.6/site-packages/torchvision/datasets/mnist.py:43: UserWarning: train_labels has been renamed targets\n", 1538 | " warnings.warn(\"train_labels has been renamed targets\")\n" 1539 | ] 1540 | } 1541 | ], 1542 | "source": [ 1543 | "train_data = mnist_trainset.train_data\n", 1544 | "train_targets = mnist_trainset.train_labels" 1545 | ] 1546 | }, 1547 | { 1548 | "cell_type": "code", 1549 | "execution_count": 12, 1550 | "metadata": {}, 1551 | "outputs": [ 1552 | { 1553 | "name": "stderr", 1554 | "output_type": "stream", 1555 | "text": [ 1556 | "/Users/atrask/anaconda/lib/python3.6/site-packages/torchvision/datasets/mnist.py:58: UserWarning: test_data has been renamed data\n", 1557 | " warnings.warn(\"test_data has been renamed data\")\n", 1558 | "/Users/atrask/anaconda/lib/python3.6/site-packages/torchvision/datasets/mnist.py:48: UserWarning: test_labels has been renamed targets\n", 1559 | " warnings.warn(\"test_labels has been renamed targets\")\n" 1560 | ] 1561 | } 1562 | ], 1563 | "source": [ 1564 | "test_data = mnist_trainset.test_data\n", 1565 | "test_targets = mnist_trainset.test_labels" 1566 | ] 1567 | }, 1568 | { 1569 | "cell_type": "markdown", 1570 | "metadata": {}, 1571 | "source": [ 1572 | "\n" 1573 | ] 1574 | } 1575 | ], 1576 | "metadata": { 1577 | "kernelspec": { 1578 | "display_name": "Python 3", 1579 | "language": "python", 1580 | "name": "python3" 1581 | }, 1582 | "language_info": { 1583 | "codemirror_mode": { 1584 | "name": "ipython", 1585 | "version": 3 1586 | }, 1587 | "file_extension": ".py", 1588 | "mimetype": "text/x-python", 1589 | "name": "python", 1590 | "nbconvert_exporter": "python", 1591 | "pygments_lexer": "ipython3", 1592 | "version": "3.7.3" 1593 | } 1594 | }, 1595 | "nbformat": 4, 1596 | "nbformat_minor": 2 1597 | } 1598 | -------------------------------------------------------------------------------- /fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_11_1.png -------------------------------------------------------------------------------- /fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_16_1.png -------------------------------------------------------------------------------- /fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_4_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/fashion-mnist-model/fashion_mnist_network_files/fashion_mnist_network_4_1.png -------------------------------------------------------------------------------- /fashion_mnist_network.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ```python 4 | import torch 5 | from torchvision import datasets, transforms 6 | import torch.nn.functional as F 7 | 8 | # define a transform to normalize the data 9 | transform = transforms.Compose([ 10 | transforms.ToTensor(), 11 | transforms.Normalize([0.5], [0.5]) 12 | ]) 13 | 14 | # Download and load training data 15 | trainset = datasets.FashionMNIST( 16 | 'data/FASHION_MNIST_data/', download=True, train=True, transform=transform) 17 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) 18 | 19 | # Download and load test data 20 | testset = datasets.FashionMNIST( 21 | 'data/FASHION_MNIST_data', download=True, train=False, transform=transform) 22 | testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) 23 | 24 | 25 | ``` 26 | 27 | 28 | ```python 29 | # Create the network, define criterion and optimizer 30 | from torch import nn, optim 31 | 32 | input_units = 784 33 | hidden_units = [256, 128, 64] 34 | output_units = 10 35 | 36 | model = nn.Sequential( 37 | nn.Linear(input_units, hidden_units[0]), 38 | nn.ReLU(), 39 | nn.Linear(hidden_units[0], hidden_units[1]), 40 | nn.ReLU(), 41 | nn.Linear(hidden_units[1], hidden_units[2]), 42 | nn.ReLU(), 43 | nn.Linear(hidden_units[2], output_units), 44 | nn.LogSoftmax(dim=1) 45 | ) 46 | 47 | optimizer = optim.Adam(model.parameters(), lr=0.001) 48 | criterion = nn.NLLLoss() 49 | epochs= 5 50 | ``` 51 | 52 | 53 | ```python 54 | # Train the network 55 | for i in range(epochs): 56 | running_loss = 0 57 | for images, target_labels in trainloader: 58 | # flatten images into 784 long vector for the input layer 59 | images = images.view(images.shape[0], -1) 60 | # clear gradients because they accumulate 61 | optimizer.zero_grad() 62 | out = model(images) 63 | loss = criterion(out, target_labels) 64 | # let optmizer update the parameters 65 | loss.backward() 66 | optimizer.step() 67 | 68 | running_loss += loss.item() 69 | else: 70 | print(f'Training loss: {running_loss/len(trainloader)}') 71 | ``` 72 | 73 | Training loss: 0.5213481309666816 74 | Training loss: 0.3745512448664286 75 | Training loss: 0.33560746372032013 76 | Training loss: 0.3119227885723368 77 | Training loss: 0.29549156429607476 78 | 79 | 80 | 81 | ```python 82 | import matplotlib.pyplot as plt 83 | import numpy as np 84 | 85 | def view_classification(img, probabilities): 86 | """Utility to imshow the image and its predicted classes.""" 87 | ps = probabilities.data.numpy().squeeze() 88 | 89 | fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2) 90 | ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze()) 91 | ax1.axis('off') 92 | ax2.barh(np.arange(10), ps) 93 | ax2.set_aspect(0.1) 94 | ax2.set_yticks(np.arange(10)) 95 | ax2.set_yticklabels([ 96 | 'T-shirt/top', 97 | 'Trouser', 98 | 'Pullover', 99 | 'Dress', 100 | 'Coat', 101 | 'Sandal', 102 | 'Shirt', 103 | 'Sneaker', 104 | 'Bag', 105 | 'Ankle Boot' 106 | ], size='small'); 107 | ax2.set_title('Class Probability') 108 | ax2.set_xlim(0, 1.1) 109 | 110 | plt.tight_layout() 111 | ``` 112 | 113 | 114 | ```python 115 | # Test out the network 116 | dataiter = iter(testloader) 117 | images, labels = 
dataiter.next() 118 | img = images[1] 119 | # reshape images to the model input layer's liking. 120 | test_images = images.view(images.shape[0], -1) 121 | # Calculate class probabilities (softmax) 122 | ps = torch.exp(model(test_images)) 123 | print(ps.shape) 124 | 125 | # plot out the image and probability distribution 126 | view_classification(img, ps[0]) 127 | ``` 128 | 129 | torch.Size([64, 10]) 130 | 131 | 132 | 133 | ![png](fashion_mnist_network_files/fashion_mnist_network_4_1.png) 134 | 135 | 136 | #### Validation 137 | To test for overfitting, we measure the performance of the model on data that isn't part of the training set. 138 | We'll use accuracy, the percentage of classes the NN predicted correctly. 139 | Other options are `precision and recall and top-5 error rate`. 140 | 141 | First, we do one forward pass with one batch from the test set 142 | 143 | 144 | ```python 145 | images, labels = next(iter(testloader)) 146 | # get class probabilities (10 class probabilities for 64 examples) 147 | images = images.view(images.shape[0], -1) 148 | ps = torch.exp(model(images)) 149 | ps.shape 150 | ``` 151 | 152 | 153 | 154 | 155 | torch.Size([64, 10]) 156 | 157 | 158 | 159 | With the probabilities, we can use `ps.topk` to get the most likely class and return the `k` highest values. Since we just want the most likely class, we can use `ps.topk(1)`. This returns a tuple of top-k values and top-k indices. If the highest values is the 5th element, we'll get back 4 as the index. 160 | 161 | 162 | ```python 163 | top_p, top_class = ps.topk(1, dim=1) 164 | print(top_class.shape) 165 | print (labels.shape) 166 | 167 | ``` 168 | 169 | torch.Size([64, 1]) 170 | torch.Size([64]) 171 | 172 | 173 | 174 | ```python 175 | # check where our predicted classes match with true classes from labels 176 | equals = top_class == labels.view(*top_class.shape) # make sure they have the same shape 177 | # convert the equals byte tensor into float tensor before doing the mean 178 | accuracy = torch.mean(equals.type(torch.FloatTensor)) 179 | print(f'Accuracy: {accuracy.item()*100}%') 180 | ``` 181 | 182 | Accuracy: 89.0625% 183 | 184 | 185 | 186 | ```python 187 | # Train the network 188 | epochs = 30 189 | steps = 0 190 | 191 | train_losses, test_losses = [], [] 192 | for i in range(epochs): 193 | running_loss = 0 194 | for images, target_labels in trainloader: 195 | # flatten images into 784 long vector for the input layer 196 | images = images.view(images.shape[0], -1) 197 | # clear gradients because they accumulate 198 | optimizer.zero_grad() 199 | out = model(images) 200 | loss = criterion(out, target_labels) 201 | # let optmizer update the parameters 202 | loss.backward() 203 | optimizer.step() 204 | 205 | running_loss += loss.item() 206 | else: 207 | accuracy = 0 208 | test_loss = 0 209 | 210 | # turn off gradients for validation, saves memory and computation 211 | with torch.no_grad(): 212 | for images, labels in testloader: 213 | images = images.view(images.shape[0], -1) 214 | log_ps = model(images) 215 | test_loss += criterion(log_ps, labels) 216 | 217 | ps = torch.exp(log_ps) 218 | _, top_class = ps.topk(1, dim=1) 219 | equals = top_class == labels.view(*top_class.shape) 220 | accuracy += torch.mean(equals.type(torch.FloatTensor)) 221 | 222 | train_losses.append(running_loss/len(trainloader)) 223 | test_losses.append(test_loss/len(testloader)) 224 | 225 | print(f'Accuracy: {accuracy/len(testloader)}') 226 | print(f'Training loss: {running_loss/len(trainloader)}') 227 | print(f'Test loss: 
{test_loss/len(testloader)}') 228 | ``` 229 | 230 | 231 | ```python 232 | plt.plot(train_losses, label='Training loss') 233 | plt.plot(test_losses, label='Validation loss') 234 | plt.legend(frameon=False) 235 | ``` 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | ![png](fashion_mnist_network_files/fashion_mnist_network_11_1.png) 246 | 247 | 248 | As the network gets better and better on the training data, it actually starts to get worse on the test/validation data. 249 | This is because, as it learns the training set, it fails to generalize to data outside of it. 250 | This is the overfitting phenomenon at work. 251 | 252 | We should always strive to get the lowest validation loss possible. 253 | * Early stopping – use the version of the model with the lowest validation loss (8-10 training epochs) 254 | * Dropout – give a drop probability and randomly drop input units, forcing the network to share information between weights, increasing its ability to generalize to new data. 255 | 256 | 257 | ## Network with Dropout 258 | During training we want to prevent overfitting using dropout. But during inference, we want to use all of our units. So we turn off the dropout using `model.eval()`, which sets the model to evaluation mode. After calculating the validation loss and metric, we set the model back to train mode using `model.train()` 259 | 260 | 261 | ```python 262 | from torch import nn 263 | 264 | class Classifier(nn.Module): 265 |     def __init__(self): 266 |         super().__init__() 267 |         self.fc1 = nn.Linear(784, 256) 268 |         self.fc2 = nn.Linear(256, 128) 269 |         self.fc3 = nn.Linear(128, 64) 270 |         self.fc4 = nn.Linear(64, 10) 271 | 272 |         # Dropout module with 0.2 drop probability 273 |         self.dropout = nn.Dropout(p=0.2) 274 | 275 |     def forward(self, x): 276 |         # make sure input tensor is flattened 277 |         x = x.view(x.shape[0], -1) 278 | 279 |         x = self.dropout(F.relu(self.fc1(x))) 280 |         x = self.dropout(F.relu(self.fc2(x))) 281 |         x = self.dropout(F.relu(self.fc3(x))) 282 | 283 |         # output so no dropout here 284 |         x = F.log_softmax(self.fc4(x), dim=1) 285 |         return x 286 | ``` 287 | 288 | 289 | ```python 290 | # Initialize the network 291 | model = Classifier() 292 | criterion = nn.NLLLoss() 293 | optimizer = optim.Adam(model.parameters(), lr=0.003) 294 | 295 | epochs = 20 296 | steps = 0 297 | train_losses, test_losses = [], [] 298 | 299 | for i in range(epochs): 300 |     running_loss = 0 301 |     for images, target_labels in trainloader: 302 |         # clear gradients because they accumulate 303 |         optimizer.zero_grad() 304 |         out = model(images) 305 |         loss = criterion(out, target_labels) 306 |         # let the optimizer update the parameters 307 |         loss.backward() 308 |         optimizer.step() 309 | 310 |         running_loss += loss.item() 311 |     else: 312 |         accuracy = 0 313 |         test_loss = 0 314 | 315 |         # turn off gradients for validation, saves memory and computation 316 |         with torch.no_grad(): 317 |             # set model to eval mode 318 |             model.eval() 319 |             for images, labels in testloader: 320 |                 log_ps = model(images) 321 |                 test_loss += criterion(log_ps, labels) 322 | 323 |                 ps = torch.exp(log_ps) 324 |                 _, top_class = ps.topk(1, dim=1) 325 |                 equals = top_class == labels.view(*top_class.shape) 326 |                 accuracy += torch.mean(equals.type(torch.FloatTensor)) 327 | 328 |         # set model back to train mode 329 |         model.train() 330 | 331 |         train_losses.append(running_loss/len(trainloader)) 332 |         test_losses.append(test_loss/len(testloader)) 333 | 334 |         print(f'Training loss: {running_loss/len(trainloader)}') 335 |         print(f'Test loss: {test_loss/len(testloader)}') 336 
| print(f'Accuracy: {accuracy/len(testloader)}') 337 | ``` 338 | 339 | Training loss: 0.5969907244258343 340 | Test loss: 0.45537126064300537 341 | Accuracy: 0.837579607963562 342 | Training loss: 0.48040236235618083 343 | Test loss: 0.4564197361469269 344 | Accuracy: 0.8391719460487366 345 | Training loss: 0.4555375060674224 346 | Test loss: 0.4054587781429291 347 | Accuracy: 0.8566879034042358 348 | Training loss: 0.43134639539253483 349 | Test loss: 0.4160998463630676 350 | Accuracy: 0.849920392036438 351 | Training loss: 0.42371229006092687 352 | Test loss: 0.39617791771888733 353 | Accuracy: 0.8572850227355957 354 | Training loss: 0.4141741424227066 355 | Test loss: 0.41605144739151 356 | Accuracy: 0.8525079488754272 357 | Training loss: 0.4026127882254149 358 | Test loss: 0.41036131978034973 359 | Accuracy: 0.8496218323707581 360 | Training loss: 0.40074826791278845 361 | Test loss: 0.41455933451652527 362 | Accuracy: 0.8544983863830566 363 | Training loss: 0.3938264337731704 364 | Test loss: 0.40421757102012634 365 | Accuracy: 0.856289803981781 366 | Training loss: 0.38593607951900855 367 | Test loss: 0.3894609212875366 368 | Accuracy: 0.8673368096351624 369 | Training loss: 0.38743931217107186 370 | Test loss: 0.42503443360328674 371 | Accuracy: 0.8538017272949219 372 | Training loss: 0.3780076681201392 373 | Test loss: 0.3754321336746216 374 | Accuracy: 0.8673368096351624 375 | Training loss: 0.3800705683622152 376 | Test loss: 0.40686577558517456 377 | Accuracy: 0.8619625568389893 378 | Training loss: 0.3766767466182648 379 | Test loss: 0.39196550846099854 380 | Accuracy: 0.8647491931915283 381 | Training loss: 0.37004977861034083 382 | Test loss: 0.37305891513824463 383 | Accuracy: 0.8654458522796631 384 | Training loss: 0.36968942699846685 385 | Test loss: 0.39208701252937317 386 | Accuracy: 0.8612658977508545 387 | Training loss: 0.36807350244825837 388 | Test loss: 0.3892441391944885 389 | Accuracy: 0.8644506335258484 390 | Training loss: 0.36202541258194043 391 | Test loss: 0.3834351599216461 392 | Accuracy: 0.8691281676292419 393 | Training loss: 0.36551991187687366 394 | Test loss: 0.37415942549705505 395 | Accuracy: 0.8662420511245728 396 | Training loss: 0.3577012780791661 397 | Test loss: 0.3998105823993683 398 | Accuracy: 0.8568869233131409 399 | 400 | 401 | 402 | ```python 403 | plt.plot(train_losses, label='Training loss') 404 | plt.plot(test_losses, label='Validation loss') 405 | plt.legend(frameon=False) 406 | ``` 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | ![png](fashion_mnist_network_files/fashion_mnist_network_16_1.png) 417 | 418 | 419 | #### Saving and loading networks 420 | It's impractical to create new models every time we want to train or predict. 421 | To save we'll use the pytorch's `torch.save()`. 
We can save to a file as follows: 422 | 423 | 424 | 425 | ```python 426 | torch.save(model.state_dict(), 'checkpoint.pth') 427 | ``` 428 | 429 | 430 | ```python 431 | # load the saved state dict 432 | state_dict = torch.load('checkpoint.pth') 433 | print(state_dict.keys()) 434 | ``` 435 | 436 | odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias']) 437 | 438 | 439 | 440 | ```python 441 | 442 | ``` 443 | -------------------------------------------------------------------------------- /intro-notebooks/loading_image_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Loading image data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The easiest way to load image data is with `datasets.ImageFolder` from torchvision.\n", 15 | "Example:\n", 16 | "```python\n", 17 | "# transforms = transforms.Compose(...)\n", 18 | "dataset = datasets.ImageFolder('path/to/data', transform=transforms)\n", 19 | "```\n", 20 | "Each class should have its own directory, like so:\n", 21 | "* root/dog/123.png\n", 22 | "* root/cat/456.png\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "#### Transforms\n", 30 | "We need to define transforms when loading image data. Images might be different sizes so we need to resize them to a standard size for training.\n", 31 | "We also convert the images to PyTorch tensors with `transforms.ToTensor()`.\n", 32 | "We combine these two transforms into a pipeline with `transforms.Compose()`:\n", 33 | "```python\n", 34 | "transforms = transforms.Compose([\n", 35 | "    transforms.Resize(255),\n", 36 | "    transforms.CenterCrop(224),\n", 37 | "    transforms.ToTensor()\n", 38 | "])\n", 39 | "```\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "#### Data Loaders\n", 47 | "After `ImageFolder` loads the data, you pass it to a `DataLoader`. The DataLoader takes a dataset and returns batches of images and the corresponding labels.\n", 48 | "```python\n", 49 | "dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)\n", 50 | "```\n", 51 | "The dataloader is an iterable, which means you either loop through it or convert it to an iterator and call `next()` (see the short example after the Data Augmentation cell below).\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "#### Data Augmentation\n", 59 | "It's a good strategy to introduce randomness in the input data. We can randomly rotate, mirror, scale and crop images during training. This helps the network generalize well as it's seeing the same images but in different locations, sizes and orientations."
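Following up on the Data Loaders cell above, here is a minimal sketch of the full `ImageFolder` → `DataLoader` → `next()` flow. The `'path/to/data'` directory is a placeholder, not a folder shipped with this repository:

```python
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# 'path/to/data' must contain one sub-directory per class (e.g. root/dog, root/cat)
dataset = datasets.ImageFolder('path/to/data', transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

# the loader is an iterable: loop over it, or pull a single batch explicitly
images, labels = next(iter(dataloader))
print(images.shape, labels.shape)  # e.g. torch.Size([32, 3, 224, 224]) torch.Size([32])
```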
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 1, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import torch\n", 69 | "from torchvision import datasets, transforms\n", 70 | "\n", 71 | "train_transforms = transforms.Compose([\n", 72 | " transforms.RandomRotation(30),\n", 73 | " transforms.RandomResizedCrop(100),\n", 74 | " transforms.RandomHorizontalFlip(),\n", 75 | " transforms.ToTensor(),\n", 76 | " transforms.Normalize([0.5], [0.5])\n", 77 | "])\n", 78 | "# we also normalize images by passing mean and standard devitions (or a list of the two)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "The color channels are normalized like so\n", 86 | "```\n", 87 | "input[channel] = (input[channel] - mean[channel] / std[channel])\n", 88 | "```\n", 89 | "NOTE:\n", 90 | "* Subtracting `mean` centers data around zero\n", 91 | "* Dividing by `std` squishes the values to be between -1 and 1.\n", 92 | "* Normalizing helps keep the work weights near zero which helps backpropagation to be more stable. Without normalization, networks tend to fail to learn.\n" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "#### Loading Cats and Dogs images and build a dataloader\n", 100 | "We'll use the Cats and Dogs classification data from Kaggle" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 10, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "ename": "RuntimeError", 110 | "evalue": "Found 0 files in subfolders of: dogs_vs_cats/train\nSupported extensions are: .jpg,.jpeg,.png,.ppm,.bmp,.pgm,.tif,.tiff,.webp", 111 | "output_type": "error", 112 | "traceback": [ 113 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 114 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 115 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 16\u001b[0m )\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mtrain_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mImageFolder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_dir\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'train'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrain_transforms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0mtest_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mImageFolder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_dir\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'test1'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtest_transforms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 116 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torchvision/datasets/folder.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, root, transform, target_transform, loader, is_valid_file)\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 208\u001b[0m 
\u001b[0mtarget_transform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtarget_transform\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 209\u001b[0;31m is_valid_file=is_valid_file)\n\u001b[0m\u001b[1;32m 210\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mimgs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msamples\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 117 | "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/torchvision/datasets/folder.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, root, loader, extensions, transform, target_transform, is_valid_file)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msamples\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m raise (RuntimeError(\"Found 0 files in subfolders of: \" + self.root + \"\\n\"\n\u001b[0;32m---> 97\u001b[0;31m \"Supported extensions are: \" + \",\".join(extensions)))\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloader\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 118 | "\u001b[0;31mRuntimeError\u001b[0m: Found 0 files in subfolders of: dogs_vs_cats/train\nSupported extensions are: .jpg,.jpeg,.png,.ppm,.bmp,.pgm,.tif,.tiff,.webp" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "import torch\n", 124 | "from torchvision import datasets, transforms\n", 125 | "\n", 126 | "data_dir = 'dogs_vs_cats/'\n", 127 | "\n", 128 | "train_transforms = transforms.Compose([\n", 129 | " transforms.RandomRotation(30),\n", 130 | " transforms.RandomResizedCrop(100),\n", 131 | " transforms.RandomHorizontalFlip(),\n", 132 | " transforms.ToTensor()])\n", 133 | "\n", 134 | "test_transforms = transforms.Compose([\n", 135 | " transforms.Resize(255),\n", 136 | " transforms.CenterCrop(224),\n", 137 | " transforms.ToTensor()]\n", 138 | ")\n", 139 | "\n", 140 | "train_data = datasets.ImageFolder(data_dir + 'train', transform=train_transforms)\n", 141 | "test_data = datasets.ImageFolder(data_dir + 'test1', transform=test_transforms)\n", 142 | "\n", 143 | "trainloader = torch.utils.data.DataLoader(train_data, batch_size=32)\n", 144 | "testloader = torch.utils.data.DataLoader(test_data, batch_size=32)\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.7.4" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 2 183 | } 184 | -------------------------------------------------------------------------------- /intro-notebooks/memory_sharing_vs_copying.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
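This notebook is otherwise empty. A minimal sketch of what its title suggests it was meant to show – `torch.from_numpy` shares memory with the source array while `torch.tensor` makes a copy – is given below as an assumption; it mirrors the demonstration at the end of multilayer_network.ipynb.

```python
import numpy as np
import torch

a = np.ones(3)
shared = torch.from_numpy(a)   # shares memory with `a`
copied = torch.tensor(a)       # makes an independent copy

a *= 2                         # in-place change to the NumPy array
print(shared)  # tensor([2., 2., 2.], dtype=torch.float64) -> sees the change
print(copied)  # tensor([1., 1., 1.], dtype=torch.float64) -> unaffected
```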
"cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.5" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /intro-notebooks/mnist_neural_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 39, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "from torchvision import datasets, transforms\n", 11 | "\n", 12 | "# define a transform to normalize the data\n", 13 | "# if the img has three channels, you should have three number for mean, \n", 14 | "# for example, img is RGB, mean is [0.5, 0.5, 0.5], the normalize result is R * 0.5, G * 0.5, B * 0.5. \n", 15 | "# If img is grey type that only one channel, mean should be [0.5], the normalize result is R * 0.5\n", 16 | "transform = transforms.Compose([transforms.ToTensor(),\n", 17 | " transforms.Normalize([0.5], [0.5])\n", 18 | " ])\n", 19 | "# download and load the traning data\n", 20 | "trainset = datasets.MNIST('data/MNIST_data/', download=True, train=True, transform=transform)\n", 21 | "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 49, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "\n", 34 | "torch.Size([1, 28, 28])\n", 35 | "torch.Size([64, 1, 28, 28])\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "# make an iterator for looping\n", 41 | "dataiter = iter(trainloader)\n", 42 | "images, labels = dataiter.next()\n", 43 | "print(type(images))\n", 44 | "print(images[0].shape)\n", 45 | "# NOTE: The batch size is the number of images we get in one iteration" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 43, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADSBJREFUeJzt3W+sVPWdx/HPR4QQoBqULF4t0W5j1jQ+sJsL2aiQmi6FNUToE1MfrJgQbh/UpE36YI2buDw0m/7JarQJpITbtWvdhFYxNrtlSY01blA0LqBsKzaQQuBSxAjoAxb47oN7aK/I/GaYOTNnLt/3K7m5M+d75pxvJvdzz5n5nZmfI0IA8rmq6QYANIPwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9I6upB7sw2lxMCfRYR7mS9no78tlfa/q3t/bYf6WVbAAbL3V7bb3uGpN9JWi7pkKQ3JD0QEe8WHsORH+izQRz5l0jaHxG/j4gzkn4maXUP2wMwQL2E/yZJf5hy/1C17FNsj9neZXtXD/sCULO+v+EXERslbZQ47QeGSS9H/sOSFk25//lqGYBpoJfwvyHpVttfsD1L0jckbaunLQD91vVpf0Sctf2wpP+UNEPS5oh4p7bOAPRV10N9Xe2M1/xA3w3kIh8A0xfhB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivADSXU9Rbck2T4g6ZSkc5LORsRoHU0B6L+ewl+5JyKO17AdAAPEaT+QVK/hD0m/sv2m7bE6GgIwGL2e9t8dEYdt/4Wk7bb/NyJembpC9U+BfwzAkHFE1LMhe4Ok0xHxvcI69ewMQEsR4U7W6/q03/Zc25+7cFvS1yTt7XZ7AAarl9P+hZJ+YfvCdv4tIv6jlq4A9F1tp/0d7YzT/q6sWLGiWH/xxRdb1q6+ure3dap/7i318veze/fuYn3p0qXF+qlTp7re95Ws76f9AKY3wg8kRfiBpAg/kBThB5Ii/EBSDPUNwIIFC4r1VatWFetPPvlksT5nzpzL7qlT/Rzqa+ell14q1u+7776+7Xs6Y6gPQBHhB5Ii/EBShB9IivADSRF+ICnCDyRVx7f3oo3SR24lacmSJX3b95kzZ4r1duP07cb5Z86c2dPjS2688cauH4v2OPIDSRF+ICnCDyRF+IGkCD+QFOEHkiL8QFKM8w/Abbfd1tftHz16tGVt2bJlxce+//77Pe378OHDxfoNN9zQ0/bRPxz5gaQIP5AU4QeSIvxAUoQfSIrwA0kRfiCptuP8tjdLWiXpWETcXi27TtJzkm6RdEDS/RHxYf/aHG6PPfZYsT5v3ry+7r80lt/rOH477b6LYOvWrS1rixcvrrsdXIZOjvxbJK28aNkjknZExK2SdlT3AUwjbcMfEa9IOnHR4tWSxqvb45LW1NwXgD7r9jX/wog4Ut0+KmlhTf0AGJCer+2PiCjNwWd7TNJYr/sBUK9uj/wTtkckqfp9rNWKEbExIkYjYrTLfQHog27Dv03S2ur2Wkkv1NMOgEFpG37bz0r6b0l/ZfuQ7XWSHpe03PZ7kv62ug9gGmn7mj8iHmhR+mrNvUxb1157bbF+1VW9XUu1ffv2Yv3gwYM9bb8XJ05cPBD0ab18b//s2bOL9Tlz5hTrn3zySdf7zoAr/ICkCD+QFOEHkiL8QFKEH0iK8ANJ8dXd08CePXuK9bNnzw6ok8/atGlTsT462v2Fnddff32xfvPNNxfr+/bt63rfGXDkB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkGOdHT+65556+bbvd9N+M4/eGIz+QFOEHkiL8QFKEH0iK8ANJEX4gKcIPJMU4fw0mJiaK9XPnzhXrM2bMKNbXrVtXrI+Pj7es7d27t/jYdp+Jb/e14e0+c4/hxZEfSIrwA0kRfiApwg8kRfiBpAg/kBThB5JyRJRXsDdLWiXpWETcXi3bIGm9pD9Wqz0aEb9suzO7vLMr1IcfflisX3PNNT1t/4MPPmhZe+2114qPXbx4cbE+MjJSrLf7++nFRx99VKyvXLmyWH/99dfrbGfaiIiO5kXv5Mi/RdKlnuUfRsQd1U/b4AMYLm3DHxGvSDoxgF4ADFAvr/kftr3b9mbb82vrCMBAdBv+H0n6oqQ7JB2R9P1WK9oes73L9q4u9wWgD7oKf0RMRMS5iDgvaZOkJYV1N0bEaER0P2MjgNp1FX7bU98C/rqk8kfHAAydth/ptf2spK9IWmD7kKR/kvQV23dICkkHJH2zjz0C6IO24/y17izpOP/LL79crC9btmwwjXTBLg8Zv/rqq8X66dOnW9ZWrFjRVU8XPPjgg8X6M88809P2p6s6x/kBXIEIP5AU4QeSIvxAUoQfSIrwA0nx1d0DsHr16mL9ueeeK9aXL19eZzuXZf/+/cX6+vXri/XS15ofP368q55QD478QFKEH0iK8ANJEX4gKcIPJEX4gaQIP5AUH+kdAvPmzSvW165dW6w/9NBDLWtbtmzpoqM/e+qpp3p6/Pz5rb/esddx/p07dxbrd955Z0/bn674SC+AIsIPJEX4gaQIP5AU4QeSIvxAUoQfSIpxfvRVP8f5T5482fW+r2SM8wMoIvxAUoQfSIrwA0kRfiApwg8kRfiBpNqG3/Yi27+2/a7td2x/u1p+ne3ttt+rfuccVAWmqU6O/GclfTciviTpbyR9y/aXJD0iaUdE3CppR3UfwDTRNvwRcSQi3qpun5K0T9JNklZLGq9WG5e0pl9NAqjfZb3mt32LpC9L2ilpYUQcqUpHJS2stTMAfdXxXH2250naKuk7EXHS/vPlwxERra7btz0maazXRgHUq6Mjv+2Zmgz+TyPi59XiCdsjVX1E0rFLPTYiNkbEaESM1tEwgHp08m6/Jf1Y0r6I+MGU0jZJF75Wdq2kF+pvD0C/dHLaf5ekv5e0x/bb1bJHJT0u6d9tr5N0UNL9/WkRQD+0DX9EvCqp1eeDv1pvOwAGhSv8gKQIP5AU4QeSIvxAUoQfSIrwA0l1fHkvMGxmz55drK9Z0/qzZs8//3zd7Uw7HPmBpAg/kBThB5Ii/EBShB9IivADSRF+ICnG+TFtzZo1q1i/6667WtYY5+fID6RF+IGkCD+QFOEHkiL8QFKEH0iK8ANJMc6Pvjp//nzL2scff1x87Ny5c4v1c+fOFesTExPFenYc+YGkCD+QFOEHkiL8QFKEH0iK8ANJEX4gKUdEeQV7kaSfSFooKSRtjIh/sb1B0npJf6xWfTQiftlmW+WdIZWlS5cW65s2bSrWn3766WL9iSeeuOyergQR4U7W6+Qin7OSvhsRb9n+nKQ3bW+vaj+MiO912ySA5rQNf0QckXSkun3K9j5JN/W7MQD9dVmv+W3fIunLknZWix62vdv2ZtvzWzxmzPYu27t66hRArToOv+15krZK+k5EnJT0I0lflH
SHJs8Mvn+px0XExogYjYjRGvoFUJOOwm97piaD/9OI+LkkRcRERJyLiPOSNkla0r82AdStbfhtW9KPJe2LiB9MWT4yZbWvS9pbf3sA+qWTob67Jf1G0h5JFz6f+aikBzR5yh+SDkj6ZvXmYGlbDPUBfdbpUF/b8NeJ8AP912n4ucIPSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBShB9IivADSRF+ICnCDyRF+IGkCD+Q1KCn6D4u6eCU+wuqZcNoWHsb1r4keutWnb3d3OmKA/08/2d2bu8a1u/2G9behrUvid661VRvnPYDSRF+IKmmw7+x4f2XDGtvw9qXRG/daqS3Rl/zA2hO00d+AA1pJPy2V9r+re39th9poodWbB+wvcf2201PMVZNg3bM9t4py66zvd32e9XvS06T1lBvG2wfrp67t23f21Bvi2z/2va7tt+x/e1qeaPPXaGvRp63gZ/2254h6XeSlks6JOkNSQ9ExLsDbaQF2wckjUZE42PCtpdJOi3pJxFxe7XsnyWdiIjHq3+c8yPiH4aktw2STjc9c3M1oczI1JmlJa2R9JAafO4Kfd2vBp63Jo78SyTtj4jfR8QZST+TtLqBPoZeRLwi6cRFi1dLGq9uj2vyj2fgWvQ2FCLiSES8Vd0+JenCzNKNPneFvhrRRPhvkvSHKfcPabim/A5Jv7L9pu2xppu5hIVTZkY6Kmlhk81cQtuZmwfpopmlh+a562bG67rxht9n3R0Rfy3p7yR9qzq9HUox+ZptmIZrOpq5eVAuMbP0nzT53HU743Xdmgj/YUmLptz/fLVsKETE4er3MUm/0PDNPjxxYZLU6vexhvv5k2GauflSM0trCJ67YZrxuonwvyHpVttfsD1L0jckbWugj8+wPbd6I0a250r6moZv9uFtktZWt9dKeqHBXj5lWGZubjWztBp+7oZuxuuIGPiPpHs1+Y7/+5L+sYkeWvT1l5L+p/p5p+neJD2rydPA/9PkeyPrJF0vaYek9yT9l6Trhqi3f9XkbM67NRm0kYZ6u1uTp/S7Jb1d/dzb9HNX6KuR540r/ICkeMMPSIrwA0kRfiApwg8kRfiBpAg/kBThB5Ii/EBS/w/7FS9u0XS6hwAAAABJRU5ErkJggg==\n", 56 | "text/plain": [ 57 | "
" 58 | ] 59 | }, 60 | "metadata": { 61 | "needs_background": "light" 62 | }, 63 | "output_type": "display_data" 64 | } 65 | ], 66 | "source": [ 67 | "import matplotlib.pyplot as plt\n", 68 | "plt.imshow(images[1].numpy().squeeze(), cmap='Greys_r');" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "Time to create a dense fully-connected network. \n", 76 | "\n", 77 | "Each unit in one layer is connected to the other in the next layer.\n", 78 | "The input to each layer must be one-dimensional vector. But our images are 28*28 2D tensors, so we need to convert them to 1D vectors. Therefore:\n", 79 | "* Convert/Flatten the batch of images of shape(64, 1, 28, 28) into (64, 28 * 28=784).\n", 80 | "* For the output layer, we also need 10 output units for the 10 classes(digits)\n", 81 | "* Also convert the network output into a probability distribution." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 55, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "flattened_images = images.view(64, 28 * 28)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 56, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "torch.Size([64, 784])\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "print(flattened_images.shape)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 83, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "def activation(x):\n", 117 | " \"\"\"Create a sigmoid activation function.\n", 118 | " Good for outputs that fall between 0 and 1. (probability)\n", 119 | " args x: a torch tensor.\n", 120 | " \"\"\"\n", 121 | " return 1/(1 + torch.exp(-x))\n", 122 | "\n", 123 | "def softmax(x):\n", 124 | " \"\"\"Create a softmax activation function.\n", 125 | " Good for outputs that fall between 0 and 1. 
(probability)\n", 126 | " args x: a torch tensor.\n", 127 | " \"\"\"\n", 128 | " return torch.exp(x)/torch.sum(torch.exp(x), dim=1).view(-1, 1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 84, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "torch.Size([64, 10])\n", 141 | "tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 142 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 143 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 144 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 145 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 146 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 147 | " 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,\n", 148 | " 1.0000])\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "# flatten the images to shape(64, 784)\n", 154 | "inputs = images.view(images.shape[0], -1)\n", 155 | "\n", 156 | "# create parameters\n", 157 | "w1 = torch.randn(784, 256)\n", 158 | "b1 = torch.randn(256)\n", 159 | "\n", 160 | "w2 = torch.randn(256, 10)\n", 161 | "b2 = torch.randn(10)\n", 162 | "\n", 163 | "h = activation(torch.mm(inputs, w1) + b1)\n", 164 | "\n", 165 | "out = torch.mm(h, w2) + b2\n", 166 | "probabilities = softmax(out)\n", 167 | "print(probabilities.shape)\n", 168 | "print(probabilities.sum(dim=1))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## Using the Torch nn to create networks" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 89, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "from torch import nn\n", 185 | "import torch.nn.functional as F\n", 186 | "\n", 187 | "class Network(nn.Module):\n", 188 | " \"\"\"Use relu(Rectified linear unit) as the activation function.\n", 189 | " Networks tend to train a lot faster when using relu.\n", 190 | " For a network to approximate a non-linear function, the activation\n", 191 | " function must be non-linear.\n", 192 | " \"\"\"\n", 193 | " def __init__(self):\n", 194 | " super().__init__()\n", 195 | " # inputs to hidden layer linear transformation\n", 196 | " self.hidden_layer1 = nn.Linear(784, 128) # 256 outputs\n", 197 | " self.hidden_layer2 = nn.Linear(128, 64)\n", 198 | " # output layer, 10 units one for each digit\n", 199 | " self.output = nn.Linear(64, 10)\n", 200 | " \n", 201 | " def forward(self, x):\n", 202 | " # hidden layer with sigmoid activation\n", 203 | " x = F.relu(self.hidden_layer1(x))\n", 204 | " x = F.relu(self.hidden_layer2(x))\n", 205 | " # Output layer with softmax activation\n", 206 | " x = F.softmax(self.output(x), dim=1)\n", 207 | " return x" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 90, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "Network(\n", 219 | " (hidden_layer1): Linear(in_features=784, out_features=128, bias=True)\n", 220 | " (hidden_layer2): Linear(in_features=128, out_features=64, bias=True)\n", 221 | " (output): Linear(in_features=64, out_features=10, bias=True)\n", 222 | ")" 223 | ] 224 | }, 225 | "execution_count": 90, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "model = Network()\n", 232 | "model" 233 | ] 234 | }, 235 | { 
236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "## Training our network\n" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 91, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "tensor(2.3058, grad_fn=)\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "model = nn.Sequential(nn.Linear(784, 128),\n", 257 | " nn.ReLU(),\n", 258 | " nn.Linear(128, 64),\n", 259 | " nn.ReLU(),\n", 260 | " nn.Linear(64, 10)\n", 261 | " )\n", 262 | "\n", 263 | "# define the loss\n", 264 | "criterion = nn.CrossEntropyLoss()\n", 265 | "\n", 266 | "# Prepare data\n", 267 | "images, labels = next(iter(trainloader))\n", 268 | "\n", 269 | "# flatten images\n", 270 | "images = images.view(images.shape[0], -1)\n", 271 | "\n", 272 | "# forward pass, get the logits\n", 273 | "logits = model(images)\n", 274 | "# calculate the loss with the logits and the labels\n", 275 | "loss = criterion(logits, labels)\n", 276 | "print(loss)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "It's more convenient to build model with a log-softmax output using `nn.LogSoftmax`\n", 284 | "We can get actual probabilities by taking the exponential torch.exp(output).\n", 285 | "We'll also use the negative log likelihood loss, `nn.NLLLoss`" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 95, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "tensor(2.3025, grad_fn=)\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "model = nn.Sequential(nn.Linear(784, 128),\n", 303 | " nn.ReLU(),\n", 304 | " nn.Linear(128, 64),\n", 305 | " nn.ReLU(),\n", 306 | " nn.Linear(64, 10),\n", 307 | " nn.LogSoftmax(dim=1),\n", 308 | " )\n", 309 | "criterion = nn.NLLLoss()\n", 310 | "\n", 311 | "logits = model(images)\n", 312 | "loss = criterion(logits, labels)\n", 313 | "print(loss)\n" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "## USing Autograd to perform backpropagation" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "After calculating loss, we perform backpropagation. Enter `autograd`\n", 328 | "\n", 329 | "We use it to calculate the gradients of all our parameters with respect to the loss we got. Autograd goes backwards through the tensor operations, calculating gradients along the way. \n", 330 | "* Set `requires_grad=True` on a tensor when creating the tensor." 
331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 105, 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "name": "stdout", 340 | "output_type": "stream", 341 | "text": [ 342 | "tensor([[-0.4997, -0.1425],\n", 343 | " [-0.8944, 0.0633]])\n", 344 | "tensor([[-0.4997, -0.1425],\n", 345 | " [-0.8944, 0.0633]], grad_fn=)\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "x = torch.randn(2,2, requires_grad=True)\n", 351 | "y = x** 2\n", 352 | "z = y.mean()\n", 353 | "z.backward()\n", 354 | "print(x.grad)" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 107, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "# Back to the model we created" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 109, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "Before backward pass: \n", 376 | " None\n", 377 | "After backward pass: \n", 378 | " tensor([[-0.0029, -0.0029, -0.0029, ..., -0.0029, -0.0029, -0.0029],\n", 379 | " [-0.0028, -0.0028, -0.0028, ..., -0.0028, -0.0028, -0.0028],\n", 380 | " [-0.0006, -0.0006, -0.0006, ..., -0.0006, -0.0006, -0.0006],\n", 381 | " ...,\n", 382 | " [-0.0011, -0.0011, -0.0011, ..., -0.0011, -0.0011, -0.0011],\n", 383 | " [-0.0036, -0.0036, -0.0036, ..., -0.0036, -0.0036, -0.0036],\n", 384 | " [-0.0026, -0.0026, -0.0026, ..., -0.0026, -0.0026, -0.0026]])\n" 385 | ] 386 | } 387 | ], 388 | "source": [ 389 | "print('Before backward pass: \\n', model[0].weight.grad)\n", 390 | "loss.backward()\n", 391 | "print('After backward pass: \\n', model[0].weight.grad)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "We also need an optimizer that'll update weights with the gradients from the backward pass.\n", 399 | "From Pytorch's `optim` package, we can use stochastic gradient descenc with `optim.SGD`\n", 400 | "\n" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 110, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "from torch import optim\n", 410 | "# pass in the parameter to optimize and a learning rate\n", 411 | "optimizer = optim.SGD(model.parameters(), lr=0.01)" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 118, 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "name": "stdout", 421 | "output_type": "stream", 422 | "text": [ 423 | "Training loss: 1.0614541531689385\n", 424 | "Training loss: 0.3895804712147728\n", 425 | "Training loss: 0.3246205799631091\n", 426 | "Training loss: 0.29300587304206543\n", 427 | "Training loss: 0.26864237868900237\n" 428 | ] 429 | } 430 | ], 431 | "source": [ 432 | "model = nn.Sequential(nn.Linear(784, 128),\n", 433 | " nn.ReLU(),\n", 434 | " nn.Linear(128, 64),\n", 435 | " nn.ReLU(),\n", 436 | " nn.Linear(64, 10),\n", 437 | " nn.LogSoftmax(dim=1),\n", 438 | " )\n", 439 | "criterion = nn.NLLLoss()\n", 440 | "optimizer = optim.SGD(model.parameters(), lr=0.01)\n", 441 | "\n", 442 | "epochs = 5\n", 443 | "for e in range(epochs):\n", 444 | " running_loss = 0\n", 445 | " for images, labels in trainloader:\n", 446 | " # Flatten Images into 784 long vector for the input layer\n", 447 | " images = images.view(images.shape[0], -1)\n", 448 | " \n", 449 | " # clear gradients because they accumulate\n", 450 | " optimizer.zero_grad()\n", 451 | " # forward pass\n", 452 | " output = model.forward(images)\n", 453 | " loss = criterion(output, labels)\n", 454 
| " loss.backward()\n", 455 | " optimizer.step()\n", 456 | " \n", 457 | " running_loss += loss.item()\n", 458 | " else:\n", 459 | " print(f'Training loss: {running_loss/len(trainloader)}')" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 120, 465 | "metadata": {}, 466 | "outputs": [], 467 | "source": [ 468 | "# create a helper to view the probability distribution\n", 469 | "import matplotlib.pyplot as plt\n", 470 | "import numpy as np\n", 471 | "\n", 472 | "def view_classify(img, ps):\n", 473 | " \"\"\"Function for viewing an image and it's predicted classes.\"\"\"\n", 474 | " ps = ps.data.numpy().squeeze()\n", 475 | "\n", 476 | " fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)\n", 477 | " ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())\n", 478 | " ax1.axis('off')\n", 479 | " ax2.barh(np.arange(10), ps)\n", 480 | " ax2.set_aspect(0.1)\n", 481 | " ax2.set_yticks(np.arange(10))\n", 482 | " ax2.set_yticklabels(np.arange(10))\n" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 124, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "name": "stdout", 492 | "output_type": "stream", 493 | "text": [ 494 | "tensor([[1.2855e-02, 5.0043e-05, 7.8326e-04, 5.7256e-03, 4.5138e-03, 9.6925e-01,\n", 495 | " 1.3272e-03, 1.2127e-03, 5.4744e-04, 3.7324e-03]])\n" 496 | ] 497 | }, 498 | { 499 | "data": { 500 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAADECAYAAAB6IFEiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAEEhJREFUeJzt3XuwVeV9xvHnkZugBFRIiwiCkXjNeGOsl8Z4v4+kaTOBGZOYJqGTGOOtaXUyHdu0M9pJSKNNM5aqbdIkoqImxmiUGK1jDehBUbmI4tEoRAN4wQtFBX794yxnNnu9B89ZZ++zznn9fmb2sPdvv3ufn0t5fM+79n6XI0IAgDztUHcDAID2IeQBIGOEPABkjJAHgIwR8gCQMUIeADJGyANAxgh5AMgYIQ8AGSPkASBjQ+tuAMjBuHHjYsqUKXW3gUwtXrx4fUSMr/JaQh5ogSlTpqijo6PuNpAp27+r+lqWawAgY4Q8AGSsX5drTtrh0+xrjLZasPUm190DMJAwkweAjBHyAJAxQh4AMkbIA0DGCHkgwfb5tpfaXmb7grr7Aaoi5IEmtg+U9GVJh0s6SNKZtveutyugGkIeKNtP0qKI2BgRmyX9j6RP1dwTUAkhD5QtlfRx27vZHiXpdEmTau4JqIS9a4AmEbHC9j9LulvSW5KWSNrSPM72bEmzJWny5Mn92iPQU8zkgYSIuDYiDouIYyS9KumpxJi5ETE9IqaPH19pg0Cg7ZjJAwm2PxwRa21PVtd6/BF19wRUQcgDaTfb3k3Su5LOjYjX6m4IqIKQBxIi4uN19wC0AmvyAJAxQh4AMkbIA0DGCHkAyBghDwAZI+QBIGOEPJBg+8Jim+Gltq+3vWPdPQFVEPJAE9sTJX1d0vSIOFDSEEkz6+0KqIaQB9KGShppe6ikUZJ+X3M/QCWEPNAkItZI+o6k5yW9KGlDRNxdb1dANYQ80MT2LpJmSJoqaXdJO9k+OzFutu0O2x3r1q3r7zaBHiHkgbITJT0bEesi4l1Jt0g6qnkQWw1jMCDkgbLnJR1he5RtSzpB0oqaewIqIeSBJhGxSNJ8SY9IekJdf0/m1toUUBFbDQMJEXGZpMvq7gPoK2byAJAxQh4AMkbIA0DGWJPPxNA9JibrW195tVzbuLHd7QAYIJjJA0DGCHkAyBghDzSxvY/tJQ23121fUHdfQBWsyQNNImKlpIMlyfYQSWsk3VprU0BFhPwANmTsmGR9xZxppdqik69Mjj1y/sWl2t4XLuxbY7307snTS7Vhd3f0aw99cIKkZyLid3U3AlTBcg2wfTMlXV93E0BVhDzQDdvDJZ0l6aZunmerYQx4hDzQvdMkPRIRf0g9yVbDGAwIeaB7s8RSDQY5Qh5IsL2TpJPUdcEQYNDi0zUDRBx9cKk289rbk2M/M/rXpdrfvvSJ5NipP3+nRz9LkjaNG16qvXxA+j+RGZ9+oFQbN+zN5Nizx5Q/+fPZSUcnxw4UEfGWpN3q7gPoK2byAJAxQh4AMkbIA0DGCHkAyBgnXtvIw8onMiVp5fcPKtWeOvPqUu1/Nw1Lvv64i88r1UbPS29VMHzaa6XayT97JDn2vLGd5b7e3ZQce+ebB5Zq//rgCcmx91z20UT1peRYAK3FTB4AMkbIAwm2x9qeb/tJ2ytsH1l3T0AVLNcAaVdK+lVE/EWxh82ouhsCqiDkgSa2x0g6RtI5khQR70gqf6sMGARYrgHKpkpaJ+k/bT9q+5pimwNg0GEm3053pXcmfHKfH5RqP3p9Uql25X98Kvn6CfMe7HELW54uf2Lm7k8emhx7ywEnl2ojX0p/ukYLHy+VPqqHk0M3b6e/AWqopEMlnRcRi2xfKekSSX/XOMj2bEmzJWny5Mn93iTQE8zkgbLVklZHxKLi8Xx1hf422GoYgwEhDzSJiJckvWB7n6J0gqTlNbYEVMZyDZB2nqSfFJ+s6ZT0hZr7ASoh5IGEiFgiqXwFcmCQIeR7acPZRyTrp33j/lJt1tgbk2Onf+8bpdqka1aUahNe7fkJ1t5InYyVpJHd1AEMXqzJA0DGCHkAyBghDwAZY00eaIEn1mzQlEt+WXcbGMSeu+KMtrwvM3kAyBgz+e148eKjSrWLvjw/OXbW6DWl2hGXlz9
FI0kTFr9Vqr1xbOrCGj23852PJetbN3WzLQG2y/Zzkt6QtEXS5ojg45QYlAh5oHvHRcT6upsA+oLlGgDIGCEPpIWku20vLnabBAYllmuAtD+NiDW2Pyxpge0nI2KbrzU3bjU85EPsQomBiZBX+gSrJP36gm+XarvssGNy7NZE7Utf/UVy7PGjVpZqew0bVqrt0M0vWlsTP+2K9Qclx954w7Gl2m7L0ju8j/z5Q8n6B1FErCn+XGv7VkmHS7q/acxcSXMlacSEadHvTQI9wHIN0MT2TrZHv3df0smSltbbFVANM3mg7I8k3Wpb6vo78tOI+FW9LQHVEPJAk4jolJRe/wIGGZZrACBjzOQlbdw9ddpU+v3m8uH56caPJMdedc8ppdo+30xfMe62N3brRXc9093JYx21oVQ6Y1b6BOsvdVypxsnYnvnYxDHqaNPeI0BfMJMHgIwR8gCQMUIeADJGyANAxgh5oBu2h9h+1PbtdfcCVMWnayR95KaNyfolP/7LUi0eXZYcO02LSrX0Z3baY8KcB5P1jZ1/Uqqd+N30P8OGvx9Zqi1Zvndy7JanO3vR3aB1vqQVkj5UdyNAVczkgQTbe0g6Q9I1dfcC9AUhD6R9T9LfqH9/IQNajpAHmtg+U9LaiFj8PuNm2+6w3bFu3bp+6g7oHUIeKDta0lnFdV7nSTre9o+bB0XE3IiYHhHTx49nP3kMTJx4laSFjyfLOWwQPvqp10q1TZH+137GmCWl2pLh+7W8p4EuIi6VdKkk2T5W0l9HxNm1NgVUxEweADLGTB7Yjoi4T9J9NbcBVMZMHgAyRsgDQMYIeQDIWBZr8s/MOaJUG92Z/v/XH1/7SKm2ddOmlvc0UHR+ZtdSbdcd0v+8M377lVJt2usvt7wnAP2HmTwAZIyQB4CMEfJAE9s72n7I9mO2l9n+h7p7AqrKYk0eaLG3JR0fEW/aHibpAdt3RsTCuhsDeiuLkB+xvvwLyT9deF1y7HPnlvcYeXvrsOTYH9x5Sqk2/DUnx459prxZ4eh57cmEHQ7ev1TbsG96y/NbPzenXHv9kOTYaRf9oVTb/OJLvexu8IuIkPRm8XBYccthlwt8ALFcAyQUV4VaImmtpAURUb4qDDAIEPJAQkRsiYiDJe0h6XDbBzaPYathDAaEPLAdEfGapHslnZp4jq2GMeAR8kAT2+Ntjy3uj5R0kqQn6+0KqCaLE69Ai02Q9EPbQ9Q1EboxIm6vuSegkixCfo/LHyzVrrp83+TYFy8+qlTbMiL9vrH//5Vqs0+/Nzl2r+FrS7XT5ryRfuMeGuYhyfq7iavSvR3vJsd+rnNGqbZ+ztTk2JEvPtSL7vIVEY9LSn8ECRhkWK4BgIwR8gCQMUIeADJGyANAxrI48dobE+aUT9L2xp0am6wPnVT6roy+c9juybHrD+jZYd/pyPXJ+uY7xpVqY55Ln3gdccfDpdpI8cUd4IOCmTwAZIyQB5rYnmT7XtvLi62Gz6+7J6CqD9xyDdADmyVdHBGP2B4tabHtBRGxvO7GgN5iJg80iYgXI+KR4v4bklZImlhvV0A1hDywHbanqOvbr2w1jEGJ5ZoW2fzC6lJtZKImSZN+1tef9lRf3wA9YHtnSTdLuiAiXk88P1vSbEmaPHlyP3cH9AwzeSChuOzfzZJ+EhG3pMaw1TAGA0IeaGLbkq6VtCIivlt3P0BfEPJA2dGSPivpeNtLitvpdTcFVMGaPNAkIh6QlL5iOzDIMJMHgIwR8gCQMUIeADJGyANAxgh5AMgYIQ8AGSPkgSa2r7O91vbSunsB+oqQB8r+S9KpdTcBtAIhDzSJiPslvVJ3H0ArEPIAkDFCHqjI9mzbHbY71q3j4ugYmAh5oCK2GsZgQMgDQMYIeaCJ7esl/VbSPrZX2/5i3T0BVbHVMNAkImbV3QPQKszkASBjhDwAZIyQB4CMEfIAkDFCHmiBJ9ZsqLsFIImQB4CMEfJAgu1Tba+0vcr2JXX3A1RFyANNbA+R9G+STpO0v6RZtvevtyugGkIeKDtc0qqI6IyIdyTNkzSj5p6ASgh5oGyipBcaHq8uatto3IVyy0ZOvGJgIuSBihp3oRwyakzd7QBJhDxQtkbSpIbHexQ1YNAh5IGyhyVNsz3V9nBJMyXdVnNPQCXsQgk0iYjNtr8m6S5JQyRdFxHLam4LqISQBxIi4g5Jd9TdB9BXLNcALfCxiZx4xcBEyANAxgh5AMhYv67JL9h6k/vz5wHABx0zeQDIGCEPABkj5AEgY4Q8AGTMEVF3D8CgZ/sNSSvr7qPJOEnr626iyUDsSRqYfTX2tGdEjK/yJnzjFWiNlRExve4mGtnuoKeeGYh9taonlmsAIGOEPABkjJAHWmNu3Q0k0FPPDcS+WtITJ14BIGPM5AEgY4Q8sB22T7W90vYq25cknh9h+4bi+UW2pzQ8d2lRX2n7lH7s6SLby20/bvse23s2PLfF9pLi1tKrXfWgr3Nsr2v4+V9qeO7ztp8ubp/vx57+paGfp2y/1vBcW46V7etsr7W9tJvnbfuqoufHbR/a8Fzvj1NEcOPGLXFT11WhnpG0l6Thkh6TtH/TmK9Kurq4P1PSDcX9/YvxIyRNLd5nSD/1dJykUcX9r7zXU/H4zRqP1TmSvp947a6SOos/dynu79IfPTWNP09dVwFr97E6RtKhkpZ28/zpku6UZElHSFrUl+PETB7o3uGSVkVEZ0S8I2mepBlNY2ZI+mFxf76kE2y7qM+LiLcj4llJq4r3a3tPEXFvRGwsHi5U14XI260nx6o7p0haEBGvRMSrkhZIOrWGnmZJur4FP3e7IuJ+Sa9sZ8gMST+KLgsljbU9QRWPEyEPdG+ipBcaHq8uaskxEbFZ0gZJu/Xwte3qqdEX1TUrfM+OtjtsL7T9yRb009u+/rxYgphve1IvX9uunlQsaU2V9JuGcruO1fvpru9Kx4lvvAKZsn22pOmSPtFQ3jMi1tjeS9JvbD8REc/0U0u/kHR9RLxt+6/U9RvQ8f30s9/PTEnzI2JLQ63OY9UyzOSB7q2RNKnh8R5FLTnG9lBJYyS93MPXtqsn2T5R0jclnRURb79Xj4g1xZ+dku6TdEgLeupRXxHxckMv10g6rKevbVdPDWaqaammjcfq/XTXd7Xj1I4TC9y45XBT12+6ner6Nf69E3cHNI05V9ueeL2xuH+Atj3x2qnWnHjtSU+HqOuE47Sm+i6SRhT3x0l6Wts5EdmGviY03P8zSQuL+7tKerbob5fi/q790VMxbl9Jz6n43lC7j1XxnlPU/YnXM7TtideH+nKcWK4BuhERm21/TdJd6vqkxnURscz2tyR1RMRtkq6V9N+2V6nrZNrM4rXLbN8oabmkzZLOjW2XAtrZ07cl7Szppq5zwHo+Is6StJ+kf7e9VV2/xV8REcv72lMv+vq67bPUdTxeUdenbRQRr9j+R0kPF2/3rYjY3onJVvYkdf07mxdFkhbadqxsXy/pWEnjbK+WdJmkYUXPV0u6Q12fsFklaaOkLxTPVTpOfOMVADLGmjwAZIyQB4CMEfIAkD
FCHgAyRsgDQMYIeQDIGCEPABkj5AEgY/8PtxUK3u9xvlYAAAAASUVORK5CYII=\n", 501 | "text/plain": [ 502 | "
" 503 | ] 504 | }, 505 | "metadata": { 506 | "needs_background": "light" 507 | }, 508 | "output_type": "display_data" 509 | } 510 | ], 511 | "source": [ 512 | "\n", 513 | "images, labels = next(iter(trainloader))\n", 514 | "\n", 515 | "img = images[0].view(1, 784)\n", 516 | "\n", 517 | "with torch.no_grad():\n", 518 | " logits = model.forward(img)\n", 519 | " \n", 520 | " ps = F.softmax(logits, dim=1)\n", 521 | " print(ps)\n", 522 | " view_classify(img.view(1, 28, 28), ps)" 523 | ] 524 | } 525 | ], 526 | "metadata": { 527 | "kernelspec": { 528 | "display_name": "Python 3", 529 | "language": "python", 530 | "name": "python3" 531 | }, 532 | "language_info": { 533 | "codemirror_mode": { 534 | "name": "ipython", 535 | "version": 3 536 | }, 537 | "file_extension": ".py", 538 | "mimetype": "text/x-python", 539 | "name": "python", 540 | "nbconvert_exporter": "python", 541 | "pygments_lexer": "ipython3", 542 | "version": "3.7.4" 543 | } 544 | }, 545 | "nbformat": 4, 546 | "nbformat_minor": 2 547 | } 548 | -------------------------------------------------------------------------------- /intro-notebooks/mnist_neural_network_files/mnist_neural_network_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/intro-notebooks/mnist_neural_network_files/mnist_neural_network_24_1.png -------------------------------------------------------------------------------- /intro-notebooks/mnist_neural_network_files/mnist_neural_network_2_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/intro-notebooks/mnist_neural_network_files/mnist_neural_network_2_0.png -------------------------------------------------------------------------------- /intro-notebooks/multilayer_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Multilayered Network\n", 8 | "To make the network more powerful, we can stack up individual neurons into layers. The output of one layer becomes the input for the next layer." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 30, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import torch" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 31, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "tensor([[0.3171]])\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "def activation(x):\n", 35 | " \"\"\"Create a sigmoid activation function.\n", 36 | " Good for outputs that fall between 0 and 1. 
(probability)\n", 37 | " args x: a torch tensor.\n", 38 | " \"\"\"\n", 39 | " return 1/(1 + torch.exp(-x))\n", 40 | "\n", 41 | "# generate some data\n", 42 | "torch.manual_seed(7)\n", 43 | "\n", 44 | "# features are 3 random normal variables\n", 45 | "features = torch.randn((1, 3))\n", 46 | "\n", 47 | "# define size of each layer in our network\n", 48 | "n_input = features.shape[1] # number of input units, must match number of input features\n", 49 | "n_hidden = 2\n", 50 | "n_output = 1\n", 51 | "\n", 52 | "# weights for input to hidden layer\n", 53 | "w1 = torch.randn(n_input, n_hidden)\n", 54 | "# weights for hidden layer to output layer\n", 55 | "w2 = torch.randn(n_hidden, n_output)\n", 56 | "\n", 57 | "# biases for the hidden layer and the output layer\n", 58 | "b1 = torch.randn(1, n_hidden)\n", 59 | "b2 = torch.randn(1, n_output)\n", 60 | "\n", 61 | "h = activation(torch.mm(features, w1) + b1)\n", 62 | "# use h as input to the next layer\n", 63 | "output = activation(torch.mm(h, w2) + b2)\n", 64 | "print(output)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "## numpys and tensors\n", 72 | "We can can create tensors from numpy arrays.\n", 73 | "The memory is shared between the Numpy array and the Tensor so a change in one will trigger a change in the other." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 32, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "import numpy as np\n", 83 | "a = np.random.rand(4, 3)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 33, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "tensor([[0.8519, 0.4287, 0.9095],\n", 95 | " [0.7690, 0.7815, 0.7152],\n", 96 | " [0.7211, 0.4287, 0.8067],\n", 97 | " [0.4219, 0.7282, 0.3916]], dtype=torch.float64)" 98 | ] 99 | }, 100 | "execution_count": 33, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "b = torch.from_numpy(a)\n", 107 | "b" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 34, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "tensor([[1.7038, 0.8573, 1.8190],\n", 119 | " [1.5379, 1.5630, 1.4305],\n", 120 | " [1.4422, 0.8574, 1.6133],\n", 121 | " [0.8438, 1.4564, 0.7832]], dtype=torch.float64)" 122 | ] 123 | }, 124 | "execution_count": 34, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "b.mul_(2)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 35, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "array([[1.70379337, 0.85731166, 1.81903246],\n", 142 | " [1.53791446, 1.5629657 , 1.4304528 ],\n", 143 | " [1.442249 , 0.8573502 , 1.6133007 ],\n", 144 | " [0.84380949, 1.45637187, 0.78323228]])" 145 | ] 146 | }, 147 | "execution_count": 35, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "a" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": 
"python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.6.5" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /intro-notebooks/pytorch_tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 9, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "torch.float32\n", 23 | "cpu\n", 24 | "torch.strided\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "t = torch.Tensor()\n", 30 | "print(t.dtype)\n", 31 | "print(t.device)\n", 32 | "print(t.layout)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 13, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "device(type='cuda', index=1)" 44 | ] 45 | }, 46 | "execution_count": 13, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | "source": [ 52 | "# specify a GPU to do computations\n", 53 | "device = torch.device('cuda:1')\n", 54 | "device" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Creating Pytorch Tensors from data" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 16, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "numpy.ndarray" 73 | ] 74 | }, 75 | "execution_count": 16, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "# create a numpy array\n", 82 | "data = np.array([1, 2, 3])\n", 83 | "type(data)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 30, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "t1 = torch.Tensor(data) # constructors\n", 93 | "t2 = torch.tensor(data) # factory function, instead of using a constructor\n", 94 | "t3 = torch.as_tensor(data)\n", 95 | "t4 = torch.from_numpy(data)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 32, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "torch.float32\n", 108 | "torch.int64\n" 109 | ] 110 | }, 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "torch.float32" 115 | ] 116 | }, 117 | "execution_count": 32, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "print(t1.dtype)\n", 124 | "print(t2.dtype)\n", 125 | "# the default data type of a tensor\n", 126 | "torch.get_default_dtype()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 35, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "tensor([1., 2., 3.], dtype=torch.float64)" 138 | ] 139 | }, 140 | "execution_count": 35, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "# type inference: the dtype is inferred from the incoming data\n", 147 | "torch.tensor(np.array([1, 2, 4])).dtype\n", 148 | "# you can also explicitly set the dtype\n", 149 | "torch.tensor(np.array([1,2,3]), dtype=torch.float64)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "raw", 154 | "metadata": {}, 155 | "source": [ 
156 | "data = np.array([1,2,3])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 36, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": [ 167 | "array([1, 2, 3])" 168 | ] 169 | }, 170 | "execution_count": 36, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "data" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Memory: Sharing vs Copying" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "* When using torch.as_tensor(), the torch tensor and numpy array share the memory location.\n", 191 | "* When using torch.tensor, torch.Tensor, we copy data into a new memory location.\n", 192 | "* Sharing data is more efficient because the data is only written on one mem location.\n", 193 | "* When sharing data, be careful not to inadvertently make unwanted changes in the underlying data without realizing the change impacts multiple objects Note: the memory sharing of as_tensor() does not work with built-in Python data structures like lists." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 66, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "array([1, 2, 3])" 205 | ] 206 | }, 207 | "execution_count": 66, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "data = np.array([1,2,3])\n", 214 | "data" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 67, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "tensor([1., 2., 3.])\n", 227 | "tensor([1, 2, 3])\n" 228 | ] 229 | } 230 | ], 231 | "source": [ 232 | "t1 = torch.Tensor(data)\n", 233 | "t2 = torch.tensor(data)\n", 234 | "t3 = torch.as_tensor(data)\n", 235 | "print(t1)\n", 236 | "print(t2)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 68, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "data[0] = 0\n", 246 | "data[1] = 0\n", 247 | "data[2] = 0" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 69, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "name": "stdout", 257 | "output_type": "stream", 258 | "text": [ 259 | "tensor([1., 2., 3.])\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "print(t1)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 70, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "tensor([0, 0, 0])\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "print(t3)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "kernelspec": { 294 | "display_name": "Python 3", 295 | "language": "python", 296 | "name": "python3" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | "version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.6.5" 309 | } 310 | }, 311 | "nbformat": 4, 312 | "nbformat_minor": 2 313 | } 314 | -------------------------------------------------------------------------------- 
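The notebook above points out that the memory sharing of `torch.as_tensor()` only applies to NumPy arrays and not to built-in Python lists, but it never demonstrates that case. A minimal sketch of the difference is below; the variable names are illustrative and not taken from the notebook.

```python
import numpy as np
import torch

arr = np.array([1, 2, 3])
lst = [1, 2, 3]

t_shared = torch.as_tensor(arr)  # shares memory with the NumPy array
t_copied = torch.as_tensor(lst)  # a Python list has no buffer to share, so the data is copied

arr[0] = 99
lst[0] = 99

print(t_shared)  # tensor([99,  2,  3])  -> reflects the change made to the array
print(t_copied)  # tensor([1, 2, 3])     -> unaffected by the change made to the list
```

To get an independent copy from a NumPy array as well, `torch.tensor(arr)` (or `torch.as_tensor(arr).clone()`) can be used instead.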
/intro-notebooks/reshaping_tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Reshaping Pytorch Tensors" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import torch" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 6, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "torch.Size([3, 4])" 28 | ] 29 | }, 30 | "execution_count": 6, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "# tensor with rank two tensor with two axis,\n", 37 | "# first axis: length = 3, second axis: length = 4\n", 38 | "# elements of axis 1: arrays, elements of axis 2 = numbers\n", 39 | "t = torch.tensor([\n", 40 | " [1, 2, 3, 4],\n", 41 | " [4, 4, 6, 8],\n", 42 | " [0, 4, 8, 12]\n", 43 | "], dtype=torch.float32)\n", 44 | "t.size()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 13, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "torch.Size([3, 4])\n" 57 | ] 58 | }, 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "tensor(12)" 63 | ] 64 | }, 65 | "execution_count": 13, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "print(t.shape)\n", 72 | "len(t.shape) # the number of axis\n", 73 | "# product of the tensor size shows the number of elements in the shape\n", 74 | "torch.tensor(t.shape).prod() # 12 (scalar components)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 14, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# we can also use numel (number of elements)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 15, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "12" 95 | ] 96 | }, 97 | "execution_count": 15, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "t.numel()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 16, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "tensor([[ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]])" 115 | ] 116 | }, 117 | "execution_count": 16, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "t.reshape(1, 12)\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 19, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "tensor([[ 1., 2., 3.],\n", 135 | " [ 4., 4., 4.],\n", 136 | " [ 6., 8., 0.],\n", 137 | " [ 4., 8., 12.]])" 138 | ] 139 | }, 140 | "execution_count": 19, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "t.reshape(4, 3)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 20, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "tensor([[ 1., 2.],\n", 158 | " [ 3., 4.],\n", 159 | " [ 4., 4.],\n", 160 | " [ 6., 8.],\n", 161 | " [ 0., 4.],\n", 162 | " [ 8., 12.]])" 163 | ] 164 | }, 165 | "execution_count": 20, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "t.reshape(6, 2)" 172 | ] 173 | }, 174 | { 
175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "We can also change the tensor by squezing and unsqueezing allowing us to expand\n", 179 | "or shrink a tensor." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 24, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "tensor([ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.])\n", 192 | "tensor([[ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]])\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "# Flatten a tensor: Change it into a lower rank tensor \n", 198 | "# (removing axis except from one-- creating a 1d array)\n", 199 | "# Occurs when transitioning from a convolutional layer to a fully connected layer\n", 200 | "print(t.reshape(1,12).squeeze())\n", 201 | "print(t.reshape(1, 12).squeeze().unsqueeze(dim=0))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 26, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "def flatten(t):\n", 211 | " t = t.reshape(1, -1) # - 1 tells reshape to figure get the length(tensor)\n", 212 | " t = t.squeeze()\n", 213 | " return t\n" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 27, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "tensor([ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.])" 225 | ] 226 | }, 227 | "execution_count": 27, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "flatten(t)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Concatenate tensors" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Use the `cat()` function, resulting in a tensor having a shape that depends on the shape of the two tensors.\n" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 30, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "tensor([[1, 2],\n", 259 | " [3, 4],\n", 260 | " [5, 6],\n", 261 | " [7, 8]])" 262 | ] 263 | }, 264 | "execution_count": 30, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "t1 = torch.tensor([\n", 271 | " [1,2],\n", 272 | " [3,4],\n", 273 | "])\n", 274 | "t2 = torch.tensor([\n", 275 | " [5, 6],\n", 276 | " [7, 8]\n", 277 | "])\n", 278 | "# combine the tensors on the row axis (axis-0)\n", 279 | "torch.cat((t1, t2), dim=0)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 36, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "tensor([[1, 2, 5, 6],\n", 291 | " [3, 4, 7, 8]])" 292 | ] 293 | }, 294 | "execution_count": 36, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "# We can combine their column-axis (axis-1) like this\n", 301 | "torch.cat((t1, t2), dim=1)" 302 | ] 303 | } 304 | ], 305 | "metadata": { 306 | "kernelspec": { 307 | "display_name": "Python 3", 308 | "language": "python", 309 | "name": "python3" 310 | }, 311 | "language_info": { 312 | "codemirror_mode": { 313 | "name": "ipython", 314 | "version": 3 315 | }, 316 | "file_extension": ".py", 317 | "mimetype": "text/x-python", 318 | "name": "python", 319 | "nbconvert_exporter": "python", 320 | "pygments_lexer": "ipython3", 321 | "version": "3.6.5" 322 | } 323 | }, 324 | 
"nbformat": 4, 325 | "nbformat_minor": 2 326 | } 327 | -------------------------------------------------------------------------------- /intro-notebooks/reshaping_tensors.md: -------------------------------------------------------------------------------- 1 | 2 | ## Reshaping Pytorch Tensors 3 | 4 | 5 | ```python 6 | import torch 7 | ``` 8 | 9 | 10 | ```python 11 | # tensor with rank two tensor with two axis, 12 | # first axis: length = 3, second axis: length = 4 13 | # elements of axis 1: arrays, elements of axis 2 = numbers 14 | t = torch.tensor([ 15 | [1, 2, 3, 4], 16 | [4, 4, 6, 8], 17 | [0, 4, 8, 12] 18 | ], dtype=torch.float32) 19 | t.size() 20 | ``` 21 | 22 | 23 | 24 | 25 | torch.Size([3, 4]) 26 | 27 | 28 | 29 | 30 | ```python 31 | print(t.shape) 32 | len(t.shape) # the number of axis 33 | # product of the tensor size shows the number of elements in the shape 34 | torch.tensor(t.shape).prod() # 12 (scalar components) 35 | ``` 36 | 37 | torch.Size([3, 4]) 38 | 39 | 40 | 41 | 42 | 43 | tensor(12) 44 | 45 | 46 | 47 | 48 | ```python 49 | # we can also use numel (number of elements) 50 | ``` 51 | 52 | 53 | ```python 54 | t.numel() 55 | ``` 56 | 57 | 58 | 59 | 60 | 12 61 | 62 | 63 | 64 | 65 | ```python 66 | t.reshape(1, 12) 67 | 68 | ``` 69 | 70 | 71 | 72 | 73 | tensor([[ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]]) 74 | 75 | 76 | 77 | 78 | ```python 79 | t.reshape(4, 3) 80 | ``` 81 | 82 | 83 | 84 | 85 | tensor([[ 1., 2., 3.], 86 | [ 4., 4., 4.], 87 | [ 6., 8., 0.], 88 | [ 4., 8., 12.]]) 89 | 90 | 91 | 92 | 93 | ```python 94 | t.reshape(6, 2) 95 | ``` 96 | 97 | 98 | 99 | 100 | tensor([[ 1., 2.], 101 | [ 3., 4.], 102 | [ 4., 4.], 103 | [ 6., 8.], 104 | [ 0., 4.], 105 | [ 8., 12.]]) 106 | 107 | 108 | 109 | We can also change the tensor by squezing and unsqueezing allowing us to expand 110 | or shrink a tensor. 111 | 112 | 113 | ```python 114 | # Flatten a tensor: Change it into a lower rank tensor 115 | # (removing axis except from one-- creating a 1d array) 116 | # Occurs when transitioning from a convolutional layer to a fully connected layer 117 | print(t.reshape(1,12).squeeze()) 118 | print(t.reshape(1, 12).squeeze().unsqueeze(dim=0)) 119 | ``` 120 | 121 | tensor([ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]) 122 | tensor([[ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]]) 123 | 124 | 125 | 126 | ```python 127 | def flatten(t): 128 | t = t.reshape(1, -1) # - 1 tells reshape to figure get the length(tensor) 129 | t = t.squeeze() 130 | return t 131 | 132 | ``` 133 | 134 | 135 | ```python 136 | flatten(t) 137 | ``` 138 | 139 | 140 | 141 | 142 | tensor([ 1., 2., 3., 4., 4., 4., 6., 8., 0., 4., 8., 12.]) 143 | 144 | 145 | 146 | ### Concatenate tensors 147 | 148 | Use the `cat()` function, resulting in a tensor having a shape that depends on the shape of the two tensors. 
149 | 150 | 151 | 152 | ```python 153 | t1 = torch.tensor([ 154 | [1,2], 155 | [3,4], 156 | ]) 157 | t2 = torch.tensor([ 158 | [5, 6], 159 | [7, 8] 160 | ]) 161 | # combine the tensors on the row axis (axis-0) 162 | torch.cat((t1, t2), dim=0) 163 | ``` 164 | 165 | 166 | 167 | 168 | tensor([[1, 2], 169 | [3, 4], 170 | [5, 6], 171 | [7, 8]]) 172 | 173 | 174 | 175 | 176 | ```python 177 | # We can combine their column-axis (axis-1) like this 178 | torch.cat((t1, t2), dim=1) 179 | ``` 180 | 181 | 182 | 183 | 184 | tensor([[1, 2, 5, 6], 185 | [3, 4, 7, 8]]) 186 | 187 | 188 | -------------------------------------------------------------------------------- /intro-notebooks/single_layer_network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "colab": {}, 8 | "colab_type": "code", 9 | "id": "wPY4it_wHzvW" 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import torch" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": { 20 | "colab": {}, 21 | "colab_type": "code", 22 | "id": "HY0eFW_8H-3r" 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "def activation(x):\n", 27 | " \"\"\"Create a sigmoid activation function.\n", 28 | " Good for outputs that fall between 0 and 1. (probability)\n", 29 | " args x: a torch tensor.\n", 30 | " \"\"\"\n", 31 | " return 1/(1 + torch.exp(-x))\n", 32 | " " 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 21, 38 | "metadata": { 39 | "colab": {}, 40 | "colab_type": "code", 41 | "id": "y9qIUKojIYcj" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# generate some data\n", 46 | "# set some random seed so that the result is predictatble\n", 47 | "data = torch.manual_seed(7) \n", 48 | "\n", 49 | "# set some features to 5 random variables\n", 50 | "# 2-dimensional matrix/tensor of 1 row and 5 columns\n", 51 | "features = torch.randn((1,5))\n", 52 | "\n", 53 | "# set weights\n", 54 | "weights = torch.randn_like(features)\n", 55 | "\n", 56 | "# set true bias term\n", 57 | "bias = torch.randn((1,1))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 22, 63 | "metadata": { 64 | "colab": { 65 | "base_uri": "https://localhost:8080/", 66 | "height": 34 67 | }, 68 | "colab_type": "code", 69 | "id": "7vdqvgK3c29m", 70 | "outputId": "aff8fffd-2c90-467c-9e74-c4285523bfc9" 71 | }, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "tensor([[0.1595]])\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "# calculate y output\n", 83 | "# y = (weights.features + bias)\n", 84 | "\n", 85 | "x = torch.sum(weights * features) + bias\n", 86 | "y = activation(x)\n", 87 | "\n", 88 | "print(y)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 23, 94 | "metadata": { 95 | "colab": {}, 96 | "colab_type": "code", 97 | "id": "aJ09sXxNd4gP" 98 | }, 99 | "outputs": [ 100 | { 101 | "ename": "RuntimeError", 102 | "evalue": "size mismatch, m1: [1 x 5], m2: [1 x 5] at ../aten/src/TH/generic/THTensorMath.cpp:961", 103 | "output_type": "error", 104 | "traceback": [ 105 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 106 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 107 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# better to do matrix multiplication because it's 
optimized\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 108 | "\u001b[0;31mRuntimeError\u001b[0m: size mismatch, m1: [1 x 5], m2: [1 x 5] at ../aten/src/TH/generic/THTensorMath.cpp:961" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "# better to do matrix multiplication because it's optimized\n", 114 | "torch.mm(weights, features)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "colab": {}, 122 | "colab_type": "code", 123 | "id": "F_AApoJiWvAJ" 124 | }, 125 | "outputs": [], 126 | "source": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "colab_type": "text", 132 | "id": "68IgplvGXA5J" 133 | }, 134 | "source": [ 135 | "Since we are doing matrix multiplication, we need the matrices shapes to match.\n", 136 | "We'll change the shape of weights for the mm to work." 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 26, 142 | "metadata": { 143 | "colab": {}, 144 | "colab_type": "code", 145 | "id": "eMgeaC0-W_ZA" 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "torch.Size([1, 5])\n", 153 | "tensor([[0.6104, 0.4047, 0.3706, 0.7883, 0.2323],\n", 154 | " [0.5914, 0.5095, 0.4952, 0.6713, 0.4296],\n", 155 | " [0.5341, 0.7836, 0.8153, 0.2581, 0.9169],\n", 156 | " [0.5738, 0.6050, 0.6103, 0.5408, 0.6344],\n", 157 | " [0.6375, 0.2680, 0.2184, 0.8995, 0.0740]])\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "# weight.reshape(a, b) reshapes the data into a tensor of size (a, b)\n", 163 | "# weight.resize_(a, b) returns the same tensor with a different shape.\n", 164 | "# if \n", 165 | "# weight.view(a, b) returns a new tensor\n", 166 | "print(weights.shape)\n", 167 | "reshaped_weights = weights.view(5, 1)\n", 168 | "y = activation(torch.mm(reshaped_weights, features) + bias)\n", 169 | "print(y)\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 0, 175 | "metadata": { 176 | "colab": {}, 177 | "colab_type": "code", 178 | "id": "Y-t9h0_RXAAB" 179 | }, 180 | "outputs": [], 181 | "source": [] 182 | } 183 | ], 184 | "metadata": { 185 | "colab": { 186 | "name": "single_layer_network.ipynb", 187 | "provenance": [], 188 | "version": "0.3.2" 189 | }, 190 | "kernelspec": { 191 | "display_name": "Python 3", 192 | "language": "python", 193 | "name": "python3" 194 | }, 195 | "language_info": { 196 | "codemirror_mode": { 197 | "name": "ipython", 198 | "version": 3 199 | }, 200 | "file_extension": ".py", 201 | "mimetype": "text/x-python", 202 | "name": "python", 203 | "nbconvert_exporter": "python", 204 | "pygments_lexer": "ipython3", 205 | "version": "3.6.5" 206 | } 207 | }, 208 | "nbformat": 4, 209 | "nbformat_minor": 1 210 | } 211 | -------------------------------------------------------------------------------- /intro-notebooks/single_layer_network.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ```python 4 | import torch 5 | ``` 6 | 7 | 8 | ```python 9 | def activation(x): 10 | """Create a sigmoid activation function. 11 | Good for outputs that fall between 0 and 1. (probability) 12 | args x: a torch tensor. 
13 | """ 14 | return 1/(1 + torch.exp(-x)) 15 | 16 | ``` 17 | 18 | 19 | ```python 20 | # generate some data 21 | # set some random seed so that the result is predictatble 22 | data = torch.manual_seed(7) 23 | 24 | # set some features to 5 random variables 25 | # 2-dimensional matrix/tensor of 1 row and 5 columns 26 | features = torch.randn((1,5)) 27 | 28 | # set weights 29 | weights = torch.randn_like(features) 30 | 31 | # set true bias term 32 | bias = torch.randn((1,1)) 33 | ``` 34 | 35 | 36 | ```python 37 | # calculate y output 38 | # y = (weights.features + bias) 39 | 40 | x = torch.sum(weights * features) + bias 41 | y = activation(x) 42 | 43 | print(y) 44 | ``` 45 | 46 | tensor([[0.1595]]) 47 | 48 | 49 | 50 | ```python 51 | # better to do matrix multiplication because it's optimized 52 | torch.mm(weights, features) 53 | ``` 54 | 55 | 56 | --------------------------------------------------------------------------- 57 | 58 | RuntimeError Traceback (most recent call last) 59 | 60 | in 61 | 1 # better to do matrix multiplication because it's optimized 62 | ----> 2 torch.mm(weights, features) 63 | 64 | 65 | RuntimeError: size mismatch, m1: [1 x 5], m2: [1 x 5] at ../aten/src/TH/generic/THTensorMath.cpp:961 66 | 67 | 68 | 69 | ```python 70 | 71 | ``` 72 | 73 | Since we are doing matrix multiplication, we need the matrices shapes to match. 74 | We'll change the shape of weights for the mm to work. 75 | 76 | 77 | ```python 78 | # weight.reshape(a, b) reshapes the data into a tensor of size (a, b) 79 | # weight.resize_(a, b) returns the same tensor with a different shape. 80 | # if 81 | # weight.view(a, b) returns a new tensor 82 | print(weights.shape) 83 | reshaped_weights = weights.view(5, 1) 84 | y = activation(torch.mm(reshaped_weights, features) + bias) 85 | print(y) 86 | 87 | ``` 88 | 89 | torch.Size([1, 5]) 90 | tensor([[0.6104, 0.4047, 0.3706, 0.7883, 0.2323], 91 | [0.5914, 0.5095, 0.4952, 0.6713, 0.4296], 92 | [0.5341, 0.7836, 0.8153, 0.2581, 0.9169], 93 | [0.5738, 0.6050, 0.6103, 0.5408, 0.6344], 94 | [0.6375, 0.2680, 0.2184, 0.8995, 0.0740]]) 95 | 96 | 97 | 98 | ```python 99 | 100 | ``` 101 | -------------------------------------------------------------------------------- /intro-notebooks/transfer_learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Transfer Learning\n", 8 | "Most of the time you won't want to train a whole convolutional network yourself. Modern ConvNets training on huge datasets like ImageNet take weeks on multiple GPUs. \n", 9 | "> Instead, most people use a pretrained network either as a fixed feature extractor, or as an initial network to fine tune. \n", 10 | "\n", 11 | "We do transfer learning when we use a pre-trained network on images not in the training set. \n", 12 | "We'll use transfer learning to train a network that can classify cats and dogs photos.\n", 13 | "\n", 14 | "In this notebook, you'll be using a pre-trained model from the [ImageNet dataset](http://www.image-net.org/) as a feature extractor. Below is a diagram showing the architecture of the model we'll be using. It has a series of convolutional and maxpooling layers, and some fully-connected layers at the end that aid in classifying the images (For us it's cat's and dogs).\n", 15 | "\n", 16 | "\n", 17 | "\n", 18 | "The idea is to keep all the convolutional layers, but **replace the final fully-connected layer** with our own classifier. 
This way we can use VGGNet as a _fixed feature extractor_ for our images then easily train a simple classifier on top of that. \n", 19 | "* Use all but the last fully-connected layer as a fixed feature extractor.\n", 20 | "* Define a new, final classification layer and apply it to a task of our choice!" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 4, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "ename": "ModuleNotFoundError", 30 | "evalue": "No module named 'torchvision'", 31 | "output_type": "error", 32 | "traceback": [ 33 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 34 | "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 35 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mVariable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mtorchvision\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatasets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 36 | "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'torchvision'" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "%matplotlib inline\n", 42 | "%config InlineBackend.figure_format = 'retina'\n", 43 | "\n", 44 | "import matplotlib.pyplot as plt\n", 45 | "\n", 46 | "import torch\n", 47 | "from torch import nn\n", 48 | "from torch import optim\n", 49 | "from torch.autograd import Variable\n", 50 | "import torch.nn.functional as F\n", 51 | "from torchvision import datasets, transforms, models\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Most of the require a 224x224 image as input.\n", 59 | "We need to resize the images." 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "data_dir = 'dogs_vs_cats'\n", 69 | "\n", 70 | "# The model takes 224x224 images as input, so we resize all of them\n", 71 | "data_transform = transforms.Compose([transforms.RandomResizedCrop(224), \n", 72 | " transforms.ToTensor()])\n", 73 | "\n", 74 | "train_data = datasets.ImageFolder(data_dir + '/train', transform=data_transform)\n", 75 | "test_data = datasets.ImageFolder(data_dir + '/test', transform=data_transform)\n", 76 | "\n", 77 | "train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)\n", 78 | "test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)\n", 79 | "\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# The model has two parts: the features, and the classifier. 
\n", 89 | "\n", 90 | "classifier = nn.Sequential(\n", 91 | " nn.Linear(2048, 512),\n", 92 | " nn.ReLU(),\n", 93 | " nn.Dropout(p=0.2),\n", 94 | " nn.Linear(512, 2),\n", 95 | " nn.LogSoftmax(dim=1))\n", 96 | "\n", 97 | "# replace the classifier with our own\n", 98 | "model.fc = classifier\n", 99 | "\n", 100 | "# use negative log likelihood loss\n", 101 | "criterion = nn.NLLLoss()\n", 102 | "optimizer = optim.Adam(model.fc.parameters(), lr=0.003)\n", 103 | "\n", 104 | "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", 105 | "model.to(device)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "epochs = 1\n", 115 | "steps = 0\n", 116 | "running_loss = 0\n", 117 | "print_every = 5\n", 118 | "\n", 119 | "for epoch in range(epochs):\n", 120 | " for images, labels in train_loader:\n", 121 | " steps += 1\n", 122 | " \n", 123 | " images, labels = images.to(device), labels.to(device)\n", 124 | " \n", 125 | " # clear gradients\n", 126 | " optimizer.zero_grad()\n", 127 | " \n", 128 | " logps = model(images)\n", 129 | " loss = criterion(logps, labels)\n", 130 | " loss.backward()\n", 131 | " optimizer.step()\n", 132 | " \n", 133 | " running_loss += loss.item()\n", 134 | " \n", 135 | " # after training loop, test our network's accuracy and loss\n", 136 | " if steps % print_every == 0:\n", 137 | " model.eval() # turn model into evaluation mode to make predictions\n", 138 | " test_loss = 0\n", 139 | " accuracy = 0\n", 140 | " \n", 141 | " for images, labels in test_loader:\n", 142 | " \n", 143 | " images, labels = images.to(device), labels.to(device)\n", 144 | " \n", 145 | " logps = model(images)\n", 146 | " loss = criterion(logps, labels)\n", 147 | " test_loss += loss.item()\n", 148 | " \n", 149 | " # accuracy\n", 150 | " ps = torch.exp(logps)\n", 151 | " top_ps, top_class = ps.topk(1, dim=1)\n", 152 | " equality = top_class == labels.view(*top_class.shape)\n", 153 | " accuracy += torch.mean(equality.type(torch.FloatTensor)).item()\n", 154 | " \n", 155 | " \n", 156 | " print(f'Epoch {epoch+1}/{epochs}.. ')\n", 157 | " print(f'Train loss: {running_loss/print_every}')\n", 158 | " print(f'Test loss: {test_loss/len(test_loader)}')\n", 159 | " print(f'Test accuracy: {accuracy}/{len(test_loader)}')\n", 160 | " \n", 161 | " running_loss=0\n", 162 | " model.train()" 163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.7.4" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /mnist_neural_network.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ```python 4 | import torch 5 | from torchvision import datasets, transforms 6 | 7 | # define a transform to normalize the data 8 | # if the img has three channels, you should have three number for mean, 9 | # for example, img is RGB, mean is [0.5, 0.5, 0.5], the normalize result is R * 0.5, G * 0.5, B * 0.5. 
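# (More precisely, transforms.Normalize(mean, std) computes (x - mean) / std for each channel,
#  so with mean=0.5 and std=0.5 a channel becomes (x - 0.5) / 0.5, which maps pixel values
#  from [0, 1] to [-1, 1] rather than simply scaling them by 0.5.)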
10 | # If img is grey type that only one channel, mean should be [0.5], the normalize result is R * 0.5 11 | transform = transforms.Compose([transforms.ToTensor(), 12 | transforms.Normalize([0.5], [0.5]) 13 | ]) 14 | # download and load the traning data 15 | trainset = datasets.MNIST('data/MNIST_data/', download=True, train=True, transform=transform) 16 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True) 17 | ``` 18 | 19 | 20 | ```python 21 | # make an iterator for looping 22 | dataiter = iter(trainloader) 23 | images, labels = dataiter.next() 24 | print(type(images)) 25 | print(images[0].shape) 26 | # NOTE: The batch size is the number of images we get in one iteration 27 | ``` 28 | 29 | 30 | torch.Size([1, 28, 28]) 31 | torch.Size([64, 1, 28, 28]) 32 | 33 | 34 | 35 | ```python 36 | import matplotlib.pyplot as plt 37 | plt.imshow(images[1].numpy().squeeze(), cmap='Greys_r'); 38 | ``` 39 | 40 | 41 | ![png](mnist_neural_network_files/mnist_neural_network_2_0.png) 42 | 43 | 44 | Time to create a dense fully-connected network. 45 | 46 | Each unit in one layer is connected to the other in the next layer. 47 | The input to each layer must be one-dimensional vector. But our images are 28*28 2D tensors, so we need to convert them to 1D vectors. Therefore: 48 | * Convert/Flatten the batch of images of shape(64, 1, 28, 28) into (64, 28 * 28=784). 49 | * For the output layer, we also need 10 output units for the 10 classes(digits) 50 | * Also convert the network output into a probability distribution. 51 | 52 | 53 | ```python 54 | flattened_images = images.view(64, 28 * 28) 55 | ``` 56 | 57 | 58 | ```python 59 | print(flattened_images.shape) 60 | ``` 61 | 62 | torch.Size([64, 784]) 63 | 64 | 65 | 66 | ```python 67 | def activation(x): 68 | """Create a sigmoid activation function. 69 | Good for outputs that fall between 0 and 1. (probability) 70 | args x: a torch tensor. 71 | """ 72 | return 1/(1 + torch.exp(-x)) 73 | 74 | def softmax(x): 75 | """Create a softmax activation function. 76 | Good for outputs that fall between 0 and 1. (probability) 77 | args x: a torch tensor. 78 | """ 79 | return torch.exp(x)/torch.sum(torch.exp(x), dim=1).view(-1, 1) 80 | ``` 81 | 82 | 83 | ```python 84 | # flatten the images to shape(64, 784) 85 | inputs = images.view(images.shape[0], -1) 86 | 87 | # create parameters 88 | w1 = torch.randn(784, 256) 89 | b1 = torch.randn(256) 90 | 91 | w2 = torch.randn(256, 10) 92 | b2 = torch.randn(10) 93 | 94 | h = activation(torch.mm(inputs, w1) + b1) 95 | 96 | out = torch.mm(h, w2) + b2 97 | probabilities = softmax(out) 98 | print(probabilities.shape) 99 | print(probabilities.sum(dim=1)) 100 | ``` 101 | 102 | torch.Size([64, 10]) 103 | tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 104 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 105 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 106 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 107 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 108 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 109 | 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 110 | 1.0000]) 111 | 112 | 113 | ## Using the Torch nn to create networks 114 | 115 | 116 | ```python 117 | from torch import nn 118 | import torch.nn.functional as F 119 | 120 | class Network(nn.Module): 121 | """Use relu(Rectified linear unit) as the activation function. 
122 | Networks tend to train a lot faster when using relu. 123 | For a network to approximate a non-linear function, the activation 124 | function must be non-linear. 125 | """ 126 | def __init__(self): 127 | super().__init__() 128 | # inputs to hidden layer linear transformation 129 | self.hidden_layer1 = nn.Linear(784, 128) # 256 outputs 130 | self.hidden_layer2 = nn.Linear(128, 64) 131 | # output layer, 10 units one for each digit 132 | self.output = nn.Linear(64, 10) 133 | 134 | def forward(self, x): 135 | # hidden layer with sigmoid activation 136 | x = F.relu(self.hidden_layer1(x)) 137 | x = F.relu(self.hidden_layer2(x)) 138 | # Output layer with softmax activation 139 | x = F.softmax(self.output(x), dim=1) 140 | return x 141 | ``` 142 | 143 | 144 | ```python 145 | model = Network() 146 | model 147 | ``` 148 | 149 | 150 | 151 | 152 | Network( 153 | (hidden_layer1): Linear(in_features=784, out_features=128, bias=True) 154 | (hidden_layer2): Linear(in_features=128, out_features=64, bias=True) 155 | (output): Linear(in_features=64, out_features=10, bias=True) 156 | ) 157 | 158 | 159 | 160 | ## Training our network 161 | 162 | 163 | 164 | ```python 165 | model = nn.Sequential(nn.Linear(784, 128), 166 | nn.ReLU(), 167 | nn.Linear(128, 64), 168 | nn.ReLU(), 169 | nn.Linear(64, 10) 170 | ) 171 | 172 | # define the loss 173 | criterion = nn.CrossEntropyLoss() 174 | 175 | # Prepare data 176 | images, labels = next(iter(trainloader)) 177 | 178 | # flatten images 179 | images = images.view(images.shape[0], -1) 180 | 181 | # forward pass, get the logits 182 | logits = model(images) 183 | # calculate the loss with the logits and the labels 184 | loss = criterion(logits, labels) 185 | print(loss) 186 | ``` 187 | 188 | tensor(2.3058, grad_fn=) 189 | 190 | 191 | It's more convenient to build model with a log-softmax output using `nn.LogSoftmax` 192 | We can get actual probabilities by taking the exponential torch.exp(output). 193 | We'll also use the negative log likelihood loss, `nn.NLLLoss` 194 | 195 | 196 | ```python 197 | model = nn.Sequential(nn.Linear(784, 128), 198 | nn.ReLU(), 199 | nn.Linear(128, 64), 200 | nn.ReLU(), 201 | nn.Linear(64, 10), 202 | nn.LogSoftmax(dim=1), 203 | ) 204 | criterion = nn.NLLLoss() 205 | 206 | logits = model(images) 207 | loss = criterion(logits, labels) 208 | print(loss) 209 | 210 | ``` 211 | 212 | tensor(2.3025, grad_fn=) 213 | 214 | 215 | ## USing Autograd to perform backpropagation 216 | 217 | After calculating loss, we perform backpropagation. Enter `autograd` 218 | 219 | We use it to calculate the gradients of all our parameters with respect to the loss we got. Autograd goes backwards through the tensor operations, calculating gradients along the way. 220 | * Set `requires_grad=True` on a tensor when creating the tensor. 
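In the example below, `y = x ** 2` and `z = y.mean()` over the four entries of the 2x2 tensor `x`, so `z = (x_1^2 + x_2^2 + x_3^2 + x_4^2) / 4` and the gradient with respect to each entry is `dz/dx_i = 2 * x_i / 4 = x_i / 2`. After `z.backward()`, `x.grad` therefore holds half of each entry of `x`.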
221 | 222 | 223 | ```python 224 | x = torch.randn(2,2, requires_grad=True) 225 | y = x** 2 226 | z = y.mean() 227 | z.backward() 228 | print(x.grad) 229 | ``` 230 | 231 | tensor([[-0.4997, -0.1425], 232 | [-0.8944, 0.0633]]) 233 | tensor([[-0.4997, -0.1425], 234 | [-0.8944, 0.0633]], grad_fn=) 235 | 236 | 237 | 238 | ```python 239 | # Back to the model we created 240 | ``` 241 | 242 | 243 | ```python 244 | print('Before backward pass: \n', model[0].weight.grad) 245 | loss.backward() 246 | print('After backward pass: \n', model[0].weight.grad) 247 | ``` 248 | 249 | Before backward pass: 250 | None 251 | After backward pass: 252 | tensor([[-0.0029, -0.0029, -0.0029, ..., -0.0029, -0.0029, -0.0029], 253 | [-0.0028, -0.0028, -0.0028, ..., -0.0028, -0.0028, -0.0028], 254 | [-0.0006, -0.0006, -0.0006, ..., -0.0006, -0.0006, -0.0006], 255 | ..., 256 | [-0.0011, -0.0011, -0.0011, ..., -0.0011, -0.0011, -0.0011], 257 | [-0.0036, -0.0036, -0.0036, ..., -0.0036, -0.0036, -0.0036], 258 | [-0.0026, -0.0026, -0.0026, ..., -0.0026, -0.0026, -0.0026]]) 259 | 260 | 261 | We also need an optimizer that'll update weights with the gradients from the backward pass. 262 | From Pytorch's `optim` package, we can use stochastic gradient descenc with `optim.SGD` 263 | 264 | 265 | 266 | 267 | ```python 268 | from torch import optim 269 | # pass in the parameter to optimize and a learning rate 270 | optimizer = optim.SGD(model.parameters(), lr=0.01) 271 | ``` 272 | 273 | 274 | ```python 275 | model = nn.Sequential(nn.Linear(784, 128), 276 | nn.ReLU(), 277 | nn.Linear(128, 64), 278 | nn.ReLU(), 279 | nn.Linear(64, 10), 280 | nn.LogSoftmax(dim=1), 281 | ) 282 | criterion = nn.NLLLoss() 283 | optimizer = optim.SGD(model.parameters(), lr=0.01) 284 | 285 | epochs = 5 286 | for e in range(epochs): 287 | running_loss = 0 288 | for images, labels in trainloader: 289 | # Flatten Images into 784 long vector for the input layer 290 | images = images.view(images.shape[0], -1) 291 | 292 | # clear gradients because they accumulate 293 | optimizer.zero_grad() 294 | # forward pass 295 | output = model.forward(images) 296 | loss = criterion(output, labels) 297 | loss.backward() 298 | optimizer.step() 299 | 300 | running_loss += loss.item() 301 | else: 302 | print(f'Training loss: {running_loss/len(trainloader)}') 303 | ``` 304 | 305 | Training loss: 1.0614541531689385 306 | Training loss: 0.3895804712147728 307 | Training loss: 0.3246205799631091 308 | Training loss: 0.29300587304206543 309 | Training loss: 0.26864237868900237 310 | 311 | 312 | 313 | ```python 314 | # create a helper to view the probability distribution 315 | import matplotlib.pyplot as plt 316 | import numpy as np 317 | 318 | def view_classify(img, ps): 319 | """Function for viewing an image and it's predicted classes.""" 320 | ps = ps.data.numpy().squeeze() 321 | 322 | fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2) 323 | ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze()) 324 | ax1.axis('off') 325 | ax2.barh(np.arange(10), ps) 326 | ax2.set_aspect(0.1) 327 | ax2.set_yticks(np.arange(10)) 328 | ax2.set_yticklabels(np.arange(10)) 329 | 330 | ``` 331 | 332 | 333 | ```python 334 | 335 | images, labels = next(iter(trainloader)) 336 | 337 | img = images[0].view(1, 784) 338 | 339 | with torch.no_grad(): 340 | logits = model.forward(img) 341 | 342 | ps = F.softmax(logits, dim=1) 343 | print(ps) 344 | view_classify(img.view(1, 28, 28), ps) 345 | ``` 346 | 347 | tensor([[1.2855e-02, 5.0043e-05, 7.8326e-04, 5.7256e-03, 4.5138e-03, 9.6925e-01, 348 | 1.3272e-03, 
1.2127e-03, 5.4744e-04, 3.7324e-03]]) 349 | 350 | 351 | 352 | ![png](mnist_neural_network_files/mnist_neural_network_24_1.png) 353 | 354 | -------------------------------------------------------------------------------- /multilayer_network.md: -------------------------------------------------------------------------------- 1 | 2 | ## Multilayered Network 3 | To make the network more powerful, we can stack up individual neurons into layers. The output of one layer becomes the input for the next layer. 4 | 5 | 6 | ```python 7 | import torch 8 | ``` 9 | 10 | 11 | ```python 12 | def activation(x): 13 | """Create a sigmoid activation function. 14 | Good for outputs that fall between 0 and 1. (probability) 15 | args x: a torch tensor. 16 | """ 17 | return 1/(1 + torch.exp(-x)) 18 | 19 | # generate some data 20 | torch.manual_seed(7) 21 | 22 | # features are 3 random normal variables 23 | features = torch.randn((1, 3)) 24 | 25 | # define size of each layer in our network 26 | n_input = features.shape[1] # number of input units, must match number of input features 27 | n_hidden = 2 28 | n_output = 1 29 | 30 | # weights for input to hidden layer 31 | w1 = torch.randn(n_input, n_hidden) 32 | # weights for hidden layer to output layer 33 | w2 = torch.randn(n_hidden, n_output) 34 | 35 | # biases for the hidden layer and the output layer 36 | b1 = torch.randn(1, n_hidden) 37 | b2 = torch.randn(1, n_output) 38 | 39 | h = activation(torch.mm(features, w1) + b1) 40 | # use h as input to the next layer 41 | output = activation(torch.mm(h, w2) + b2) 42 | print(output) 43 | ``` 44 | 45 | tensor([[0.3171]]) 46 | 47 | 48 | ## numpys and tensors 49 | We can can create tensors from numpy arrays. 50 | The memory is shared between the Numpy array and the Tensor so a change in one will trigger a change in the other. 
51 | 52 | 53 | ```python 54 | import numpy as np 55 | a = np.random.rand(4, 3) 56 | ``` 57 | 58 | 59 | ```python 60 | b = torch.from_numpy(a) 61 | b 62 | ``` 63 | 64 | 65 | 66 | 67 | tensor([[0.8519, 0.4287, 0.9095], 68 | [0.7690, 0.7815, 0.7152], 69 | [0.7211, 0.4287, 0.8067], 70 | [0.4219, 0.7282, 0.3916]], dtype=torch.float64) 71 | 72 | 73 | 74 | 75 | ```python 76 | b.mul_(2) 77 | ``` 78 | 79 | 80 | 81 | 82 | tensor([[1.7038, 0.8573, 1.8190], 83 | [1.5379, 1.5630, 1.4305], 84 | [1.4422, 0.8574, 1.6133], 85 | [0.8438, 1.4564, 0.7832]], dtype=torch.float64) 86 | 87 | 88 | 89 | 90 | ```python 91 | a 92 | ``` 93 | 94 | 95 | 96 | 97 | array([[1.70379337, 0.85731166, 1.81903246], 98 | [1.53791446, 1.5629657 , 1.4304528 ], 99 | [1.442249 , 0.8573502 , 1.6133007 ], 100 | [0.84380949, 1.45637187, 0.78323228]]) 101 | 102 | 103 | 104 | 105 | ```python 106 | 107 | ``` 108 | -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/processed/test.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/FASHION_MNIST_data/FashionMNIST/processed/test.pt -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/processed/training.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/FASHION_MNIST_data/FashionMNIST/processed/training.pt -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/FASHION_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/processed/test.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/MNIST_data/MNIST/processed/test.pt -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/processed/training.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/MNIST_data/MNIST/processed/training.pt -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- 1 | '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/raw/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/MNIST_data/MNIST/raw/train-images-idx3-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/MNIST_data/MNIST/raw/train-labels-idx1-ubyte: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/MNIST_data/MNIST/raw/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /raw_mnist_data/feature_extractor.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gitgik/differential-privacy-federated-learning/4063885a2184b69040ef76ccf0c90c62c48e4277/raw_mnist_data/feature_extractor.jpeg --------------------------------------------------------------------------------
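The transfer-learning notebook above (`intro-notebooks/transfer_learning.ipynb`) replaces `model.fc` with a new classifier and trains it, but the cell that creates the pretrained `model` and freezes its feature-extractor weights is missing (the import cell failed with `ModuleNotFoundError`). The following is a minimal sketch of that step, assuming a torchvision ResNet-50 backbone; its final `fc` layer takes 2048 input features, which matches the `nn.Linear(2048, 512)` classifier defined in the notebook (the notebook's prose mentions VGGNet, but torchvision's VGG models expose `model.classifier` rather than `model.fc`).

```python
import torch
from torch import nn
from torchvision import models

# Load a network pretrained on ImageNet to use as a fixed feature extractor.
model = models.resnet50(pretrained=True)

# Freeze the feature-extractor parameters so only the new classifier gets trained.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with the classifier from the notebook.
# Its parameters are newly created, so they keep requires_grad=True.
model.fc = nn.Sequential(
    nn.Linear(2048, 512),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1))
```

With this in place, `optim.Adam(model.fc.parameters(), lr=0.003)` and the training loop from the notebook can be used unchanged.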