├── .gitignore
├── Python_feb_21_2021_classes_objects_lamda_functions.ipynb
├── README.md
├── Sentiment Analysis with LLMs.ipynb
├── deep_learning
    ├── BackProp.ipynb
    ├── README.MD
    ├── autoencoders.ipynb
    ├── cnn_with_keras.ipynb
    ├── convolutional_neural_networks.ipynb
    ├── data
    │   ├── fashion
    │   │   ├── t10k-images-idx3-ubyte.gz
    │   │   ├── t10k-labels-idx1-ubyte.gz
    │   │   ├── train-images-idx3-ubyte.gz
    │   │   └── train-labels-idx1-ubyte.gz
    │   └── mnist
    │   │   ├── t10k-images-idx3-ubyte.gz
    │   │   ├── t10k-labels-idx1-ubyte.gz
    │   │   ├── train-images-idx3-ubyte.gz
    │   │   └── train-labels-idx1-ubyte.gz
    ├── images
    │   ├── ann
    │   │   ├── README
    │   │   ├── activation_functions_plot.png
    │   │   └── perceptron_iris_plot.png
    │   └── autoencoders
    │   │   └── linear_autoencoder_pca_plot.png
    ├── introduction_to_artificial_neural_networks.ipynb
    ├── model_ckps
    │   └── README.txt
    ├── recurrent_neural_networks.ipynb
    ├── reinforcement_learning.ipynb
    ├── rnn_keras.ipynb
    ├── simple.txt
    ├── tensorflow.ipynb
    ├── tensorflow_keras_regression.ipynb
    └── training_deep_neural_nets.ipynb
├── exp
    └── Optimizer_2.ipynb
├── machine_learning
    ├── .ipynb_checkpoints
    │   ├── classification-checkpoint.ipynb
    │   ├── end_to_end_project-checkpoint.ipynb
    │   ├── end_to_end_project_bootcamp-checkpoint.ipynb
    │   └── training_linear_models-checkpoint.ipynb
    ├── Bikes_solution.ipynb
    ├── Natural_Language_Processing.ipynb
    ├── README.MD
    ├── Unsupervised Learning.ipynb
    ├── classification.ipynb
    ├── datasets
    │   ├── bike_sharing
    │   │   ├── Readme.txt
    │   │   ├── day.csv
    │   │   └── hour.csv
    │   └── housing
    │   │   ├── README.md
    │   │   ├── housing.csv
    │   │   └── housing.tgz
    ├── decision_trees.ipynb
    ├── dimensionality_reduction-Copy.ipynb
    ├── dimensionality_reduction.ipynb
    ├── end_to_end_project.ipynb
    ├── end_to_end_project_bootcamp.ipynb
    ├── ensemble_and_randomforest.ipynb
    ├── images
    │   ├── autoencoders
    │   │   └── README
    │   ├── classification
    │   │   └── README
    │   ├── cnn
    │   │   ├── README
    │   │   └── test_image.png
    │   ├── decision_trees
    │   │   ├── README
    │   │   └── iris_tree.png
    │   ├── deep
    │   │   └── README
    │   ├── dim_reduction
    │   │   └── README
    │   ├── distributed
    │   │   └── README
    │   ├── end_to_end_project
    │   │   ├── README
    │   │   └── california.png
    │   ├── ensembles
    │   │   ├── README
    │   │   ├── boosting_plot.png
    │   │   ├── decision_tree_without_and_with_bagging_plot.png
    │   │   ├── early_stopping_gbrt_plot.png
    │   │   ├── gbrt_learning_rate_plot.png
    │   │   ├── gradient_boosting_plot.png
    │   │   ├── hard_voting.png
    │   │   ├── law_of_large_numbers_plot.png
    │   │   ├── mnist_feature_importance_plot.png
    │   │   ├── pasting_bagging.png
    │   │   └── training_diverse_classifiers.png
    │   ├── fundamentals
    │   │   └── README
    │   ├── rl
    │   │   └── README
    │   ├── rnn
    │   │   └── README
    │   ├── svm
    │   │   └── README
    │   ├── tensorflow
    │   │   └── README
    │   └── training_linear_models
    │   │   ├── README
    │   │   ├── gradient_descent_paths_plot.png
    │   │   ├── gradient_descent_plot.png
    │   │   └── sgd_plot.png
    ├── math_linear_algebra.ipynb
    ├── naive_bayes.ipynb
    ├── sklearn_text_analyser.ipynb
    ├── support_vector_machines.ipynb
    └── training_linear_models.ipynb
├── projects
    ├── Fashion-MNIST
    │   ├── Fashion-MNIST-DL-Keras.ipynb
    │   └── Fashion-MNIST-ML.ipynb
    ├── autoquiz
    │   ├── README.MD
    │   └── auto_create_quiz.py.ipynb
    ├── deploy_mnist
    │   ├── README.md
    │   ├── flask_app
    │   │   └── predictions.py
    │   ├── requirements.txt
    │   ├── test-images
    │   │   ├── 2.png
    │   │   ├── 5.png
    │   │   └── 7.png
    │   └── train_mnist_model.py
    └── sui_2_sandeepgiri9034.ipynb
└── python
    ├── .ipynb_checkpoints
        └── Python - Numpy-checkpoint.ipynb
    ├── Lambda Operator.ipynb
    ├── Python - Numpy.ipynb
    ├── Python - Pandas.ipynb
    ├── Python - Part I.ipynb
    ├── Python - Part II.ipynb
    ├── README
    ├── __pycache__
        └── mylib.cpython-36.pyc
    ├── dataset
        └── housing.csv
    ├── hello.py
    ├── mbox-short.txt
    ├── mbox.txt
    ├── python-hands-sessions.ipynb
    ├── simpleexp.py
    ├── solutions
        └── Python_Project_1.ipynb
    └── stock_analysis_yfinance_pandas.ipynb


/.gitignore:
--------------------------------------------------------------------------------
1 | projects/deploy_mnist/venv/
2 | projects/deploy_mnist/__pycache__/
3 | projects/deploy_mnist/trained_models/*.pkl
4 | projects/deploy_mnist/flask_app/__pycache__/
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This repository contains machine learning projects and notebooks for the [CloudxLab](https://CloudxLab.com/) course.
 2 | Feel free to check it out and explore.
 3 | 
 4 | ### Note about notebook diffs:
 5 | 
 6 | Jupyter notebooks are usually unfriendly to diff, so we have installed [nbdime](https://nbdime.readthedocs.io/en/latest/).
 7 | 
 8 | To enable it in git, all you need to do is run:
 9 | 	export PATH=/usr/local/anaconda/bin:$PATH
10 | 	cd ml
11 | 	nbdime config-git --enable
12 | 
13 | To know more about us [click here](https://CloudxLab.com/)
14 | 


--------------------------------------------------------------------------------
/deep_learning/README.MD:
--------------------------------------------------------------------------------
1 | 
2 | Please note that some of the notebooks are based on the repository of this book: https://github.com/ageron/handson-ml
3 | 


--------------------------------------------------------------------------------
/deep_learning/autoencoders.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Autoencoders**"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Setup"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
  21 |     "First, let's make sure this notebook works well in both Python 2 and 3, import a few common modules, ensure Matplotlib plots figures inline, and prepare a function to save the figures:"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# To support both python 2 and python 3\n",
 31 |     "from __future__ import division, print_function, unicode_literals\n",
 32 |     "\n",
 33 |     "# Common imports\n",
 34 |     "import numpy as np\n",
 35 |     "import os\n",
 36 |     "import sys\n",
 37 |     "\n",
 38 |     "# to make this notebook's output stable across runs\n",
 39 |     "def reset_graph(seed=42):\n",
 40 |     "    tf.reset_default_graph()\n",
 41 |     "    tf.set_random_seed(seed)\n",
 42 |     "    np.random.seed(seed)\n",
 43 |     "\n",
 44 |     "# To plot pretty figures\n",
 45 |     "%matplotlib inline\n",
 46 |     "import matplotlib\n",
 47 |     "import matplotlib.pyplot as plt\n",
 48 |     "plt.rcParams['axes.labelsize'] = 14\n",
 49 |     "plt.rcParams['xtick.labelsize'] = 12\n",
 50 |     "plt.rcParams['ytick.labelsize'] = 12\n",
 51 |     "\n",
 52 |     "# Where to save the figures\n",
 53 |     "PROJECT_ROOT_DIR = \".\"\n",
 54 |     "CHAPTER_ID = \"autoencoders\"\n",
 55 |     "\n",
 56 |     "def save_fig(fig_id, tight_layout=True):\n",
 57 |     "    path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n",
 58 |     "    print(\"Saving figure\", fig_id)\n",
 59 |     "    if tight_layout:\n",
 60 |     "        plt.tight_layout()\n",
 61 |     "    plt.savefig(path, format='png', dpi=300)"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "# PCA with a linear Autoencoder"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "metadata": {},
 74 |    "source": [
 75 |     "Build 3D dataset:"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 2,
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "import numpy.random as rnd\n",
 85 |     "\n",
 86 |     "rnd.seed(4)\n",
 87 |     "m = 200\n",
 88 |     "w1, w2 = 0.1, 0.3\n",
 89 |     "noise = 0.1\n",
 90 |     "\n",
 91 |     "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n",
 92 |     "data = np.empty((m, 3))\n",
 93 |     "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n",
 94 |     "data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n",
 95 |     "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "Normalize the data:"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 6,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "(200, 3)"
114 |       ]
115 |      },
116 |      "execution_count": 6,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "data.shape"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 7,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "from sklearn.preprocessing import StandardScaler\n",
132 |     "scaler = StandardScaler()\n",
133 |     "X_train = scaler.fit_transform(data[:100])\n",
134 |     "X_test = scaler.transform(data[100:])"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "Now let's build the Autoencoder..."
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 8,
147 |    "metadata": {},
148 |    "outputs": [
149 |     {
150 |      "name": "stderr",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "/usr/local/anaconda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
154 |       "  from ._conv import register_converters as _register_converters\n"
155 |      ]
156 |     }
157 |    ],
158 |    "source": [
159 |     "import tensorflow as tf\n",
160 |     "\n",
161 |     "reset_graph()\n",
162 |     "\n",
163 |     "n_inputs = 3\n",
164 |     "n_hidden = 2  # codings\n",
165 |     "n_outputs = n_inputs\n",
166 |     "\n",
167 |     "learning_rate = 0.01\n",
168 |     "\n",
169 |     "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n",
170 |     "hidden = tf.layers.dense(X, n_hidden)\n",
171 |     "outputs = tf.layers.dense(hidden, n_outputs)\n",
172 |     "\n",
173 |     "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n",
174 |     "\n",
175 |     "optimizer = tf.train.AdamOptimizer(learning_rate)\n",
176 |     "training_op = optimizer.minimize(reconstruction_loss)\n",
177 |     "\n",
178 |     "init = tf.global_variables_initializer()"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": 9,
184 |    "metadata": {},
185 |    "outputs": [],
186 |    "source": [
187 |     "n_iterations = 1000\n",
188 |     "codings = hidden\n",
189 |     "\n",
190 |     "with tf.Session() as sess:\n",
191 |     "    init.run()\n",
192 |     "    for iteration in range(n_iterations):\n",
193 |     "        training_op.run(feed_dict={X: X_train})\n",
194 |     "    codings_val = codings.eval(feed_dict={X: X_test})"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 12,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "array([-1.9345188,  2.0936997], dtype=float32)"
206 |       ]
207 |      },
208 |      "execution_count": 12,
209 |      "metadata": {},
210 |      "output_type": "execute_result"
211 |     }
212 |    ],
213 |    "source": [
214 |     "codings_val[0]"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 13,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "Saving figure linear_autoencoder_pca_plot\n"
227 |      ]
228 |     },
229 |     {
230 |      "data": {
231 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAE8RJREFUeJzt3W+MXNV5x/Hvs7PrxQ0hCgb5TWT8IiClKQ3BfrNRKrbBagSK2gjUUiWOSexgk+BI0LRSLIFq4shIfpFaKEAx4o9NUiIUQCFJUVUIlqBeKTVySERT0SZgoIgG3JBgF//dpy/O3O74ev7cmb1nzp07v480Wu/MnZkzu57fnvOcM+eauyMiEsNE6gaISH0pYEQkGgWMiESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRTKZuwCDOO+88X7lyZepmiIyt55577i13P7/XcSMZMCtXrmT//v2pmyEytszsYJHjNEQSkWgUMCISjQJGRKJRwNTA3Bzcdlv4KlIlI1nklQVzc3D55XD8OCxZAk89BTMzqVslEqgHMwK69VD27g3hcupU+Lp377BbJ9KZejBDNjcXQmB29syeRrvbevVQZmfD9dnts7PDeBUixShgOugWBIt5zE5h0Xrb5CR84Quwbl37Hkpre2ZmwuOU3VaRMihg2ohV1+gWFq23nToFd98Nu3fDzp29eygzMwoWqSbVYNqIVdfIhjONxplhkd1mFr53D8996FAIuG3bygs6zTrJsKgH00asuka34Ux22549cN99Idyy5y6zh6JZJxkmBUwbMesa3cIiuy2rvcSoqfSq6YiUSQHTQcq6Rszn1qyTDNPYB0yZs0UxZp7KplknGaakAWNm08CdwBrgXOCXwBZ3f2IYz19mPWKUahuadZJhST2LNAm8ClwGvA+4GXjYzFYO48nLnC1azGMNe1an6PNptkkWK2kPxt2PAFtbrvqhmb0ErAJejv38ZdYjBn2sTgvsYvUwiva0ei0K1BBLiqhUDcbMlgMXAS+0uW0jsBFgxYoVpTxfmfWI1sdatmyhB9PrMTstsIs1xCo6i9TpuNbgaTRg/fq4gSgjzt0rcQGmgCeBu3sdu2rVKq+qffvcly51bzTC1337ih1v5h6W14X7bt+etn2djtu+PVyXtdWs2OvMP/b27f3dR6oF2O8F3teV6MGY2QTwIHAc2JyiDflu/6DDgH7XmXRbYBdD0V5bp+OyoeDRo1nE9LeeZpSK4VKCIikU8wIYcD/wNLC0yH3K7sHk/1rffXd/vZBuj1XHv+z79rlff7379HT/r7O1BxSzpyZxMUI9mLuADwFr3P3dFA3I9zoeeWTw1a6LqesMY/q4jALtYlYca6HfeEm9DuYCYBNwDHjDsk/6wSZ3/86w2pH/T3/JJfDjH8PExGBvgqquM+lnBqlIcAz6Oq+9NnxVcbj+Uk9THyQMkZLKzwDdeCPMz4dZkp076/MmKFIfilkjmZuDyy6DEydgaioEjNRb6oV2lTEzA1u2hO0Rjh8PATM/DwcO1GexWbftIjIxt+DcsSOEC4SvGzZosV/dVaEGUymtw6XJydNndkZ9xqNIfShmjeT110///he/CI/fqcalGafRpx5MzsxMGBZdfjlcccXCArg6bKhdpLaShVCZG1xlNmw487rjx8MUfTva0Hz01bYHM+hsydxcqMFkPZhGI1w/6jMe/fQGYhWpN24MX7dtg9de6328ZpxGXy0DZjFd69a/mgDXXQcrVoz+526qstFUFjKbNi1c99GPtj9WW0uMvloOkRbTtc4XQtetC8VfGO1iY5EC77AcOhSWAED4euhQ52Oz4rvCZTTVsgezmK51u7+adSg2Vqk3MDsL09Ma+oyDWgZMtzdTa20GOm/A3bo1wdatcOxYmLYuMryo6nYGVVkAWKWwk7hqGTDQ/s2U33vFvfsUdHZ8Fi5FVvbWob
czDFUJO4mrljWYTvK1mRMnutdpsuOzcFmzpndgaGpVZMFYBUy+0Dk11b3o2Xr89HQYKvX6q1ulYqpIarUdIrWTH/tD9zrAILUC1RdEFljY2mG0rF692vfv35+6GZUt5orEZmbPufvqXseNVQ+mTCrmivQ2VjWYMqmYK9KbAmZAKuaK9KYhUot+aioq5or0poBpGqSmosViIt0lHyKZ2WYz229mx8zsgVTtWGxNRTuviZypCj2Y14FvAJ8ElqZqxGI+IKkZJZH2kgeMuz8KYGargQ+kasdiaipV2WtFpGqSB0xRMc5NnTdoTUU7r4m0NzIB4+67gF0QVvImbs5pivR+Os1QaTWw1NnIBEzVdev9dKrRqHYjdZd8FmmUFZ056jRDpdXAUnfJezBmNtlsRwNomNlZwEl3P5m2Zd310/voVKNR7UbqLnnAADcDf9vy/VrgVmBrktYU1O/MUbvzMWs1sNRd8oBx961UPEzaKdr7yPd08udj1mpgqbPkATOqivY+tEZGxpkCZhGK9D5UZ5FxpoCJrLWns2zZwkyRejEyDhQwQ5CFida8yLjROpgh0ZoXGUcKmCHRDngyjgoNkcxsCXAYmOpwyGPuflVpraohrXmRcVS0BjMFrG9z/U3ApcAPSmtRjWnNi4ybQgHj7keAb7deZ2Y7COHyVXe/P0LbRGTE9T2LZGYG3A7cANzg7neW3ioRqYW+irxmNkHYk+XLwIYsXMxs2szuMbNfmdk7ZvaimX0lQntFZIQU7sGYWQPYDVwDrHX3h3KP8wbwJ8CvgD8E/snM/tvdHy6xvSIyQgr1YMxsCvgu8OfANblwwd2PuPst7v6f7j7v7j8FHgc+XnqLRWRk9AwYM5sGHgU+BVyVbdLd4z5TwB8BP1t0C0VkZBUZIu0hhMsDwPvNbG3u9sfd/Xe5674FvNO8r4iMqa4B05wxuqL57eebl1bzwHtz9/kmMAN8wt2Pl9JKERlJXQPG3R04p+iDmdlO4HJCuLy1yLaJyIgr7dPUZnY78Angj939zbIeV0RGVykfdjSzC4CvAB8EXjKzw83LEwXue66ZPWZmR8zsoJl9pow2iUh6pfRg3P0gYAPe/Q7gOLAcuAT4kZk97+4vlNE2EUkn6XYNZvYe4GrgFnc/7O7PEtbPfC5lu0SkHKn3g7mIcA6kF1uuex74cP5AM9toZvvNbP+bb6rEIzIKUgfM2UB+Dc1vyU19Qzg3tbuvdvfV559//lAaVwVFzx4pUkWp9+Q9zJnT4OcQFumNPZ27WkZd6h7Mi8CkmV3Yct1HABV4Wdw+vur5SBUk7cG4+xEzexT4upl9kTCL9GfAx1K2qyoGPaeSej5SFal7MBD2llkK/Bp4CPiSpqiDbB/fbdv6CwmdwUCqInUNBnf/H+DTqdtRVfl9fOfmem8crrNJSlUkDxgprujQZ2YGdu6ERx6Bq6/W8EjSUcCMkHZDn3bhMTcHN94YjnnmGbj4YoWMpFGFGowUVPTkbarBSFWoBzNCip68rZ8aTJGajsigFDAjpsjJ24oGkaazJTYFTE0VCaKiNR2RQakGM8aK1nREBqUezBgrOpQSGZQCZswVGUqJDEpDJBGJRgEzpvRpaxkGDZHGkKanZVjUgxlDWukrw6IezJhoXbGrT1vLsChgxkC7IZGmp2UYNEQaA61DomPHYOvWcP2WLQoXiUsBMwayIdHEBMzPw5NPhh5N0RkkzTjJoFKfeG1z81xHx8zsgZRtqbNsxe6aNQsh06u4m4XKrl0hjG65pb9QEoH0NZjXgW8AnyTsyyuRzMyEodEzz7Qv7rYWgWGhZmMWAqk1lDSskqJSn1XgUQAzWw18IGVbxkGnzx7li8DXXrtQs5mYCB+GNNOMk/QvdQ+mMDPbCGwEWLFiReLWjK52nz3Kr4uB06exd+6EQ4c04yT9G5mAcfddwC6A1atXe+Lm1Ep+Xcy6deGiaWxZrGgBY2Z7gcs63Pwv7v7xWM8t/ek0dFKwyGJFCx
h3n4312FK+frdtiLGXr/YHrp+kQyQzm2y2oQE0zOws4KS7n0zZLukuxocl9QHMekq90O5m4F3ga8Da5r9vTtoi6SnGhyX1Acx6Sj1NvRXYmrIN0r8YH5bUBzDraWRmkaQ6Yuzlq/2B60kBI5Wh/YHrRwEjfVNBVopKXeSVEaSCrBSlgJG+6YRtUpSGSNI3FWSHa5QXICpgZCApCrKj/EYbVJF6V5V/LgoYqZy5OdizJ/x73brwphnXwnK7elfr6676z0UBI5UyNxf+EmfbRtx/Pzz9dO83Wl31WoC4mJ/LMHo+ChiplL174cSJhe+zN032Rjt2LGx+9fbbYUvPKg4LytRtk7C9e2HZssFWQA+r56OAkUqZnYWpqdM3vsreWDt3wubNcPIk7NgRdtubnq7esKBs+XpXPhwG2RBsWD1CBYxUysxM+M+er8FAeBPNz4M3txvLb14+7EJnquJqPhwOHQqnoMk2ai/SnmF99ksBI5XTaYaqdZg0Px96MEuWhGHCsAudKYur7cKh3/YMa6mBAkZGRuubYtmyhWFBigJwjOcs2iNqFw633dZ/e4ax1EABIyOl05ti2Fs99DvE6BUenXogne6X/znMzsLkZOjZTU4O9jOIMeRTwMjIi93db/fG6+c5iwxfOn2+q2jo/PznofjtvlCj6vc1xhjyKWCkFsro7rdO/WbDL+j8xiv6nPnw2LPnzIBo1yMqGjoQZtdOnQr/Pnmy//UwW7cu1LbKHGYqYERY+AveWkCenj79JHSDvvFaw2NyEu67Lzxea2Dl60ud1ri0C51XXgmhkpmY6H89TL5wXtYwM1nAmNk0cCewBjgX+CWwxd2fSNUmGV/ZG3d+Pnyf/SWHYrWWbvWL1vD4yU/g+98Pw5h8YGVfe61xaW3PsmVw660Lw6JGA+64o//1MFm4rFkTejN1qMFMAq8Szp30CnAl8LCZXezuLydsl4yhdlPgk813R6+FbEXqF9n3+TDotfQ/W+PS+jhPPbWwTujAgYXeixlcdx1s3Nj/687aXma4QMKAcfcjnL7h9w/N7CVgFfByijbJ+GrtZbz9dvh64ADcc0/vomfRKeu9e08Pg/Xrzzyu6OzU7t0LQ65GI1y3ZElYmNjPbFDsAnllajBmthy4CHihw+0b0bmpJYLWN+TsbOiNHD260NPoVXspGgrtTtGb164Wk12faQ00CL2WFSt6F6U7iboext2TX4Ap4Eng7iLHr1q1ykXKsG+f+9Kl7o1G+Hr99eHf2YSvWbh+375w2b49fG33OJ1uG/S41na1Ht/ttu3bF9rfaITvYwD2e4H3avJzU5vZBPAgcBzYHKs9Iu3khzew0MtoNMIwJutpdOoZ9Dsk6XVMr2njbsOaqp1fKum5qc3MgHuB5cCV7n6ix11EStVu2LJu3Zlv3k5L8QddoNYplIpOG3cKqqptZ5q6BnMX8CFgjbu/m7gtMoY6vSGLFl97FXjbBUm3UCpj2rhK55dKuQ7mAmATcAx4I3RmANjk7t9J1S4ZP0XekJ2CqNuQpFOQdAul2NPGw5ZymvogYD0PFKmIdkHUbUjSKUi6hVLVhjiLlXqIJDLyeu1fkw+SXiFSpSHOYilgRCLpFiR1CpFuFDAiEY1LkHSiU8eK1Ey2N+/cXOqWqAcjUitVOxGbejAiNdJpk6pUFDAiNZLNXDUaNf+ogIgMX9XW0ShgRGqmSjNXGiKJSDQKGBGJRgEjItGYD3KWpsTM7E3gYOp25JwHvJW6EUOm1zwe2r3mC9z9/F53HMmAqSIz2+/uq1O3Y5j0msfDYl6zhkgiEo0CRkSiUcCUZ1fqBiSg1zweBn7NqsGISDTqwYhINAoYEYlGASMi0ShgSmJm02Z2r5kdNLN3zOynZnZF6nbFYGbnmtljZnak+Xo/k7pNMY3T77YdM7vQzI6a2bf7va8CpjyTwKuE0+W+D7gZeNjMViZsUyx3EE71uxz4LHCXmX04bZOiGqffbTt3AP86yB01ixSRmf
0MuNXdH0ndlrKY2XuA3wB/4O4vNq97EPgvd/9a0sYNUR1/t+2Y2V8CVwH/BnzQ3df2c3/1YCIxs+XARcALqdtSsouAk1m4ND0P1LkHc5oa/25PY2bnAF8H/mrQx1DARGBmU8B3gN3u/u+p21Oys4Hf5a77LfDeBG0Zupr/bvO2Afe6+2uDPoACpiAz22tm3uHybMtxE8CDhBrF5mQNjucwcE7uunOAdxK0ZajG4Hf7/8zsEmAN8HeLeRxtmVmQu8/2OsbMDLiXUPy80t1PxG5XAi8Ck2Z2obv/R/O6j1D/4cI4/G5bzQIrgVfCS+dsoGFmv+/ulxZ9EBV5S2Rmfw9cAqxx98Op2xOLmX0XcOCLhNf7j8DH3L22ITMuv9uMmf0ep/dU/5oQOF9y9zeLPo6GSCUxswuATYT/hG+Y2eHm5bOJmxbDl4GlwK+Bhwj/6eocLuP0uwXA3f/X3d/ILoSh8dF+wgXUgxGRiNSDEZFoFDAiEo0CRkSiUcCISDQKGBGJRgEjItEoYEQkGgWMiESjgBGRaBQwEoWZLTGz410+gf5o6jZKfPo0tcQyBaxvc/1NwKXAD4bbHElBn0WSoTGzHcDfAF9192+mbo/Epx6MRNfcS+V24AbgBne/M3GTZEhUg5GomrvA7SJs8bChNVzM7C/M7Nnm1gcvp2qjxKMejERjZg1gN3ANsNbdH8od8hvgW4Rd4m4acvNkCBQwEkVzc+x/AP4UuMbdz5g1cvd/bh776SE3T4ZEASOlM7Np4HuETaOvcvcfJW6SJKKAkRj2AJ8CHgDeb2b5k3U97u75U59IDSlgpFTNGaPsvM2fb15azTMm51ASBYyUzMPCqvx5k2RMKWAkmeYs01TzYmZ2FiGjjqVtmZRFASMpfQ64v+X7d4GDhPPvSA3oowIiEo1W8opINAoYEYlGASMi0ShgRCQaBYyIRKOAEZFoFDAiEs3/AQcsVH7eWduMAAAAAElFTkSuQmCC\n",
232 |       "text/plain": [
233 |        "<Figure size 288x216 with 1 Axes>"
234 |       ]
235 |      },
236 |      "metadata": {},
237 |      "output_type": "display_data"
238 |     }
239 |    ],
240 |    "source": [
241 |     "fig = plt.figure(figsize=(4,3))\n",
242 |     "plt.plot(codings_val[:,0], codings_val[:, 1], \"b.\")\n",
243 |     "plt.xlabel(\"$z_1$\", fontsize=18)\n",
244 |     "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n",
245 |     "save_fig(\"linear_autoencoder_pca_plot\")\n",
246 |     "plt.show()"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 14,
252 |    "metadata": {},
253 |    "outputs": [
254 |     {
255 |      "name": "stdout",
256 |      "output_type": "stream",
257 |      "text": [
258 |       "Saving figure linear_autoencoder_pca_plot\n"
259 |      ]
260 |     },
261 |     {
262 |      "data": {
263 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEwJJREFUeJzt3V2IXOd9x/Hvf18kG8eGeC2ci7AWofFNUZNYe7NtitexW5MQkuCFGlx5Y7u1TCMbZEKhApuuIyNBLoIwdqjWxLbUNoZQycbBlKYx2gtXe7PCcSBQHGgjFUzaWG0SS01WWu2/F8+ezuhoXs7Mnue8zPl9YBjpzJnZR6uZ3zzvx9wdEZEYxsougIiMLgWMiESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRKGBEJJqJsgswjFtuucV37txZdjFEGuvMmTMfuPuOfufVMmB27tzJ6upq2cUQaSwzO5vlPDWRRCQaBYyIRKOAEZFoFDAim1ZW4PDhcC/5qGUnr0jeVlbg7rvh0iXYtg3eegtmZ8suVf2pBiMCLC+HcLlyJdwvL+f32k2uGakGIwLMzYWaS1KDmZvL53WbXjNSwIgQPvRvvRVqLnNzWw+BlZXwWufOXVszUsCIyNDaay3j4zCx+SnLs2ZUFwoYGSlJzWHQWkivpswgr7myAouLsLYGGxvh2KOPDvzPGBkKGBkZW+nv6NTJOzs7WPAk5ybhYhZqL5/5DOzfH17j2LFm9cNoFElqIctIzFZGgpJO3vHxq5sy3V4zCZOnnw73SdhcutSqubiH2zvvxBuhqjrVYKTystZM0iNBU1MhlLI0bbp18nYbXeoUPMm5v/1tCBYIj0OcEao6UMBI4QbtJ+nWfElrD4mpqVazJGtzaXb22nMGCZ7k3OPH4eWXYX09PLawEG7p1xi2v6hW3L3UG/A4sAqsAa9kec7u3btd6un0affrr3cfHw/3p0/3f87Ro+F8M/dt27I959Ch8BwI94cObb3saadPh9ftVJ5ejyWP9/o99Ht+2YBVz/BZrUIN5n3gWeBe4PqSyyJb1O9bOWttpP31nnii1dTIeqXjWBPn2nWq8WR5DHr/HkZpcl7pAePuJwHMbAb4eMnFkS3I8sHo9sHvFEzJkO/ly63nr69nm6yW98S5bmUcVqffwyhOzis9YLIys73AXoDp6emSSyOddBtxaf9QdvrgdwomaA35ttda0rWRXh/6frWIQeRZq0jKfOQInD/f+vckrz8xEUazoP6dwrUJGHdfApYAZmZmMlaUpUidRnGyfCg7BdO5c63RmLExmJmBO+4InaVlNCUGbdp1063Mhw+3Xh/C5Lzp6fp3ANcmYKT60rWT9g/l2lpo7szPXzu60ymYnnmmVXOZmAjf9ukPWl4f+iympkLQuW+tVtGtlnfu3NW1lvYgrTMFjOQq3SzZtq01s/WHP4RTp8KHa2Oj9QE7cODaYFpfD883g0ceyTbvJVZTYmUlhOKVKyFkOoVdVuky//KXcOed4bUnJ0PNZVTCBSoQMGY2sVmOcWDczK4D1t19vdySyVYlNZrFxRAuyQzXsbEQHO2h0CmYkg/hwkLv1489l6R9hq5Z6DcZVnquzr59rTC9fDk0i0YlXIBKzINZBDx1W+z1HM2DqZf0nI+jR/vP8ajSPJBh5u5kceiQ+9hYsqDAfXKyGv/eLMg4D8Y868SCCpmZmXFdF6mauo3q1H3Waozyty+OHBuDF16AvXvzee3YzOyMu8/0PU8BI3kZpQliRalr8GYNmNL7YKQ6sr7Zu51X5KjOqMhzrk4VKWAEyF776HVeUaM6Uh/aD0aA7Hup9DovGSE5eFDNIwlUgxEge+2j33mjXuWXwShgBMg+p6SouScyGhQwDZburE3XPrqtcFa4SFYKmBHXa15Kr07dXiucNQwtWSlgRlivEOk3pNytM1fD0DIIBcwI6L
VZU7LQ8NKlsFdsct7UVFhXMzbWubO2W2euhqFlEAqYmuu3WdPGRgiR8fHWRtQTE2H1y8ZGON5pdXC3zlx18I6uGP1rCpia69WUScLlnnvgE5+AF19sbZUAIWR6rQ7utsu+gmX0xFrmoYCpuSxNmcXFcOzYsdaWjO4hbNTUaYa8N2PPSgFTc4M0ZdqPgZo6TbCycu11mgbZjH2rtJq6YTSPpTmSZk/7lSbHx8NSjgMHOp+f9b2h1dRyDW2n0CxJsycJl/QugtB5smWeFDANou0UmqW92TM+HvY2LvqqDAqYBtF2Cs3Sb91YEV84CpgG0ULF5unV7Ok32TIPCpgayLNjVvNYBFqXYuk12TIPCpiKy6OdrJEjScvzUiy9KGAqrNN6okHbyRo5kk6K6o9TwFRU+yUtkin/w7wRNHIknRTVH6eAqaj2KmyynmhxcfA3gkaOpJsi+uMUMBWVDoZhwgU0ciRBuh+uqH45BUxF5RkMGjlqtnQ/3JEjYQSpiH45BUyFKRgkD+l+uBMniuuX03WRCrKyAocPh3uRIiXN7fHxcD8/H+7HxsIQ9dRUvJ+tGkwBNFQsZerW3H788VCL2b8fdu3SRLva0lCxlC3d3D5/PoxQDju/KqvSm0hmdrOZvWZmF83srJk9UHaZ8pauomqoWMqWfk9OTcVpwlehBvMCcAm4Ffg08KaZvevuPym3WPlJqqjHj5ddEpGgvdk0NRVvVKnUGoyZ3QDMA0+7+wV3fxt4A3iwzHLFcuxY2Hj77rvV2Svlm50NO9udP9954/g8lF2DuR1Yd/f32o69C9xZUnkGlnXCUrfd/zUBTsoWc7Z3poAxs23ABWCyyymvuft9Q/z8jwC/Th37FXBjhzLsBfYCTE9PD/Gj8jfI6FD6P3FqSiNLUox+X4IxZ3tnrcFMAo90OP4kcAfw/SF//gXgptSxm4AP0ye6+xKwBGHT7yF/Xq4GGR1K/ydqZEmKsLIS3m+XL8PkZPf3WaxJnZkCxt0vAn/XfszMvkkIl6+7+8tD/vz3gAkz+6S7/3Tz2KeAWnTwDlq1TP8nahGixHb8eHiPQevywUV+kQ3cB2NmBjwH7AP2ufu3h/3h7n7RzE4C3zCzPyeMIn0Z+P1hX7NIW6laahGiNIK7Z74RRp1eBK4AD7cd3755/N8IzZv3gCcyvubNwOvAReAc8EC/5+zevdtFpL/Tp923b3c3C/enT3c/79Ch7o+nAaue4fOduQZjZuPAMeB+YI+7v9r28ATwc+CPN0Pm94B/MrP/dPfv9Qm4/wa+krUcIpLd7CycOtW7phxzKUvWUaRJ4LvAl4D73f1k++Me+miebjv0IzN7A/gs0DNgRCSufh24MQcc+k60M7PtwEngi8B96XDp8pxJ4A+BH2+5hCISVcylLFlqMMcJ4fIK8FEz25N6/A13T89leZ7QF9OoyfHavV/qKOaAg7l3n1KyOWLUceLbpg3gRnf/37bnfAu4B/icu3+QX1FbZmZmfHV1NcZLD01bMkiTmNkZd5/pd17PJtJmh/FN7m5dbuOpcDkC/BFwd6xwqapuSwFEqi7mZmi5rUUys+eAzwF3ufsv8nrdutDu/VJHsWveuaymNrPbgCeA3wH+3cwubN7+MY/Xr4OkHXvwYOs/SdtkStXFrnnnUoNx97OA5fFaddGpQ7d9OFB9MlKWQQYbYte8y96uoZayhIcWM0oZlpZg376wFeb27f2/2GIvWVHADCFLeKhPRoq2shI28l5fD39fW8v2xRbz8jgKmCFkCQ8tZpSiLS+HL73E2Fj5X2wKmCFkDQ9dOE2KNDcXmkVra2FW7vPPl//+U8CIjICVlbDXy733wsc+BgsL5YcLKGCGohEiqZJk17pkY6nt20PAVEHp10WqI83alSpZXg5bYiaq9J5UwAxBF1KTKpmbC/vtJqr0nlQTaQgaIZIqmZ0N78Xkwn5V6X+BPqupq6qKq6lFmiSX1dQiIlvR+IDRgkSReBrdB9M+3DwxAQ
8/3Gq/anc6ka1rdMC0DzdfuQJHj4YL1B85Avv3a56LyFY1uomUDDfb5kYT7iFUTpzQPBeRPDQ6YJLh5sceu3pey/y85rlItdS1r7DRTSRoLUhcWLi6z2XXLvXBSDXUeWlK4wMmkV75rJXQUhV13rys0U0kkTqo89IU1WBEKq7OS1MUMCIVlZ6LVadgSShgRCqozh277dQHI1JB/fYcqsuwdak1GDN7HHgI2AW86u4PlVkekaqYmwuduhsb4b69Y7dOtZuyazDvA88CL5VcDpHKSWaYW+qShnXaUbHUgHH3k+7+OnA+79cetgpZl6qnjI5O77nl5XB9I/dw3x4idRq2rk0nr5ntBfYCTE9P9zx32CpknaqeMhq6ved6XXurTsPWZTeRMnP3JXefcfeZHTt29Dx32CpknaqeMhq6veeSEDl4sPMX3ewsHDhQ7XCBiDUYM1sG7uzy8L+4+2dj/exhL9uqy71K0frVVKoeIP1ECxh3n4v12v0MW4WsU9VTRsOov+dK3fTbzCYIIffXwMeBR4F1d1/v9byiNv3WrnYinWXd9LvsTt6nCOGS2AM8AyyWUpo26vAV2bqyh6kX3d1St8UYP2vQ4Wd1+IpsXdk1mEIMUxtRh6/I1jUiYIbdsOerXw33VbpSnkidNCJgBq2NpGs8CwtFlFJk9NRmot1W9Ju0lKb+FynKqC9NaUQNBgabtKT+FylCE0YqG1GDaZflG2PQGo/IMJpQU25MDQYG+8YYhWnaUm1NqCk3KmDqfPkHGT2jvkwAGhYwTfjGkHoZ9ZpyowKmCd8YIlXSqICB0f/GkOJoMWx/jQsYkTw0YYg5D40bphbJQxOGmPPQuIAZ9ZmTUow6bbxdpkY1kTpVa0HtaBmcBgyyaVTApKu1x4/DsWOd29FLS3DiBMzPw969pRZbKkoDBv01KmDS82Cg88S7pSV47LHw+A9+EO4VMiKDa1QfTHqN0cJC53b0iRNXPy/9dxHJplE1GLi2WtupHT0/36q5JH8XkcE1LmDSOrWjk+aQ+mBEtqbRAdNtJubKCpw/D4uL6sQT2YrGBky3mZjp40eOhLDRUKTI4BobMN22bmg/vrYG+/aBu6aDiwyjUaNI7brNxGw/Pj4OGxuaDi4yrMbWYLrNxGw/PjUF+/dr/xiRYTU2YKAVKknNpD1kkj/v2qXp4CLDanTAZFlyr+ngIsNrbB8MaMm9SGyNDpitLrnX1g8ivTW6iZTu0E33xfSiHc1E+mt0wEArFAYNC10CRaS/0ppIZrbdzL5jZmfN7EMz+5GZfb6MsgzTF6MdzUT6K7MGMwH8B3AncA74AvA9M9vl7j8rsiDDXC9JO5qJ9GfuXnYZ/p+Z/Rh4xt177sAyMzPjq6uruf7sZOHj1BS88044trBw7SJIBYoImNkZd5/pd15l+mDM7FbgduAnXR7fC+wFmJ6ezv3nJ4Fx111hDRLASy+1+lbUqSsyuEoMU5vZJPD3wDF3/9dO57j7krvPuPvMjh07opQj6YtJXL7c6o/RnBmRwUULGDNbNjPvcnu77bwx4G+BS8DjscqTRdIXk5ic7LwIUp26ItlEayK5+1y/c8zMgO8AtwJfcPfLscqTxewsnDoVrjYAV/fBqFNXZHCldvKa2d8AnwbucfcLWZ8Xo5NXRLLL2slb5jyY24DHCAHzczO7sHn707LKJCL5Km0Uyd3PAlbWzxeR+CoxiiQio6lSE+2yMrNfAGfLLseAbgE+KLsQFaPfSWd1+L3c5u5954vUMmDqyMxWs3SKNYl+J52N0u9FTSQRiUYBIyLRKGCKs1R2ASpIv5PORub3oj4YEYlGNRgRiUYBIyLRKGBEJBoFTIHM7HEzWzWzNTN7pezylMXMbjaz18zs4uaezA+UXaayjep7ozI72jXE+8CzwL3A9SWXpUwvEPb/uZWw2PVNM3vX3TvuZtgQI/neUA2mQO5+0t1fB86XXZaymNkNwDzwtLtfcPe3gTeAB8stWblG9b2hgJ
Gi3Q6su/t7bcfeBX63pPJIRAoYKdpHgF+njv0KuLGEskhkCpicZN2DWLgA3JQ6dhPwYQllkcjUyZuTLHsQCwDvARNm9kl3/+nmsU/R5XI1Um+qwRTIzCbM7DpgHBg3s+vMrFEh7+4XgZPAN8zsBjP7A+DLhCtLNNaovjcUMMV6CvgN8FfAns0/P1VqicrxNcJQ7H8BrwJ/0fAhahjR94YWO4pINKrBiEg0ChgRiUYBIyLRKGBEJBoFjIhEo4ARkWgUMCISjQJGRKJRwIhINAoYicLMtpnZpR4rzE+WXUaJr/aLqaSyJoFHOhx/ErgD+H6xxZEyaC2SFMbMvgn8JfB1d/9W2eWR+FSDkejMzIDngH3APnf/dslFkoKoD0aiMrMxwrWWvwb8WXu4mNmfmNnbZnbBzH5WVhklHtVgJBozGweOAfcDe9z91dQp/wM8T7h8yZMFF08KoICRKMxsEvgu8CXgfne/ZtTI3f9589yvFFw8KYgCRnJnZtuBfwDuAe5z9zdLLpKURAEjMRwHvgi8AnzUzPakHn/D3dOXLpERpICRXG2OGH1+868Pbd7abaBrIDWGAkZy5WFiVfq6R9JQChgpzeYo0+TmzTYv2+HuvlZuySQvChgp04PAy21//w1wFthZSmkkd1oqICLRaCaviESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLR/B9Eh1SALGTF8QAAAABJRU5ErkJggg==\n",
264 |       "text/plain": [
265 |        "<Figure size 288x216 with 1 Axes>"
266 |       ]
267 |      },
268 |      "metadata": {},
269 |      "output_type": "display_data"
270 |     }
271 |    ],
272 |    "source": [
273 |     "fig = plt.figure(figsize=(4,3))\n",
274 |     "plt.plot(X_train[:,0], X_train[:, 1], \"b.\")\n",
275 |     "plt.xlabel(\"$z_1$\", fontsize=18)\n",
276 |     "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n",
277 |     "save_fig(\"linear_autoencoder_pca_plot\")\n",
278 |     "plt.show()"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 16,
284 |    "metadata": {},
285 |    "outputs": [],
286 |    "source": [
287 |     "from sklearn.decomposition import PCA"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 17,
293 |    "metadata": {},
294 |    "outputs": [],
295 |    "source": [
296 |     "pca = PCA(n_components=2)"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 19,
302 |    "metadata": {},
303 |    "outputs": [],
304 |    "source": [
305 |     "X_2 = pca.fit_transform(X_train)"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": 21,
311 |    "metadata": {},
312 |    "outputs": [
313 |     {
314 |      "name": "stdout",
315 |      "output_type": "stream",
316 |      "text": [
317 |       "Saving figure linear_autoencoder_pca_plot\n"
318 |      ]
319 |     },
320 |     {
321 |      "data": {
322 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFXVJREFUeJzt3XuMXGd9xvHvb3dtx4pJlWzS/FM5FhJR1RBCwwq0XMSKi1EqIIigptBgQ8BG4LRKykWJStRAqkQgEZlgLlk3TuyQBNImCBC0BUWsGuqV0LoQoqAqVEBcKKTJhpujxsbeX/94Z/Dx2Zmdc2bOO+f2fKTRZGbPjt/MnnnmvZ33NXdHRCSGibILICLNpYARkWgUMCISjQJGRKJRwIhINAoYEYlGASMi0ShgRCQaBYyIRDNVdgFiO/vss33Lli1lF0OkUQ4dOvSUu58z6LjGB8yWLVtYWloquxgijWJmj2c5Tk0kEYlGASMi0ShgRCQaBYxU3uIi3HxzuJd6aXwnr9Tb4iK8+tVw7BisXw8PPgizs2WXSrJSDUYqbWEhhMuJE+F+YaHsEkkeChiptLm5UHOZnAz3c3Nll0jyKD1gzOwqM1sys6Nmducax73DzE6Y2ZHEbW58JZUyzM6GZtGNN6p5VEdV6IP5H+DvgdcBGwccu+juL49fJCnC4mJo0szNjRYMs7MKlroqPWDc/QEAM5sB/qjk4khB0p2zu3fD8vLoYSP1UnrA5PSnZvYU8DRwF3Czux9PH2RmO4GdAJs3bx5vCQU4tXP26FHYtQvcNRLUNqX3weTwb8DzgT8ELgPeCnyw14HuPu/uM+4+c845A6/HkgiSnbOTk7CyopGgNqpNwLj7j9z9x+6+4u6PAB8F3lJ2udooy8S3ZOfsnj2wYYNGgtqobk2kJAes7EK0TZ6Jb8nO2QsvLKbDV+ql9IAxs6lOOSaBSTM7DTie7lsxs0uA/3D3J8zsj4HrgX8ce4FbLj3x7cCBbMGhkaB2Kj1ggA8Df5d4fAXwETPbB/wA+BN3Pwy8GrjTzDYBTwCfB24ad2Hbrtu3cuwYTE3Bvn0hbNR5K72UHjDufgNwQ58fb0oc9wHgA2MoUuONMj+l27eysACHD8Pevad23ipgJKn0gJHxKuLiwW5zZ3ER9u8/WZs5fDg8p5CRrtqMIkkxirx4sFub2bEjzHHZuzeEl5ZVkC4FTMsUffHg7Cxs3hwCS/NcJE1NpJZJ9qEUNWSc7PjVPBdJUsC0UNFDxjFCS5pBASOF0DwX6UV9MBVWxbVoq1gmqS7VYCqq6LVoi1ibRevjSl6qwVRUkcPJ3WC4/vrRhpG1Pq7kpYCpqCKHk4sKBq2PK3mpiVRRRY7MFDWMrNEiycvcvewyRDUzM+NLS0tlF6Mww/alFLU+rgiAmR1y95lBx6kGU3HJYIDhO1k1jCxlUMBENkrNIT1qs3376r4UhYZUmQImolGHddOds6Ap+VIvGkWKaNTRm/SozbZt2oRM6kU1mIhGHb3pN2qjYJG6UMBEVMSwrjpnpc5KbyJl3Zu6c+w1ZvYLM/uNme0zsw1jKubQZmfhuusUEtJOpQcMJ/em3rfWQWb2OuBawuLf5wHPBT4SvXQVpAsOpS5KbyLl2Jt6O3C7uz/aOf5G4G5C6JRqnJPYdMGh1EnpAZPDBcCXE48fBs41s2l3X04eOM69qce9yXuvkSkFjFRVnQJmE/DrxOPufz8HOCVg3H0emIdwqUDMQsXc5L1XzUjLU0qd1ClgjgBnJB53//u3JZTl95If+ImJEDQrK6PXLvo1hXTBodRJnQLmUeAi4L7O44uAJ9LNo3FLfuCnp+Hqq4upXazVFNLQtdRF6QGTdW9q4ABh69i7CSNPHwbuHGdZ+4mxybuaQtIEpQcMGfemdvd/MbOPA98CNgL3p36vEoqsXWzfHu63bVONRepJ68FUkIaipeqyrg
dThYl2kqK1b6UpFDAVkJ6Zq7VvpSmq0AfTav2aQxqKliZQwJTswAF49tkwOS85HK2haGkCNZFKtLgId9wRwgVCk0jNIWkSBUyJFhbgeGe2jxlceaVqLdIsCpgSJTtzTzstzHcRaRL1wRQsz9IN6syVplPAFGiYCXLqzJUmU8AUqN8EOdVQpK0UMAVKX6A4Pa0p/9Ju6uQtULdPpbtv0fKypvxLu6kGU7B0n4qWXJA2yxQwZraesKLcuj6HfMnd31xYqWogvSl9r34WjRJJ22WtwawDruzx/DXAxcBXCytRDSRHi6amwkzcEyd697NolEjaLFPAuPszwOeTz3UWf7oYeL+73xGhbJWVHC1aWQnPpa8lEpEh+mDMzIBbgV3ALnf/TOGlqrjkaFG6BqN+FqmKce7X1U+ugDGzCeA2QnPpXd2aS2cL1z2EXRfPAX4OfMrdP5XhNc8Cbge2Ak8B17n7PT2OuwH4W+Bo4ukXuPuP8vw/FCHdtwLl/yFFkqqyKmLmgDGzSWA/cDlwhbvfm3qdXxBC4kfAC4B/NbMn3P2+VS92qk8Dx4BzgRcCXzOzh7s7OKZ80d2vyFrmmNJ9KwoWqZJhNuiLUePJOoq0DrgHeCNweXe7165OH831iae+Z2ZfAV7OyW1Ger3u6cBlwPPd/Qjw7c7vvZ0KbAkrUld5d6WIVeMZONGu0/x5AHg98OZ0uPT5nXXAK4DvDzj0fMIWJY8lnnuYsE1sL28ws6fN7FEze+8a//5OM1sys6Unn3xyUHFFSpNeLrUo6Umfg8Ii1jrQWWowBwjhcidwppmlmyhfcfffpJ7bQ9hx8cCA194EpH/314TtYNPuI2wH+wTwEuB+M/tVqqkGjHfrWJFhFVlr6NW8yTNFItY+XGsGTGfE6JLOw3d0bkkrpMLAzG4BZoFXufuxAf9+ejtYOo9XbQfr7j9IPDxoZp8E3gKsChiROhimn6SXIoIq1qTQNQPGw6ZJ6QDoy8x2E0aSXuXuT2X4lceAKTN7nrv/sPPcRYRtYgdxwLKWTaRqiqo1FBVUMSaFFnaxo5ndCryGEC6ZOj46ncMPAB81s9PN7GXApcBdPV7/UjM704IXA38NfLmo8ouMW95+kn6G3eYmVv9PUiE7O5rZecBPCHNUkntKP+Tul/T8pZO/exawD3gtsAxc6+73mNkrgH92902d4+4lDINvAH4KfMbdbx1Utjru7CiSV94h5lGbVVl3dizkamp3f5whmyvu/jTwph7PP0ToBO4+fuvQBRRpuLzNm6KaVYNoPRiRFpqeDjtZTEzEvcRFASPSMouLcPXV4ULdyUnYvTveTHQFjEhD9evE7TaPVlbCbXk5Xhm0op1IA63ViZscHp+chMOHw/HqgxGRTNaa+t8dHt+xI/TD7N0bwijGcLUCRqSBBs2NmZ2FzZvD1sUxF6VXE0mkgbJM/Y91/VGSAkakobqhsrAAjzwSOnPTF0PGXpReASPSUN2O3qNHw2jRxARs2HBqh2/sRenVByPSUMnhaAj3494AUAEj0lDdPpaJzqc89qzdXtREEomgCiv6J/tYpqdX98GMgwJGpGBVWdEfyt/4T02kihnHGh0SV6z1betINZiKWFyEAwdg377+29BKPQwzv6QKTaoYFDAV0K1SP/ts2CUStA1tneWdX1KlJlXRFDAVsLAQ5iokFxecnNQ2tHWWp+9jXIs/lUF9MEMqsq9kbi4EStKVVzbnJJO1Dbumbh2UHjBmdpaZfcnMnjGzx83sbX2OMzP7mJktd24f62yrMnbdKu311xdzFersLOzZA1NTYa7Cxo2wbVsxZZV62L49XN3cpOYRVKOJlHVv6p2EtXsvImxZ8k3gx8DnxlhWIE6VdudOuPDCZnb0SX/p/pemfbGUGjA596beDnzC3X/a+d1PADsoIWBiXYVa9pwFGb8m979A+TWYfntTv7LHsRd0fpY8ruce1ma2k1DjYfPmzQMLkXeIcBxXoUo7jGPJhDKVHTB59q
be1PlZ8rhNZmae2twpz97Uww4R9qptNHUugxQrfZ40+cuq7IDJvDd1j2PPAI6kwyWvKu0PLM3X7zxp6rlS9ijS7/emTjzXb2/qRzs/G3RcLkUNEWp6uGTRtvOk1BqMuz9jZt29qd9NGEW6FHhpj8MPAH9jZl8njCK9H/jUqGUoqora9La0DC/ZJFrrPGliE7vsJhLA+wh7U/8vYW/q97r7o+m9qYHbgOcCj3Qe/0PnuZEVUUVteltahtOrSdTrPGlqE7v0gMmxN7UDH+rcKmlcbekmftM1Va8m0XXXrf67NXW4uvSAkXya+k3XVFmbzk1tYitgciq79tDUb7qmytp0bmoTWwGTQ8zaQ9bgauo3XZNlbTo3cW6VAiaHUWsP/U6WPMHV1G86Wa0JzWEFTEeWb4phaw+DVqvLG1xNnpjVdHlqJE1oDitgyP5N0av2MOiEybJanZo97ZC3RtKE80IBQ75vimTtIblz3uRkWNNl587er90NF7PVJ4uaPe0wTE217ueFAobhvym6S12urITbrl1hTZfkiZB87akpeOc7w5of6ZNFzZ7mG+Y8q/t5oYBh+G+K7lKXya05099KTfgWkmK08VywES9GrryZmRlfWlqK9vrz86HmsrKyemNxkaYys0PuPjPoONVgRqSlLiW2Os+FUcAUoIkTpKQa6j4XRgETQd1PCqmOus+FKXvBqUZq26JCEk/d90xSDSaCJkyQkmqo+8iTAiaCup8UclIV+tLqPBdGASPSR5F9aVUIqjIoYCJQJ28zaMeJ0ZXayZt1X+rOsTeY2e/M7Eji9txxljermJ28i4tw882j74ctgw3bwZr+G7W507/sGkzWfam7vujuV4ytdEPq18k7ajW5zd+Eo+q+99PTsLyc7W8wTF9ar79Rmzv9SwuYnPtS18rsLOzeDfffD5dddnJZh1HDoe5zIsqSvOp9ZQUmJrJf1pG3g7XfIt9t7fQvswZzPtn3pe56g5k9Dfwc2OPun+11UN69qYu2uAhXXx1OsIceOnkpwajh0OZvwlF03/vkRalFBXS6Vtrvb1TnkaBRlBkwefalBriPsN/0E8BLgPvN7Ffufm/6wDx7U8fQK0yKCAcNfw+n+94nazBFBHS/Wqn+RidFCxgzW6B/beTfgb8i+77UuPsPEg8PmtkngbcAqwKmbL3CZNj2fPr4Yb4J2zpE2pV87/P0wQzSr1ba1tpKL9ECxt3n1vp5pw9mysye5+4/7DydZ79pB2z4Eo6u3we3X5jkOfGK6tAdZ8dwlYMsxodeTdYM3L20G/AFQg3kdOBlhCbSBX2OvRQ4kxAqLwZ+Bmwf9G+86EUv8hgOHnTfuNF9cjLcHzyY/zVuu81969Zwn3bTTeG1IdzfdNNw5SzqdQYp4v2oo4MHw3valv/fLmDJM3zGy77Y8X3ARsK+1PfS2ZcawMxeYWZHEsf+BfBfhCbUAeBj7r5/zOX9vVHnNszPw3veA9/4Rrifnz/15+k5GNPTw81/6TWXI8ZcmnHP9ajKfKDZ2d5bwUpHlhSq862qNZitW0OtonvbunX1Md0azoc+NNq/lfyWjVXTGGcNpq21pSqhJjWY2ur2s9x443D9Gpddtvbj7lD3gw/CLbeEEZATJ8L9DTcM/809bE2jX42h+zyM9n7k0eaZsbWTJYXqfItVgylC1j6YiQn3devCfbfGs359tm/u9Lf9bbf1//bv15/Qr8aQpSYRo49CNZjykbEGU/alAq22c+fqfZS60iMUu3fD7bfDd74Tfn7sWNgtMu/ugMvLvUe41hpt6jccO2jyYKwRrEFD/lUezWobBUxF9foQffe7JwOml14frH5zctIfvLXCot9w7KBh2piXNvQbdtb1WhWTpZpT51uVm0h5HTzovmGDu1m4Tzdv8jZ9sr72Wq+xVjOvjKbMuIbl2w41kZpndha+9a3e1f+1agtZJ5m5n3qf/rd7NUXS11xlmXAYswmjyW/VooAZoyI+WP3CYpQP1uJiGJ
k6fjyEy4kT2ZozWZpA6fLOz8NVV4XfibVR3fbt4b7XFr0yXgqYMYndN9BriYg85UpeCDg5CYcPh5+t9Tp5Q21xMeyCefx4eHz06OAgyxPK6fd427a1j5f4FDBjEnstl0HNlV7HLyyEIOkuZTAxATMz8PDDsHcv7N+/dhDmvYBzYeHkkgkQgmytUMobylovp3oUMGMSu28gz4cr+cGdnISpzlmwfj1cfDEcOrR6Elu/EMlzEeHcXGgWHT0awmzPnmIDQ/0v1aOAGZPY64Tk+XAlP7gAO3bA5s0nf2f//pOvMz09XNOu31ITed6DvIGhtVgqKMtQU51vTRqmHiTrrNlBw8fJ1xlm2LfI4em2Xq1cdWiYun2yNlcGfdOnXydvs6PIvhAt3lRvCpiWKiqMelFfiHQpYGSgvLUI9YVIlwJGolDTRqDknR1FpNkUMCISjQJGRKIx73XpbIOY2ZPA40P86tnAUwUXp2n0HmXTxPfpPHc/Z9BBjQ+YYZnZkrvPlF2OKtN7lE2b3yc1kUQkGgWMiESjgOlvfvAhraf3KJvWvk/qgxGRaFSDEZFoFDAiEo0CRkSiUcD0YWYbzOx2M3vczH5rZt8zs0vKLlcVmNlZZvYlM3um8/68rewyVY3On0AB098U8N/AK4E/AD4M3GdmW0osU1V8GjgGnAv8JfBZM7ug3CJVjs4fNIqUi5l9H/iIu99fdlnKYmanA78Enu/uj3Weuwv4mbtfW2rhKq6N549qMBmZ2bnA+cCjZZelZOcDx7vh0vEwoBrMGtp6/ihgMjCzdcDdwH53/8+yy1OyTcBvUs/9GnhOCWWphTafP60NGDNbMDPvc/t24rgJ4C5Cn8NVpRW4Oo4AZ6SeOwP4bQllqby2nz+tXTLT3ecGHWNmBtxO6Mz8M3f/Xexy1cBjwJSZPc/df9h57iJaVvXPQuePOnnXZGafA14IvMbdj5Rdnqowsy8ADryb8P58HXipuytkEnT+KGD6MrPzgJ8AR4HjiR+9x93vLqVQFWFmZwH7gNcCy8C17n5PuaWqFp0/gQJGRKJpbSeviMSngBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRKGBEJBoFjIhEo4CRsTKz9WZ2bI0r2R8ou4xSnNZeTS2lWQdc2eP5a4CLga+OtzgSk65FktKZ2ceBDwLvd/dbyi6PFEc1GClNZ72UW4FdwC53/0zJRZKCqQ9GStFZ6W0eeB/wrmS4mNmfm9m3zeyImf2krDLK6FSDkbEzs0lgP3A5cIW735s65JfAHsJKcNeMuXhSIAWMjFVnAex7gDcCl7v7qlEjd/9m59g3jbl4UjAFjIyNmW0A/gl4DfBmd/9ayUWSyBQwMk4HgNcDdwJnmtkVqZ9/xd3TW6JIjSlgZCw6I0bdvZnf0bklraC9lRpHASNj4WHCVXo/JWk4BYxUTmeUaV3nZmZ2GiGjjpZbMslLASNV9HbgjsTj/wMeB7aUUhoZmi4VEJFoNJNXRKJRwIhINAoYEYlGASMi0ShgRCQaBYyIRKOAEZFo/h/I+hWgkh+gbAAAAABJRU5ErkJggg==\n",
323 |       "text/plain": [
324 |        "<Figure size 288x216 with 1 Axes>"
325 |       ]
326 |      },
327 |      "metadata": {},
328 |      "output_type": "display_data"
329 |     }
330 |    ],
331 |    "source": [
332 |     "fig = plt.figure(figsize=(4,3))\n",
333 |     "plt.plot(X_2[:,0], X_2[:, 1], \"b.\")\n",
334 |     "plt.xlabel(\"$z_1$\", fontsize=18)\n",
335 |     "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n",
336 |     "save_fig(\"linear_autoencoder_pca_plot\")\n",
337 |     "plt.show()"
338 |    ]
339 |   },
340 |   {
341 |    "attachments": {
342 |     "image.png": {
343 |      "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAZyklEQVR4Ae2da6wexX3Gn+Pbwc2lCqdH9EMEfGiQooiWxPmSloqkdhvRRmoU1DRJKQRTLk1d1Wm+1BIoJo6gsqrWagupQcXYSZSUhKS5CaWCgERKqsguuQilippyCUI0hio3hO9v9bA7PnPmzOzO7ruz12ek97x7mct/fnvmef8zszsLKIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACLRPYKH9IucvcWlpaXbhhRfOn5FyEAERqEXgyJEjzwNYLku8oSxCH89TXA4fPtxH02STCEyCwMLCwlMxFV0XE0lxREAERKAOAQlMHWpKIwIiEEVAAhOFSZFEQATqEJDA1KHWszTf+AZw220AvxVEoE8EBjnI2yeAXdtCUdm6FThxAti0CXjwQeAtb+naKpUvAhkBeTAD+E8o8lAefjgTl9Ons2/uK4hAXwjIg2n5SlAsKAJvfetaT8N3rsxDYT70XIwHw30FEegLAQlM4Er4GnsgavThIrGwz23YAFxzDXDVVZkYUTxsD8XuAnGb3aKQaEUbp4gikICABMYD1W7sTY5r+LozRizscxST/fuBgweBffvKPRTmYfLxVEeHRKAzAhqD8aC3Gzu9B+43EUx3Zv36TDTs7ow5t5A/vDGbZd2eF17IPJQ9e5obwKWAatapiSuqPMoIyIPxEDKNvelxjaLujDl36BBw991Zl4jeE21p0kNJ5Z15MOqQCEAC4/knMI2dnotp4J5otQ4ViYU5Z8Zemi6bBvu8M5arIAIpCEhgAlRNYw+cTno4ZdmpvLOkQJT5YAlMXmDYZWjKU2kyr1T/URQvzTqloqt8XQJdC8wigDsAbANwLoAfANgF4H7X0BT7TY5HNJlXirraeab0kOxytC0CXc8iUeB+COAyAL8I4CYA9wJoZTUp33hE3X+JefKiOLU5qxNbXmy8usyUbvwEuvZgXgSw28L8ZQBPANgC4EnreJLNJscj6uZlez72DXb0MlIEu7yie3yK4vFcU93KFHVUnv0h0LXAuCTOA3ARgMfdEwCuzz84evSo53T1Q02OR9h5LS2t3DtTJhS252PfYJfqoUW7PHOPj8/GUDxbeHg/z/bt2R3HvjyqXxGlEIF0BDYCeADA/rIitmzZMutrePTR2Wzz5tls/frsm/tFwcRfWOCtddmHaW+9tShV/XOmvDL7QvFoF9MaW2k361tWT9tixmU+VdLY6bXdPQEAUWvW9sWD4VjQxwGcALCjTGBSnHfdfnc/tszQL38ovfF8fDfYhdLMc9yURzuL7rMJxTNdwWPHjMSsPMUd48XYHlBRF22eOiqtCNgEeHP8AQAPAdhsnwhtN+3BuL/W+/dX80Ls3xM3ryq/0ow7hF922nnjjbPZ4mK8p2YY2R5QSk/NlKfvNASG5MF8DMDr86nql0KikvK463Xcd1/2qxx6grnIltAvf1Eac45pY7wAE7/Od13PzC7L2FnnjmPjATX9GIZtn7b7Q6DrLtIFAG4AcBzAcxYWHvuktZ900/2nv+QS4GtfA9atW/tQYowhpgHGxG0zTmz3JFaE6tbz6quzWlOgUgtqm3xV1loCXQsM363S+cvf+E9u7m7lDNDOncCZMwBnSbhcwlgageupcd+tW6wIrf1XKj/CvC+7DDh5Eti4MZt9Kk+lGEMm0PWNdr1hx4a2axfA5RHovlNg+HnssXZvgksJxHhqvuUiTLk+ETLn5v3euzcTF+ZDkbn22vKFyilKbd6EOG8dlX41ga49mNXW9GDPNEKKDG98s5dOSHVvSlvVtj011tP1XmiHXX+zXERT9j377Oqcvve9rDyfJ8WYKb2p1ZZoLxUBeTAOWTY6dou4Uv/ll2frstiDvU70Qe2ywbIxh8SFlTEi1OQCVwY
SPRY3UMg5Re8LKb0pX3k61jyB0XowMY3Jh5PpOAZjPBh2Jxia/jX3lZ3yWBVvgCLj827mte963osNgOL1zDPluaX0pspLV4wmCIxSYKo0Jhei/avJc9ddB5x/fvGvvptHH/ftelE8uZ9CRMrqbkTmBs4T5uGNbzRbq7+NN0Vbi7yu1am01ycCo+wi+RpTLHTzq2kGQjmVysFfhiEPNrr14n5XgQPpvAWAgd/cDwWKDPl3IYYhm3Q8nsAoPRjTmPhLXbVr4/vVnMcjir8UaWP66pW2xHDuvD6Li1k3tOr1CeeqM30kMEqBKWpMFAvjcvOCmG37F5LbZp/xd+8Gjh/Ppq1juhd2GSafPlx8u15d2kM7zH1HFJs+MeqSi8ruCYG6zyLZzwnxOZpNm4qfpTHx163Lnh7md9mTwyZN2dPKaZ4QUa4i0A6B2GeRRjkGE9JBd2yGN3sVTUGb+LzhjmMF27aVv5vIpCnKN2SfjovA2AhMSmDM2IwZwOXt6mab59xgx+eYAbtKZe68nUbjCy5R7U+NwCjHYEIX0e37M55vDMakd+OXiQvT1UljytO3CIyNQOcPGtYByjGYw4ejFtSqk310mr4O5kZXQBFFoCaBhYWFIwDeXJZ8Uh5MGYwq5ykufJzATIUP/TmlKnVXXBGIJTCpMZhYKDHxNJgbQ0lxpk5AAlPzP0CDuTXBKdmkCKiLZF3uKmMqGsy1wGlTBAIEJDA5mDpjKhSZmJmlAHsdFoHRE+hDF4mvKeGUENflvacr4vOOqVCghvwwZFfcVe64CfTBg+E6Zx8F8PbY15akuCRmTMXMCnE/NtTxfmLzVjwRGDKBPgjM53KAnFN/bVcw5xlT8Xk/6jp1dSVVbp8I9EFgYnk0/m5qt+C6YyrzeD+uDdoXgTERGJLA3AmAHywvL8/6dBFivJ/QDFXoeJ/qJ1tEoC6BIQlM3Tq2kq7I+wmN0YSOt2KwChGBFgj0YRaphWqmKYICETNz5BujoUWh42msVa4i0D6BPngwtIEfrt/PzzkATuWf9olElljF+wiN0YSOR5qgaCLQewJ9EJibAHzYInUlgFsA7LaO9W7T530UzRz53sccM3bTu4rLIBGoQKAPAkMh6bWY+HjGeh+up8O3FNihaOzGjqdtERgigT4IzBC5RS8sVdXTGSQMGS0CAQISmACYmMMx3kespxNTnuKIwNAISGASXzF7nGVpKZs5YpE8riACYycggWnhChsx0Qp4LcBWEb0ioPtgWrocvrGYlopWMSLQGQEJTEvozVhM0WtSWjJFxYhAawRiu0ibAPwcwMaAZZ8H8K7AOR3W60z0PzBRArECQ2HZ7mH0QQBvAvAlzzkdcgjEzDo5SbQrAoMmECswLwL4hFPTvbm4fAjAAeecdkVABETg5WeAqmLgy9r+HsCf5Z87qmag+CIgAtMgUHWQl/G5JssHAFwLwIjLIoC7APwPgJ8B+D6AP58GQtVSBEQgRCC2i8T0fNL5IIA/BMAHEj9lZcp8ngPwO7nI/CqArwL4XwD3WvG0KQIiMCECsR4MB3k/DeAPcoGxxYW4OEZzM4D/BnAGwLcAfBHApRNiqaqKgAg4BGIEht0fLsz9jnwq2izS7WS1apeC9JsAvrPqqHZEQAQmRSCmi3QoFxe+s+g1effIhkRP5af2AQD/mI/FMK2CCIjARAmUCQxnjC7P2bwfAD92YHfoVfYBAH/L+8oA/BaAE8457YqACEyIQJnAcPX+V1fgsQ/A1lxcnq+QTlFFQARGSKBMYKpUmffG0Gt5G4CjVRIqrgiIwDgJxAzyxtT8gvy+l18B8ET+3BKfXbo/IvG5APgsE2eingLwvog0iiICIjAAAk15MBQGjtfUCbfnYzXnAbgEwFcAfBvA43UyUxoREIH+EGjKg6lbo1cAuCK/h4Yez9fz+2f+uG6GSicCItAfAl0LzEX5+4/4aIEJ9F7eYHasb76b+jA/R49qiMfiok0R6C2BrgXmlZ57aH7imfo
mQD4D9WZ+lpeXewu0acNi3x7ZdLnKTwSaINDUGExdW9gtcqfBuc8HJicf3HcqPfigFguf/D/FwAB07cGwa0SRe53F7dc0wJvRmGcdX3k+1n+UNjsj0LUHw6lpPtv0EQB/ks8i/T6AX++MSI8KNuv4njgBbNoEcD8myPOJoaQ4bRDo2oNhHbm2zGYAP8qXgPhTeTDZpTfvVNqzB6jSPZrH82njn05lTIdA1x4MSf8fgHdOB3m1mrrr+NI7oYDQmzHvW3JzrOv5uPloXwTmJdAHgZm3DpNJH9v1ofDs2wfcdx9wxRVhIZoMOFW0MwISmM7QVy/Y1/XxeTEUop07AY7dPPIIcPHFEpnqtJWiCQJ9GINpoh6TyMN0fcpe3uYTokkAUiV7R0AeTO8uSdggeisc7G1yDCZmTCdskc6IQDEBCUwxn96dpcj4ukW2obFCFDumY+etbRGoQkACU4XWgOLGCJGvK1UmXgNCIFN7QEBjMD24CF2ZEDum05V9Knf4BOTBDP8a1q5BbFeqdgFKOHkCEpiJ/wvEdKUmjkjVn4OAukhzwFNSERCBYgISmGI+oz3LGaTbbgP4rSACqQioi5SKbI/z1fR0jy/OyEyTBzOyCxpTHd/0dEw6xRGBqgTkwVQlNtD49FrMHcBmerrqOjMDrbrM7pCABKZD+G0V7esSxTxy0JZ9Kme8BNRFGu+1PVszu0t0/Diwe3d2ateu8scOzmaiDRGoQUACUwPa0JKYLtG6dcCZM8ADDwBbt8bPINED0ozT0K56P+ztWmB25O86Og7gnn4gGZ8V5o7dbdsAIzIcf6FnEwpGVO68MxOjm2+uJkqhfHV8WgS6HoN5FsBHAbw9X5d3WvRbrC1Fhl0jLkDlG9y1B4FpFj0cxltYyLweej5GlJiXggjEEOhaYPhGAQa+UO21+ba+EhEwnoyZTTJC4Q4CX311JianT2ceDxe4otBUebNBoioo24ER6FpgquDiq2P5gV4dWwXb6rgUFSMs5ow9CEwvhYFiYjwdru/7wgvFC42bvPQtAjaBIQkMXx3LD5aXl2d2JbQ9HwEzCGwE5aqrAH5cT2e+UpR6igRSCgyHEC8LQP13AJcGzulwywRCXSfX02nZLBU3AgIpBSbyPYQjoDiCKvi6TkXVsgeFmxKiFHkW1UHn0hNIKTAx1rN8ftbnn3MAnMo/MekVpwMC7qBwlbdOhsxNkWeoLB1vj0DX98HcBOAlAH8F4Mp8m8cUekzAHRTm/rwhRZ7z2qT08xPoWmB40/qC88lvZJ+/csohDQEzKFz2fqYqpafIs0r5ipuGQNddpDS1Uq5JCYQGhecpNEWe89ijtM0QkMA0w1G5NECg6kBzA0Uqi8QEJDCJAY8xew3IjvGqpqlT12MwaWqlXJMS0IBsUryjylwCM6rL2U5lNCDbDucxlKIu0hiuYst10IBsu8CHfAOiBKbd/5XRlNbFgOyQG1rdCx8z3tVnLhKYulde6ZIRYIM5dCjLng9dUsxiGloygzrM2DfeRR4m9J2LBMZcKX33ggAbDMd4zLIRBw4ADz2UPdnNY1yjht9seHZD64XxCYww413mSXfu26FMgOy47jZZMz3zTMVSAuNS136nBPgPf/LkiglGTExD46LlXPzqxz/O1glO2ThWrOhuiw3f9wYIIw5LS6vX7nEFKGR5W56PBCZ0BXS8EwJsIBs3rngwZhU9NjQufLVjB3DqFLB3b7ba3uJi1gBT/QJ3AsEplHWz6+eKQ50FwebxfBzzCnclMIV4dLJtAmxI/Od3x2BoB1fV49rAs3y5MXudYJ5P7e67LIwX0bYX5YoDufAVNLSHb3+IsYdx7FULuZ8iSGBSUFWecxFwf7FNZqZRsJtEceEbEthI2E0wi5Rzv4nlI0yZoW/Xi2ijTGOL4WCPy1S1h4x9XS9TRlPfEpimSCqf5ATsRkFRMesEu7/o3GfclCFFmbEekc2BYsN9ei4UnCqD4EyXmpMEJuV/ofJunECoUbT
h7tuV8XkR9nl3u0w8Qh5IKJ3LgfZs2JB5dvzmftUQKqtqPnZ8CYxNQ9uDJMDGltLd9zW8KmWGxMOG7fOIeN7X9fPZ893vZoPfHJ8yY1R2/mXbMTaW5eE7L4HxUdGxwRFwf9HrVMA0XLv7xXx8jZzHY8t0xYMD2DxmujfMy+cRuem4z+Daw2OcXWP3iIGzbIxL+2IC682X8pmxLXNrQGz6ojIkMEV0dG4yBMwvuGlkHEDmFLj9Erq6Dc8WD3Zf7r47EwN7QJqN2XhhFDgKBL/drp9PdJ5+OhMVc7Foe2wXyVdvlhmb3pQZ+u5SYBYB3AFgG4BzAfwAwC4A94eM1XERSEXANFzOTjGYKXBuu408i7H6r/F+2DDdX35bPL75TeALX8i6Ma5gmXS2h+K7x8W2hyJ0yy0r3SIuY3r77WttWG3typ5dbwoT319Ob8bYshKz3laXAsOyf5i/O+lpAL8L4F4AFwN4sl51lEoE6hEwXobtwdDbYPA1crsU4wVQMNj4fVPWpsG6YuB6CqbBm9kgc4+LKc+IlblP6LHHVrwX3uF83XXA9S+//9SkKP429Ta2NykuLLlLgXkRgL3A95cBPAFgiwSm+J9CZ5snYBouGzgfQ+A3G+9dd4VFw1jhigL3jaCYOPzmcY6PMFAMtm9fG89t8K4AZamBgwezaWmKIL0WBoobHw4t8qZMevNt15tl+ew2cet8dykwrr3nAbgIwOPuiXxf76YOgNHh+QjYDZKNjF2UY8dWuh1uV8YtLVYU3HgUAzfYDd6MxTCO3fBtQeM5ei3nn78ybmJ3sXzelK9MO3/3/Bj2NwJ4AMD+mMps2bJlpiACTRB49NHZbPPm2Wz9+uz7xhuz7UxeZrOFhew44/Fz663Zt1t20Tk7bpV4tl1MZwK3Q+doH+tC+/nN/RQBwOGYtprSg4l9NzWX7fw4gBOcbYsxWnFEoCkCtjdAT4XBDKKy68FujPE0Qp6B7QGVeQI8XxaH+RVNGzO9mXFyuzWul8T9LkNKgYmpGl+69s8A2D3iIK/1oH6XWFT2VAi4DZJiwg+Fx268oVvxKQYh4SliGBIlk5892EzBc4UiJFRF4lNkT6pzKQUmxuaPAXh9PlXNV8gqiECrBEINksft4AqRafCuB8R9O61PSIyImJkbe5zE5Gce5qwzbczybRvserS93aXAXADgBgDHATxnVZzHPmnta1MEkhKIaZCM4+uWhISHBoeExIiImYq2RcnNr+lp46QgPZl3KTBP5e+k9pilQyLQPwI+IQoJD60PCYkrIsYbYpqi/PpHpNyiLgWm3DrFEIEBEPAJD80OCUmZiITyGwCKNSZKYNYg0QERaIZAkZCMSUSKaElgiujonAjMSWAqQhLCpFfHhsjouAgMlAAHlzmtzu+ugzyYrq+AyheBBgmEZq4aLKJSVvJgKuFSZBHoNwHfzFWXFktguqSvskWgYQJm5oqPOfjuAG64uNLs1EUqRaQIIjAcAkUzV13UQgLTBXWVKQIJCfRp5kpdpIQXWlmLwNQJSGCm/h+g+otAQgISmIRwlbUITJ0A12MZYjgKgA9L9in8EoDn+2RQC7aozi1A7kERvuvM1RCWe2DbZEyIWkJwZDRU55Fd0EB1al9ndZECRHVYBERgfgISmPkZKgcREIEAgfyNKoGzOlyVwJGqCUYQX3UewUWMqMIUr3MEFkURAREQAREQAREQAREQAREQAREQAREQAREYE4HF/CVyvAHwZwC+BeDyMVXQqsu5AD4P4MX8hsf3WefGuDmla+u7fq8DcAzAJ3wndawdAq8AsBvAhQA4/f+OXGi4P7bwKQD/AuCVAC4F8BMAbxhbJa36TOnaWtU+u/lvAB6RwJzl0ZuN7wC4ojfWNGMIGxvf4nyRlR3fLf7X1v4UNsd4bX3X7T0A7s1/PCt7MLrRzoe0mWN83zYb4ePNZNebXFinUwC+b1n07ZF7MFZVX94c67V16/lqAB8B8Jfuidh9CUwsqWrxNua
vvz0I4L+qJe19bHaLfupYyS7Sq5xjY90d87V1r9mefFzxGfdE7L4EJpYU8DCAWeDzdSsbMmWXgd2IHdbxsWz+HAB/2ezAfQ5sjz2M/dra1+8SANsA/J19UNvdEuDyFwcAPARgc7emJCvdjMFwZsGEQxMYg5nCtTXXk98781nC5wDwwx+WlwD8px1J2+0S+CcA/5HPrrRbcrulfRoAZ5IoNr8xgVkk0p3KtTX/Sb8A4Jetz98A+KzWgDF42v/mAjzsQvF+Aaq9+fxR+6YkL5H3wfxr/gv3NICx3wczpWsb+ufhLRiVZ5FCmem4CIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIjAEAlsyp8oDz2B/rkhVko2VyOwoVp0xRaBaAJcN2W7J/YHAbwJwJc853RIBERABGoT2Js/EFp7hbTaJSuhCIjAaAlwLZV/AHAGwAdGW0tVTAREoHUCXAXuLgCnAVzjlP5uAFwNkEtbPOmc064IiIAIFBJYn68hchLAez0xfxsAV63/CwmMh44OiYAIBAlwkPczAI4DeFcwVnbinRKYEkI6LQIicJYA34TIWSKu4fp7Z4+GNyQwYTaDPqNp6kFfvt4az0XA+WbLewC8BsCVjqVf9Lz6xImiXREQARFYS4AzRnxvUugGOw72ckFpO8iDsWmMaFsezIguZk+qQmFx35vUE9NkRtsEJDBtE1d5NgHOMnEwmB96Pufkng8HhhVEQAREYC4C7/d0pXQ/zFxIlVgEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAERKBpAv8PmhajeRgMBjIAAAAASUVORK5CYII="
344 |     }
345 |    },
346 |    "cell_type": "markdown",
347 |    "metadata": {},
348 |    "source": [
349 |     "![image.png](attachment:image.png)"
350 |    ]
351 |   }
352 |  ],
353 |  "metadata": {
354 |   "kernelspec": {
355 |    "display_name": "Python 3",
356 |    "language": "python",
357 |    "name": "python3"
358 |   },
359 |   "language_info": {
360 |    "codemirror_mode": {
361 |     "name": "ipython",
362 |     "version": 3
363 |    },
364 |    "file_extension": ".py",
365 |    "mimetype": "text/x-python",
366 |    "name": "python",
367 |    "nbconvert_exporter": "python",
368 |    "pygments_lexer": "ipython3",
369 |    "version": "3.6.5"
370 |   },
371 |   "nav_menu": {
372 |    "height": "381px",
373 |    "width": "453px"
374 |   },
375 |   "toc": {
376 |    "base_numbering": 1,
377 |    "nav_menu": {},
378 |    "number_sections": true,
379 |    "sideBar": true,
380 |    "skip_h1_title": false,
381 |    "title_cell": "Table of Contents",
382 |    "title_sidebar": "Contents",
383 |    "toc_cell": false,
384 |    "toc_position": {},
385 |    "toc_section_display": "block",
386 |    "toc_window_display": false
387 |   }
388 |  },
389 |  "nbformat": 4,
390 |  "nbformat_minor": 1
391 | }
392 | 


--------------------------------------------------------------------------------
/deep_learning/cnn_with_keras.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {
  7 |     "scrolled": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "from tensorflow import keras\n",
 12 |     "import tensorflow as tf\n",
 13 |     "import numpy as np"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 5,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
 23 |     "X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n",
 24 |     "y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]\n",
 25 |     "\n",
 26 |     "X_mean = X_train.mean(axis=0, keepdims=True)\n",
 27 |     "X_std = X_train.std(axis=0, keepdims=True) + 1e-7\n",
 28 |     "X_train = (X_train - X_mean) / X_std\n",
 29 |     "X_valid = (X_valid - X_mean) / X_std\n",
 30 |     "X_test = (X_test - X_mean) / X_std\n",
 31 |     "\n",
 32 |     "X_train = X_train[..., np.newaxis]\n",
 33 |     "X_valid = X_valid[..., np.newaxis]\n",
 34 |     "X_test = X_test[..., np.newaxis]"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 1,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "# Deep CNN for Fashion MNIST"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 6,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "from functools import partial\n",
 53 |     "\n",
 54 |     "DefaultConv2D = partial(keras.layers.Conv2D,\n",
 55 |     "                        kernel_size=3, activation='relu', padding=\"SAME\")\n",
 56 |     "\n",
 57 |     "model = keras.models.Sequential([\n",
 58 |     "    DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]),\n",
 59 |     "    keras.layers.MaxPooling2D(pool_size=2),\n",
 60 |     "    DefaultConv2D(filters=128),\n",
 61 |     "    DefaultConv2D(filters=128),\n",
 62 |     "    keras.layers.MaxPooling2D(pool_size=2),\n",
 63 |     "    DefaultConv2D(filters=256),\n",
 64 |     "    DefaultConv2D(filters=256),\n",
 65 |     "    keras.layers.MaxPooling2D(pool_size=2),\n",
 66 |     "    keras.layers.Flatten(),\n",
 67 |     "    keras.layers.Dense(units=128, activation='relu'),\n",
 68 |     "    keras.layers.Dropout(0.5),\n",
 69 |     "    keras.layers.Dense(units=64, activation='relu'),\n",
 70 |     "    keras.layers.Dropout(0.5),\n",
 71 |     "    keras.layers.Dense(units=10, activation='softmax'),\n",
 72 |     "])"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "Train on 55000 samples, validate on 5000 samples\n",
 85 |       "Epoch 1/10\n",
 86 |       "35808/55000 [==================>...........] - ETA: 3:05 - loss: 0.8821 - acc: 0.6827"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n",
 92 |     "history = model.fit(X_train, y_train, epochs=10, validation_data=[X_valid, y_valid])\n",
 93 |     "score = model.evaluate(X_test, y_test)\n",
 94 |     "X_new = X_test[:10] # pretend we have new images\n",
 95 |     "y_pred = model.predict(X_new)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 2,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "# ResNet"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, strides=1,\n",
114 |     "                        padding=\"SAME\", use_bias=False)\n",
115 |     "\n",
116 |     "class ResidualUnit(keras.layers.Layer):\n",
117 |     "    def __init__(self, filters, strides=1, activation=\"relu\", **kwargs):\n",
118 |     "        super().__init__(**kwargs)\n",
119 |     "        self.activation = keras.activations.get(activation)\n",
120 |     "        self.main_layers = [\n",
121 |     "            DefaultConv2D(filters, strides=strides),\n",
122 |     "            keras.layers.BatchNormalization(),\n",
123 |     "            self.activation,\n",
124 |     "            DefaultConv2D(filters),\n",
125 |     "            keras.layers.BatchNormalization()]\n",
126 |     "        self.skip_layers = []\n",
127 |     "        if strides > 1:\n",
128 |     "            self.skip_layers = [\n",
129 |     "                DefaultConv2D(filters, kernel_size=1, strides=strides),\n",
130 |     "                keras.layers.BatchNormalization()]\n",
131 |     "\n",
132 |     "    def call(self, inputs):\n",
133 |     "        Z = inputs\n",
134 |     "        for layer in self.main_layers:\n",
135 |     "            Z = layer(Z)\n",
136 |     "        skip_Z = inputs\n",
137 |     "        for layer in self.skip_layers:\n",
138 |     "            skip_Z = layer(skip_Z)\n",
139 |     "        return self.activation(Z + skip_Z)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "\n",
149 |     "model = keras.models.Sequential()\n",
150 |     "model.add(DefaultConv2D(64, kernel_size=7, strides=2,\n",
151 |     "                        input_shape=[224, 224, 3]))\n",
152 |     "model.add(keras.layers.BatchNormalization())\n",
153 |     "model.add(keras.layers.Activation(\"relu\"))\n",
154 |     "model.add(keras.layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\"))\n",
155 |     "prev_filters = 64\n",
156 |     "for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:\n",
157 |     "    strides = 1 if filters == prev_filters else 2\n",
158 |     "    model.add(ResidualUnit(filters, strides=strides))\n",
159 |     "    prev_filters = filters\n",
160 |     "model.add(keras.layers.GlobalAvgPool2D())\n",
161 |     "model.add(keras.layers.Flatten())\n",
162 |     "model.add(keras.layers.Dense(10, activation=\"softmax\"))"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {},
169 |    "outputs": [],
170 |    "source": [
171 |     "model.summary()"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": null,
177 |    "metadata": {},
178 |    "outputs": [],
179 |    "source": []
180 |   }
181 |  ],
182 |  "metadata": {
183 |   "kernelspec": {
184 |    "display_name": "Python 3",
185 |    "language": "python",
186 |    "name": "python3"
187 |   },
188 |   "language_info": {
189 |    "codemirror_mode": {
190 |     "name": "ipython",
191 |     "version": 3
192 |    },
193 |    "file_extension": ".py",
194 |    "mimetype": "text/x-python",
195 |    "name": "python",
196 |    "nbconvert_exporter": "python",
197 |    "pygments_lexer": "ipython3",
198 |    "version": "3.6.5"
199 |   },
200 |   "toc": {
201 |    "base_numbering": 1,
202 |    "nav_menu": {},
203 |    "number_sections": true,
204 |    "sideBar": true,
205 |    "skip_h1_title": false,
206 |    "title_cell": "Table of Contents",
207 |    "title_sidebar": "Contents",
208 |    "toc_cell": false,
209 |    "toc_position": {},
210 |    "toc_section_display": true,
211 |    "toc_window_display": false
212 |   }
213 |  },
214 |  "nbformat": 4,
215 |  "nbformat_minor": 2
216 | }
217 | 


--------------------------------------------------------------------------------
/deep_learning/data/fashion/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/fashion/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/fashion/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/train-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/fashion/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/train-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/mnist/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/mnist/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/mnist/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/train-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/data/mnist/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/train-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/deep_learning/images/ann/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/deep_learning/images/ann/activation_functions_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/ann/activation_functions_plot.png


--------------------------------------------------------------------------------
/deep_learning/images/ann/perceptron_iris_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/ann/perceptron_iris_plot.png


--------------------------------------------------------------------------------
/deep_learning/images/autoencoders/linear_autoencoder_pca_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/autoencoders/linear_autoencoder_pca_plot.png


--------------------------------------------------------------------------------
/deep_learning/model_ckps/README.txt:
--------------------------------------------------------------------------------
1 | This directory contains model checkpoints
2 | 


--------------------------------------------------------------------------------
/deep_learning/simple.txt:
--------------------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 


--------------------------------------------------------------------------------
/exp/Optimizer_2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# Python ≥3.5 is required\n",
 10 |     "import sys\n",
 11 |     "assert sys.version_info >= (3, 5)\n",
 12 |     "\n",
 13 |     "# Scikit-Learn ≥0.20 is required\n",
 14 |     "import sklearn\n",
 15 |     "assert sklearn.__version__ >= \"0.20\"\n",
 16 |     "\n",
 17 |     "try:\n",
 18 |     "    # %tensorflow_version only exists in Colab.\n",
 19 |     "    %tensorflow_version 2.x\n",
 20 |     "except Exception:\n",
 21 |     "    pass\n",
 22 |     "\n",
 23 |     "# TensorFlow ≥2.0 is required\n",
 24 |     "import tensorflow as tf\n",
 25 |     "from tensorflow import keras\n",
 26 |     "assert tf.__version__ >= \"2.0\"\n",
 27 |     "\n",
 28 |     "# Common imports\n",
 29 |     "import numpy as np\n",
 30 |     "import os\n",
 31 |     "\n",
 32 |     "# to make this notebook's output stable across runs\n",
 33 |     "np.random.seed(42)\n",
 34 |     "tf.random.set_seed(42)\n",
 35 |     "\n",
 36 |     "# To plot pretty figures\n",
 37 |     "%matplotlib inline\n",
 38 |     "import matplotlib as mpl\n",
 39 |     "import matplotlib.pyplot as plt\n",
 40 |     "mpl.rc('axes', labelsize=14)\n",
 41 |     "mpl.rc('xtick', labelsize=12)\n",
 42 |     "mpl.rc('ytick', labelsize=12)"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 7,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "from sklearn.datasets import fetch_california_housing\n",
 52 |     "from sklearn.model_selection import train_test_split\n",
 53 |     "from sklearn.preprocessing import StandardScaler\n",
 54 |     "\n",
 55 |     "housing = fetch_california_housing()\n",
 56 |     "X_train_full, X_test, y_train_full, y_test = train_test_split(\n",
 57 |     "    housing.data, housing.target.reshape(-1, 1), random_state=42)\n",
 58 |     "X_train, X_valid, y_train, y_valid = train_test_split(\n",
 59 |     "    X_train_full, y_train_full, random_state=42)\n",
 60 |     "\n",
 61 |     "scaler = StandardScaler()\n",
 62 |     "X_train_scaled = scaler.fit_transform(X_train)\n",
 63 |     "X_valid_scaled = scaler.transform(X_valid)\n",
 64 |     "X_test_scaled = scaler.transform(X_test)"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 8,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "X_new_scaled = X_test_scaled"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 93,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "class MySGDOptimizer(keras.optimizers.Optimizer):\n",
 83 |     "    def __init__(self, learning_rate=0.001, momentum=0.9, name=\"MySGDOptimizer\", **kwargs):\n",
 84 |     "        \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n",
 85 |     "        super().__init__(name, **kwargs)\n",
 86 |     "        self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate)) # handle lr=learning_rate\n",
 87 |     "        self._set_hyper(\"decay\", self._initial_decay) # register the decay hyperparameter, initialised from self._initial_decay\n",
 88 |     "        self._set_hyper(\"momentum\", momentum)\n",
 89 |     "    \n",
 90 |     "    def _create_slots(self, var_list):\n",
 91 |     "        \"\"\"For each model variable, create the optimizer variable associated with it.\n",
 92 |     "        TensorFlow calls these optimizer variables \"slots\".\n",
 93 |     "        For momentum optimization, we need one momentum slot per model variable.\n",
 94 |     "        \"\"\"\n",
 95 |     "        for var in var_list:\n",
 96 |     "            self.add_slot(var, \"momentum\")\n",
 97 |     "\n",
 98 |     "    @tf.function\n",
 99 |     "    def _resource_apply_dense(self, grad, var):\n",
100 |     "        \"\"\"Perform one plain gradient-descent step (var -= lr_t * grad) for one model variable; the momentum-slot update below is commented out\n",
101 |     "        \"\"\"\n",
102 |     "        var_dtype = var.dtype.base_dtype\n",
103 |     "        lr_t = self._decayed_lr(var_dtype) # handle learning rate decay\n",
104 |     "#         momentum_var = self.get_slot(var, \"momentum\")\n",
105 |     "#         momentum_hyper = self._get_hyper(\"momentum\", var_dtype)\n",
106 |     "#         momentum_var.assign(momentum_var * momentum_hyper - (1. - momentum_hyper)* grad)\n",
107 |     "        var.assign_sub(grad * lr_t)\n",
108 |     "\n",
109 |     "    def _resource_apply_sparse(self, grad, var):\n",
110 |     "        raise NotImplementedError\n",
111 |     "\n",
112 |     "    def get_config(self):\n",
113 |     "        base_config = super().get_config()\n",
114 |     "        return {\n",
115 |     "            **base_config,\n",
116 |     "            \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n",
117 |     "            \"decay\": self._serialize_hyperparameter(\"decay\"),\n",
118 |     "            \"momentum\": self._serialize_hyperparameter(\"momentum\"),\n",
119 |     "        }"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 94,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "keras.backend.clear_session()\n",
129 |     "np.random.seed(42)\n",
130 |     "tf.random.set_seed(42)"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 96,
136 |    "metadata": {},
137 |    "outputs": [
138 |     {
139 |      "name": "stdout",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "Train on 11610 samples\n",
143 |       "Epoch 1/20\n",
144 |       "11610/11610 [==============================] - 1s 81us/sample - loss: 3.9043\n",
145 |       "Epoch 2/20\n",
146 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 1.5243\n",
147 |       "Epoch 3/20\n",
148 |       "11610/11610 [==============================] - 0s 37us/sample - loss: 1.0021\n",
149 |       "Epoch 4/20\n",
150 |       "11610/11610 [==============================] - 0s 37us/sample - loss: 0.8606\n",
151 |       "Epoch 5/20\n",
152 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.8038\n",
153 |       "Epoch 6/20\n",
154 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.7715\n",
155 |       "Epoch 7/20\n",
156 |       "11610/11610 [==============================] - 0s 39us/sample - loss: 0.7457\n",
157 |       "Epoch 8/20\n",
158 |       "11610/11610 [==============================] - 0s 40us/sample - loss: 0.7228\n",
159 |       "Epoch 9/20\n",
160 |       "11610/11610 [==============================] - 0s 40us/sample - loss: 0.7012\n",
161 |       "Epoch 10/20\n",
162 |       "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6834\n",
163 |       "Epoch 11/20\n",
164 |       "11610/11610 [==============================] - 0s 43us/sample - loss: 0.6681\n",
165 |       "Epoch 12/20\n",
166 |       "11610/11610 [==============================] - 0s 40us/sample - loss: 0.6529\n",
167 |       "Epoch 13/20\n",
168 |       "11610/11610 [==============================] - 0s 42us/sample - loss: 0.6394\n",
169 |       "Epoch 14/20\n",
170 |       "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6280\n",
171 |       "Epoch 15/20\n",
172 |       "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6175\n",
173 |       "Epoch 16/20\n",
174 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.6091\n",
175 |       "Epoch 17/20\n",
176 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5996\n",
177 |       "Epoch 18/20\n",
178 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5928\n",
179 |       "Epoch 19/20\n",
180 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5862\n",
181 |       "Epoch 20/20\n",
182 |       "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5782\n"
183 |      ]
184 |     },
185 |     {
186 |      "data": {
187 |       "text/plain": [
188 |        "<tensorflow.python.keras.callbacks.History at 0x7fcac81d26d8>"
189 |       ]
190 |      },
191 |      "execution_count": 96,
192 |      "metadata": {},
193 |      "output_type": "execute_result"
194 |     }
195 |    ],
196 |    "source": [
197 |     "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n",
198 |     "model.compile(loss=\"mse\", optimizer=MySGDOptimizer(learning_rate=0.001))\n",
199 |     "model.fit(X_train_scaled, y_train, epochs=20)"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 102,
205 |    "metadata": {},
206 |    "outputs": [],
207 |    "source": [
208 |     "class SGOptimizer(keras.optimizers.Optimizer):\n",
209 |     "    def __init__(self, learning_rate=0.01, name=\"SGOptimizer\", **kwargs):\n",
210 |     "        \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n",
211 |     "        super().__init__(name, **kwargs)\n",
212 |     "        self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate)) # handle lr=learning_rate\n",
213 |     "        self._is_first = True\n",
214 |     "    \n",
215 |     "    def _create_slots(self, var_list):\n",
216 |     "        \"\"\"For each model variable, create the optimizer variable associated with it.\n",
217 |     "        TensorFlow calls these optimizer variables \"slots\".\n",
218 |     "        This optimizer keeps two slots per model variable: \"pv\" (previous value) and \"pg\" (previous gradient).\n",
219 |     "        \"\"\"\n",
220 |     "        for var in var_list:\n",
221 |     "            self.add_slot(var, \"pv\") #previous variable i.e. weight or bias\n",
222 |     "        for var in var_list:\n",
223 |     "            self.add_slot(var, \"pg\") #previous gradient\n",
224 |     "\n",
225 |     "\n",
226 |     "    @tf.function\n",
227 |     "    def _resource_apply_dense(self, grad, var):\n",
228 |     "        \"\"\"Update the slots and perform one optimization step for one model variable\n",
229 |     "        \"\"\"\n",
230 |     "        var_dtype = var.dtype.base_dtype\n",
231 |     "        lr_t = self._decayed_lr(var_dtype) # handle learning rate decay\n",
232 |     "        new_var_m = var - grad * lr_t\n",
233 |     "        pv_var = self.get_slot(var, \"pv\")\n",
234 |     "        pg_var = self.get_slot(var, \"pg\")\n",
235 |     "        \n",
236 |     "        if self._is_first:\n",
237 |     "            self._is_first = False\n",
238 |     "            new_var = new_var_m\n",
239 |     "        else:\n",
240 |     "            cond = grad*pg_var >= 0\n",
241 |     "            print(cond)\n",
242 |     "            avg_weights = (pv_var + var)/2.0\n",
243 |     "            new_var = tf.where(cond, new_var_m, avg_weights)\n",
244 |     "        pv_var.assign(var)\n",
245 |     "        pg_var.assign(grad)\n",
246 |     "        var.assign(new_var)\n",
247 |     "\n",
248 |     "    def _resource_apply_sparse(self, grad, var):\n",
249 |     "        raise NotImplementedError\n",
250 |     "\n",
251 |     "    def get_config(self):\n",
252 |     "        base_config = super().get_config()\n",
253 |     "        return {\n",
254 |     "            **base_config,\n",
255 |     "            \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n",
256 |     "        }"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 103,
262 |    "metadata": {},
263 |    "outputs": [],
264 |    "source": [
265 |     "keras.backend.clear_session()\n",
266 |     "np.random.seed(42)\n",
267 |     "tf.random.set_seed(42)"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 104,
273 |    "metadata": {},
274 |    "outputs": [
275 |     {
276 |      "name": "stdout",
277 |      "output_type": "stream",
278 |      "text": [
279 |       "Train on 11610 samples\n",
280 |       "Epoch 1/50\n",
281 |       "Tensor(\"GreaterEqual:0\", shape=(1,), dtype=bool)\n",
282 |       "11610/11610 [==============================] - 1s 95us/sample - loss: 3.7333\n",
283 |       "Epoch 2/50\n",
284 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 1.4848\n",
285 |       "Epoch 3/50\n",
286 |       "11610/11610 [==============================] - 1s 48us/sample - loss: 0.9218\n",
287 |       "Epoch 4/50\n",
288 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.7634\n",
289 |       "Epoch 5/50\n",
290 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.7067\n",
291 |       "Epoch 6/50\n",
292 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.6801\n",
293 |       "Epoch 7/50\n",
294 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.6624\n",
295 |       "Epoch 8/50\n",
296 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6482\n",
297 |       "Epoch 9/50\n",
298 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.6354\n",
299 |       "Epoch 10/50\n",
300 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6252\n",
301 |       "Epoch 11/50\n",
302 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.6166\n",
303 |       "Epoch 12/50\n",
304 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6077\n",
305 |       "Epoch 13/50\n",
306 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5999\n",
307 |       "Epoch 14/50\n",
308 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5934\n",
309 |       "Epoch 15/50\n",
310 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5872\n",
311 |       "Epoch 16/50\n",
312 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5826\n",
313 |       "Epoch 17/50\n",
314 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5766\n",
315 |       "Epoch 18/50\n",
316 |       "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5725\n",
317 |       "Epoch 19/50\n",
318 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5687\n",
319 |       "Epoch 20/50\n",
320 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5633\n",
321 |       "Epoch 21/50\n",
322 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5612\n",
323 |       "Epoch 22/50\n",
324 |       "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5579\n",
325 |       "Epoch 23/50\n",
326 |       "11610/11610 [==============================] - 1s 48us/sample - loss: 0.5572\n",
327 |       "Epoch 24/50\n",
328 |       "11610/11610 [==============================] - 1s 48us/sample - loss: 0.5537\n",
329 |       "Epoch 25/50\n",
330 |       "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5510\n",
331 |       "Epoch 26/50\n",
332 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5490\n",
333 |       "Epoch 27/50\n",
334 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5459\n",
335 |       "Epoch 28/50\n",
336 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5452\n",
337 |       "Epoch 29/50\n",
338 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5433\n",
339 |       "Epoch 30/50\n",
340 |       "11610/11610 [==============================] - 1s 44us/sample - loss: 0.5428\n",
341 |       "Epoch 31/50\n",
342 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5423\n",
343 |       "Epoch 32/50\n",
344 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5405\n",
345 |       "Epoch 33/50\n",
346 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5384\n",
347 |       "Epoch 34/50\n",
348 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5392\n",
349 |       "Epoch 35/50\n",
350 |       "11610/11610 [==============================] - 1s 51us/sample - loss: 0.5379\n",
351 |       "Epoch 36/50\n",
352 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5367\n",
353 |       "Epoch 37/50\n",
354 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5354\n",
355 |       "Epoch 38/50\n",
356 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5356\n",
357 |       "Epoch 39/50\n",
358 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5339\n",
359 |       "Epoch 40/50\n",
360 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5349\n",
361 |       "Epoch 41/50\n",
362 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5337\n",
363 |       "Epoch 42/50\n",
364 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5334\n",
365 |       "Epoch 43/50\n",
366 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5326\n",
367 |       "Epoch 44/50\n",
368 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5322\n",
369 |       "Epoch 45/50\n",
370 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5329\n",
371 |       "Epoch 46/50\n",
372 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5325\n",
373 |       "Epoch 47/50\n",
374 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5306\n",
375 |       "Epoch 48/50\n",
376 |       "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5317\n",
377 |       "Epoch 49/50\n",
378 |       "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5311\n",
379 |       "Epoch 50/50\n",
380 |       "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5312\n"
381 |      ]
382 |     },
383 |     {
384 |      "data": {
385 |       "text/plain": [
386 |        "<tensorflow.python.keras.callbacks.History at 0x7fcaa46cf710>"
387 |       ]
388 |      },
389 |      "execution_count": 104,
390 |      "metadata": {},
391 |      "output_type": "execute_result"
392 |     }
393 |    ],
394 |    "source": [
395 |     "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n",
396 |     "model.compile(loss=\"mse\", optimizer=SGOptimizer(learning_rate=0.001))\n",
397 |     "model.fit(X_train_scaled, y_train, epochs=50)"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": 58,
403 |    "metadata": {},
404 |    "outputs": [
405 |     {
406 |      "data": {
407 |       "text/plain": [
408 |        "<tf.Tensor: shape=(4,), dtype=bool, numpy=array([False,  True,  True,  True])>"
409 |       ]
410 |      },
411 |      "execution_count": 58,
412 |      "metadata": {},
413 |      "output_type": "execute_result"
414 |     }
415 |    ],
416 |    "source": [
417 |     "a = tf.Variable([-1,2,2,0])\n",
418 |     "b = tf.Variable([5,6,7,8])\n",
419 |     "cond = a*b >= 0\n",
420 |     "cond"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "code",
425 |    "execution_count": 59,
426 |    "metadata": {},
427 |    "outputs": [
428 |     {
429 |      "data": {
430 |       "text/plain": [
431 |        "<tf.Tensor: shape=(4,), dtype=int32, numpy=array([5, 2, 2, 0], dtype=int32)>"
432 |       ]
433 |      },
434 |      "execution_count": 59,
435 |      "metadata": {},
436 |      "output_type": "execute_result"
437 |     }
438 |    ],
439 |    "source": [
440 |     "tf.where(cond, a, b)"
441 |    ]
442 |   },
443 |   {
444 |    "cell_type": "code",
445 |    "execution_count": null,
446 |    "metadata": {},
447 |    "outputs": [],
448 |    "source": []
449 |   }
450 |  ],
451 |  "metadata": {
452 |   "kernelspec": {
453 |    "display_name": "tensorflow2",
454 |    "language": "python",
455 |    "name": "tensorflow2"
456 |   },
457 |   "language_info": {
458 |    "codemirror_mode": {
459 |     "name": "ipython",
460 |     "version": 3
461 |    },
462 |    "file_extension": ".py",
463 |    "mimetype": "text/x-python",
464 |    "name": "python",
465 |    "nbconvert_exporter": "python",
466 |    "pygments_lexer": "ipython3",
467 |    "version": "3.6.8"
468 |   },
469 |   "toc": {
470 |    "base_numbering": 1,
471 |    "nav_menu": {},
472 |    "number_sections": true,
473 |    "sideBar": true,
474 |    "skip_h1_title": false,
475 |    "title_cell": "Table of Contents",
476 |    "title_sidebar": "Contents",
477 |    "toc_cell": false,
478 |    "toc_position": {},
479 |    "toc_section_display": true,
480 |    "toc_window_display": false
481 |   }
482 |  },
483 |  "nbformat": 4,
484 |  "nbformat_minor": 2
485 | }
486 | 


--------------------------------------------------------------------------------
/machine_learning/Natural_Language_Processing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Natural Language Processing\n",
  8 |     "\n",
  9 |     "## Generating Quiz from a given text"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 5,
 15 |    "metadata": {
 16 |     "scrolled": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "from textblob import TextBlob"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 3,
 26 |    "metadata": {
 27 |     "collapsed": true
 28 |    },
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "# f = open('filename')\n",
 32 |     "# ww2 = f.read()\n",
 33 |     "# or\n",
 34 |     "\n",
 35 |     "ww2 = '''\n",
 36 |     "World War II (often abbreviated to WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier. It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the Axis. It was the most widespread war in history, and directly involved more than 100 million people from over 30 countries. In a state of total war, the major participants threw their entire economic, industrial, and scientific capabilities behind the war effort, erasing the distinction between civilian and military resources.\n",
 37 |     "\n",
 38 |     "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the Soviet Union and China. It included massacres, the deliberate genocide of the Holocaust, strategic bombing, starvation, disease and the first use of nuclear weapons in history.[1][2][3][4]\n",
 39 |     "\n",
 40 |     "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of China in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom. Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, Germany conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan. Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, Finland, Romania and the Baltic states. The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with campaigns including the North Africa and East Africa campaigns, the aerial Battle of Britain, the Blitz bombing campaign, and the Balkan Campaign, as well as the long-running Battle of the Atlantic. On 22 June 1941, the European Axis powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the Axis military forces into a war of attrition. In December 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n",
 41 |     "\n",
 42 |     "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and Germany and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union. In 1943, with a series of German defeats on the Eastern Front, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all fronts. In 1944, the Western Allies invaded German-occupied France, while the Soviet Union regained all of its territorial losses and invaded Germany and its allies. During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western Pacific islands.\n",
 43 |     "\n",
 44 |     "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 May 1945. Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of Hiroshima and Nagasaki on 6 August and 9 August respectively. With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of Manchuria, Japan formally surrendered on 2 September 1945. Thus ended the war in Asia, cementing the total victory of the Allies.\n",
 45 |     "\n",
 46 |     "World War II changed the political alignment and social structure of the world. The United Nations (UN) was established to foster international co-operation and prevent future conflicts. The victorious great powers—China, France, the Soviet Union, the United Kingdom, and the United States—became the permanent members of the United Nations Security Council.[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the Cold War, which lasted for the next 46 years. Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and Asia began. Most countries whose industries had been damaged moved towards economic recovery. Political integration, especially in Europe, emerged as an effort to end pre-war enmities and to create a common identity.[8]\n",
 47 |     "'''"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 4,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "\n",
 60 |       "____ ____ II (often abbreviated to ____ or ____), also known as the ____ ____ ____, was a global ____ that lasted from 1939 to 1945, although related conflicts began earlier.\n",
 61 |       "\n",
 62 |       "==================\n",
 63 |       "\n",
 64 |       "It involved the vast ____ of the ____'s countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the ____.\n",
 65 |       "\n",
 66 |       "==================\n",
 67 |       "\n",
 68 |       "It was the most widespread ____ in ____, and directly involved more than 100 million people from over 30 countries.\n",
 69 |       "\n",
 70 |       "==================\n",
 71 |       "\n",
 72 |       "In a state of total ____, the major participants threw their entire economic, industrial, and scientific capabilities behind the ____ ____, erasing the ____ between civilian and military resources.\n",
 73 |       "\n",
 74 |       "==================\n",
 75 |       "\n",
 76 |       "World War II was the deadliest ____ in human ____, marked by 50 million to 85 million fatalities, most of which were civilians in the ____ ____ and ____.\n",
 77 |       "\n",
 78 |       "==================\n",
 79 |       "\n",
 80 |       "It included massacres, the ____ ____ of the ____, strategic ____, ____, ____ and the first ____ of nuclear weapons in ____.\n",
 81 |       "\n",
 82 |       "==================\n",
 83 |       "\n",
 84 |       "____1________2________3________4____\n",
 85 |       "\n",
 86 |       "The ____ of ____ aimed to dominate ____ and the ____ and was already at ____ with the ____ of ____ in 1937,____5____ but the ____ ____ is generally said to have begun on 1 ____ 1939____6____ with the ____ of ____ by ____ ____ and subsequent declarations of ____ on ____ by ____ and the ____ ____.\n",
 87 |       "\n",
 88 |       "==================\n",
 89 |       "\n",
 90 |       "Supplied by the Soviet ____, from late 1939 to early 1941, in a ____ of campaigns and treaties, ____ conquered or controlled much of continental ____, and formed the ____ ____ with ____ and ____.\n",
 91 |       "\n",
 92 |       "==================\n",
 93 |       "\n",
 94 |       "Under the Molotov–Ribbentrop Pact of ____ 1939, ____ and the ____ ____ partitioned and annexed territories of their European neighbours, ____, ____, ____ and the ____ states.\n",
 95 |       "\n",
 96 |       "==================\n",
 97 |       "\n",
 98 |       "The war continued primarily between the ____ ____ powers and the ____ of the ____ ____ and the British ____, with ____s including the ____ ____ and ____ ____ ____s, the aerial ____ of ____, the ____ ____ ____, and the ____ ____, as well as the long-running ____ of the ____.\n",
 99 |       "\n",
100 |       "==================\n",
101 |       "\n",
102 |       "On 22 June 1941, the ____ ____ powers launched an ____ of the ____ ____, opening the largest ____ ____ of ____ in ____, which trapped the major ____ of the ____ military forces into a ____ of ____.\n",
103 |       "\n",
104 |       "==================\n",
105 |       "\n",
106 |       "In December 1941, Japan attacked the ____ States and European colonies in the ____ ____, and quickly conquered much of the ____ ____.\n",
107 |       "\n",
108 |       "==================\n",
109 |       "\n",
110 |       "The Axis advance halted in 1942 when ____ lost the critical ____ of ____, and ____ and ____ were defeated in ____ ____ and then, decisively, at ____ in the ____ ____.\n",
111 |       "\n",
112 |       "==================\n",
113 |       "\n",
114 |       "In 1943, with a ____ of German defeats on the ____ ____, the ____ ____ of ____ and the ____ ____ of ____ which brought about Italian ____, and ____ victories in the ____, the ____ lost the ____ and undertook strategic ____ on all fronts.\n",
115 |       "\n",
116 |       "==================\n",
117 |       "\n",
118 |       "In 1944, the Western Allies invaded German-occupied ____, while the ____ ____ regained all of its territorial losses and invaded ____ and its allies.\n",
119 |       "\n",
120 |       "==================\n",
121 |       "\n",
122 |       "During 1944 and 1945 the Japanese suffered major reverses in ____ ____ in ____ ____ ____ and ____, while the Allies crippled the Japanese ____ and captured key ____ ____ islands.\n",
123 |       "\n",
124 |       "==================\n",
125 |       "\n",
126 |       "The war in Europe concluded with an ____ of ____ by the Western Allies and the ____ ____, culminating in the ____ of ____ by ____ troops, the ____ of ____ ____ and the subsequent German unconditional ____ on 8 ____ 1945.\n",
127 |       "\n",
128 |       "==================\n",
129 |       "\n",
130 |       "Following the Potsdam Declaration by the Allies on 26 ____ 1945 and the ____ of ____ to surrender under its terms, the ____ States dropped atomic bombs on the ____ese cities of ____ and ____ on 6 ____ and 9 ____ respectively.\n",
131 |       "\n",
132 |       "==================\n",
133 |       "\n",
134 |       "With an ____ of the ____ese archipelago ____, the ____ of additional atomic bombings and the Soviet ____ of ____, ____ formally surrendered on 2 ____ 1945.\n",
135 |       "\n",
136 |       "==================\n",
137 |       "\n",
138 |       "Thus ended the war in ____, cementing the total ____ of the Allies.\n",
139 |       "\n",
140 |       "==================\n",
141 |       "\n",
142 |       "World War II changed the political ____ and social ____ of the ____.\n",
143 |       "\n",
144 |       "==================\n",
145 |       "\n",
146 |       "The United Nations (UN) was established to foster international ____ and ____ ____ conflicts.\n",
147 |       "\n",
148 |       "==================\n",
149 |       "\n",
150 |       "The victorious great powers—China, ____, the ____ ____, the ____ ____, and the ____ ____ the permanent members of the ____ ____ ____ ____.\n",
151 |       "\n",
152 |       "==================\n",
153 |       "\n",
154 |       "[7] The ____ ____ and the ____ States emerged as rival superpowers, setting the ____ for the ____ ____, which lasted for the next 46 years.\n",
155 |       "\n",
156 |       "==================\n",
157 |       "\n",
158 |       "Meanwhile, the influence of European great powers waned, while the ____ of ____ and ____ began.\n",
159 |       "\n",
160 |       "==================\n",
161 |       "\n",
162 |       "Most countries whose industries had been damaged moved towards economic ____.\n",
163 |       "\n",
164 |       "==================\n",
165 |       "\n",
166 |       "Political integration, especially in ____, emerged as an ____ to end pre-war enmities and to create a common ____.\n",
167 |       "\n",
168 |       "==================\n",
169 |       "\n",
170 |       "[8]\n",
171 |       "\n",
172 |       "==================\n",
173 |       "\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "ww2b = TextBlob(ww2)\n",
179 |     "for sentence in ww2b.sentences:\n",
180 |     "    new_sentence = sentence\n",
181 |     "    for index, tag in enumerate(sentence.tags):\n",
182 |     "        if tag[1] in ('NN', 'NNP') and index > 3:\n",
183 |     "            new_sentence = new_sentence.replace(tag[0], \"____\") \n",
184 |     "    print(new_sentence)\n",
185 |     "    print(\"\\n==================\\n\")"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "##  Finding Related Posts\n"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "We are given the task of finding the most related posts from a bunch of posts. \n",
200 |     "\n",
201 |     "How will we find similarity between posts?\n",
202 |     "\n",
203 |     "The tricky thing that we have to tackle first is how to turn text into something on which we can calculate similarity."
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "### How to do it\n",
211 |     "\n",
212 |     "+ **Bag of Words**\n",
213 |     "\n",
214 |     "This approach totally ignores the order of words and simply uses word counts as its basis.\n",
215 |     "\n",
216 |     "##### Vectorization\n",
217 |     "For each word in the post, its occurrence is counted and noted in a vector. This step is also called vectorization. The vector is typically huge, as it contains one element for every distinct word that occurs in the whole dataset."
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "|Word  | Occurrence in post 1 | Occurrence in post 2|\n",
225 |     "|------|---------------------|--------------------|\n",
226 |     "|disk  |            1        |          1         |\n",
227 |     "|format|     1               |           1        |\n",
228 |     "|how   |1                    | 0 |\n",
229 |     "|hard| 1| 1|\n",
230 |     "|my |1 |0|\n",
231 |     "|problems| 0| 1|\n",
232 |     "|to| 1| 0|"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "metadata": {},
238 |    "source": [
239 |     "We could simply calculate the **Euclidean distance** between the vectors of all posts and take the nearest one, but that would be too slow."
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "### Vectorization -  Converting raw text into a bag of words \n",
247 |     "\n",
248 |     "\n"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "code",
253 |    "execution_count": 5,
254 |    "metadata": {
255 |     "collapsed": true
256 |    },
257 |    "outputs": [],
258 |    "source": [
259 |     "from sklearn.feature_extraction.text import CountVectorizer"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 6,
265 |    "metadata": {
266 |     "collapsed": true
267 |    },
268 |    "outputs": [],
269 |    "source": [
270 |     "vectorizer = CountVectorizer(min_df=1)"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "The **min_df parameter** determines how CountVectorizer treats rarely occurring words\n",
278 |     "(minimum document frequency).\n",
279 |     "+ If it is set to an integer, all words that occur in fewer than that many documents will be dropped\n",
280 |     "+ If it is a fraction, all words that occur in less than that fraction of the documents will be dropped. \n",
281 |     "\n",
282 |     "The max_df parameter works in a similar manner."
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "code",
287 |    "execution_count": 7,
288 |    "metadata": {
289 |     "collapsed": true
290 |    },
291 |    "outputs": [],
292 |    "source": [
293 |     "content = [\"How to format my hard disk\", \" Hard disk format problems \"]"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 8,
299 |    "metadata": {
300 |     "collapsed": true
301 |    },
302 |    "outputs": [],
303 |    "source": [
304 |     "X = vectorizer.fit_transform(content)"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": 9,
310 |    "metadata": {},
311 |    "outputs": [
312 |     {
313 |      "data": {
314 |       "text/plain": [
315 |        "['disk', 'format', 'hard', 'how', 'my', 'problems', 'to']"
316 |       ]
317 |      },
318 |      "execution_count": 9,
319 |      "metadata": {},
320 |      "output_type": "execute_result"
321 |     }
322 |    ],
323 |    "source": [
324 |     "vectorizer.get_feature_names()"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 10,
330 |    "metadata": {},
331 |    "outputs": [
332 |     {
333 |      "name": "stdout",
334 |      "output_type": "stream",
335 |      "text": [
336 |       "[[1 1]\n",
337 |       " [1 1]\n",
338 |       " [1 1]\n",
339 |       " [1 0]\n",
340 |       " [1 0]\n",
341 |       " [0 1]\n",
342 |       " [1 0]]\n"
343 |      ]
344 |     }
345 |    ],
346 |    "source": [
347 |     "print(X.toarray().transpose())"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "This means that the first sentence contains all the words except \"problems\", while\n",
355 |     "the second contains all but \"how\", \"my\", and \"to\". In fact, these are exactly the same\n",
356 |     "columns as we have seen in the preceding table. From X, we can extract a feature\n",
357 |     "vector that we will use to compare two documents with each other."
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "Let's consider a few toy posts"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": 11,
370 |    "metadata": {
371 |     "collapsed": true
372 |    },
373 |    "outputs": [],
374 |    "source": [
375 |     "post1 = \"This is a toy post about machine learning. Actually, it contains not much interesting stuff.\"\n",
376 |     "post2 = \"Imaging databases can get huge.\"\n",
377 |     "post3 = \"Most imaging databases save images permanently.\"\n",
378 |     "post4 = \"Imaging databases store images.\"\n",
379 |     "post5 = \"Imaging databases store images. Imaging databases store images. Imaging databases store images.\""
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "markdown",
384 |    "metadata": {},
385 |    "source": [
386 |     "Now we will train our vectorizer"
387 |    ]
388 |   },
389 |   {
390 |    "cell_type": "code",
391 |    "execution_count": 12,
392 |    "metadata": {
393 |     "collapsed": true
394 |    },
395 |    "outputs": [],
396 |    "source": [
397 |     "from sklearn.feature_extraction.text import CountVectorizer\n",
398 |     "vectorizer = CountVectorizer(min_df=1)\n",
399 |     "\n",
400 |     "posts = [post1, post2, post3, post4, post5]\n",
401 |     "\n",
402 |     "X_train = vectorizer.fit_transform(posts)"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "code",
407 |    "execution_count": 13,
408 |    "metadata": {
409 |     "collapsed": true
410 |    },
411 |    "outputs": [],
412 |    "source": [
413 |     "num_samples, num_features = X_train.shape"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": 14,
419 |    "metadata": {},
420 |    "outputs": [
421 |     {
422 |      "name": "stdout",
423 |      "output_type": "stream",
424 |      "text": [
425 |       "#samples: 5, #features: 24\n"
426 |      ]
427 |     }
428 |    ],
429 |    "source": [
430 |     "print(\"#samples: %d, #features: %d\" % (num_samples,\n",
431 |     "num_features))"
432 |    ]
433 |   },
434 |   {
435 |    "cell_type": "code",
436 |    "execution_count": 15,
437 |    "metadata": {},
438 |    "outputs": [
439 |     {
440 |      "name": "stdout",
441 |      "output_type": "stream",
442 |      "text": [
443 |       "24\n"
444 |      ]
445 |     }
446 |    ],
447 |    "source": [
448 |     "print(len(vectorizer.get_feature_names()))"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": 16,
454 |    "metadata": {
455 |     "collapsed": true
456 |    },
457 |    "outputs": [],
458 |    "source": [
459 |     "new_post = \"imaging databases\""
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "code",
464 |    "execution_count": 17,
465 |    "metadata": {},
466 |    "outputs": [
467 |     {
468 |      "data": {
469 |       "text/plain": [
470 |        "array([[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
471 |        "        0, 0]])"
472 |       ]
473 |      },
474 |      "execution_count": 17,
475 |      "metadata": {},
476 |      "output_type": "execute_result"
477 |     }
478 |    ],
479 |    "source": [
480 |     "new_post_vec = vectorizer.transform([new_post])\n",
481 |     "new_post_vec.toarray()"
482 |    ]
483 |   },
484 |   {
485 |    "cell_type": "markdown",
486 |    "metadata": {},
487 |    "source": [
488 |     "Now let us define a function for finding the distance between two vectors. It\n",
489 |     "will first normalize the vectors and then find the distance between them."
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": 18,
495 |    "metadata": {
496 |     "collapsed": true
497 |    },
498 |    "outputs": [],
499 |    "source": [
500 |     "def dist_norm(v1, v2):\n",
501 |     "    v1_normalized = v1/sp.linalg.norm(v1.toarray())\n",
502 |     "    v2_normalized = v2/sp.linalg.norm(v2.toarray())\n",
503 |     "    delta = v1_normalized - v2_normalized\n",
504 |     "    return sp.linalg.norm(delta.toarray())"
505 |    ]
506 |   },
507 |   {
508 |    "cell_type": "code",
509 |    "execution_count": 19,
510 |    "metadata": {},
511 |    "outputs": [
512 |     {
513 |      "name": "stdout",
514 |      "output_type": "stream",
515 |      "text": [
516 |       "=== Post 0 with dist=1.41: This is a toy post about machine learning. Actually, it contains not much interesting stuff.\n",
517 |       "=== Post 1 with dist=0.86: Imaging databases can get huge.\n",
518 |       "=== Post 2 with dist=0.92: Most imaging databases save images permanently.\n",
519 |       "=== Post 3 with dist=0.77: Imaging databases store images.\n",
520 |       "=== Post 4 with dist=0.77: Imaging databases store images. Imaging databases store images. Imaging databases store images.\n",
521 |       "Best post is 3 with dist=0.77\n"
522 |      ]
523 |     }
524 |    ],
525 |    "source": [
526 |     "import sys\n",
527 |     "import scipy as sp\n",
528 |     "best_doc = None\n",
529 |     "best_dist = sys.maxsize\n",
530 |     "best_i = None\n",
531 |     "for i, post in enumerate(posts):\n",
532 |     "    post_vec = X_train.getrow(i)\n",
533 |     "    d = dist_norm(post_vec, new_post_vec)\n",
534 |     "    print(\"=== Post %i with dist=%.2f: %s\"%(i, d, post))\n",
535 |     "    if d<best_dist:\n",
536 |     "        best_dist = d\n",
537 |     "        best_i = i\n",
538 |     "print(\"Best post is %i with dist=%.2f\"%(best_i, best_dist))"
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "code",
543 |    "execution_count": null,
544 |    "metadata": {
545 |     "collapsed": true
546 |    },
547 |    "outputs": [],
548 |    "source": []
549 |   }
550 |  ],
551 |  "metadata": {
552 |   "kernelspec": {
553 |    "display_name": "Python 3",
554 |    "language": "python",
555 |    "name": "python3"
556 |   },
557 |   "language_info": {
558 |    "codemirror_mode": {
559 |     "name": "ipython",
560 |     "version": 3
561 |    },
562 |    "file_extension": ".py",
563 |    "mimetype": "text/x-python",
564 |    "name": "python",
565 |    "nbconvert_exporter": "python",
566 |    "pygments_lexer": "ipython3",
567 |    "version": "3.6.3"
568 |   }
569 |  },
570 |  "nbformat": 4,
571 |  "nbformat_minor": 2
572 | }
573 | 


--------------------------------------------------------------------------------
/machine_learning/README.MD:
--------------------------------------------------------------------------------
1 | This is for the machine learning course of [CloudxLab.com](https://cloudxlab.com/)
2 | 
3 | Please note that some of the notebooks are based on the repository of this book: https://github.com/ageron/handson-ml
4 | 


--------------------------------------------------------------------------------
/machine_learning/datasets/bike_sharing/Readme.txt:
--------------------------------------------------------------------------------
  1 | ==========================================
  2 | Bike Sharing Dataset
  3 | ==========================================
  4 | 
  5 | Hadi Fanaee-T
  6 | 
  7 | Laboratory of Artificial Intelligence and Decision Support (LIAAD), University of Porto
  8 | INESC Porto, Campus da FEUP
  9 | Rua Dr. Roberto Frias, 378
 10 | 4200 - 465 Porto, Portugal
 11 | 
 12 | 
 13 | =========================================
 14 | Background 
 15 | =========================================
 16 | 
 17 | Bike sharing systems are new generation of traditional bike rentals where whole process from membership, rental and return 
 18 | back has become automatic. Through these systems, user is able to easily rent a bike from a particular position and return 
 19 | back at another position. Currently, there are about over 500 bike-sharing programs around the world which is composed of 
 20 | over 500 thousands bicycles. Today, there exists great interest in these systems due to their important role in traffic, 
 21 | environmental and health issues. 
 22 | 
 23 | Apart from interesting real world applications of bike sharing systems, the characteristics of data being generated by
 24 | these systems make them attractive for the research. Opposed to other transport services such as bus or subway, the duration
 25 | of travel, departure and arrival position is explicitly recorded in these systems. This feature turns bike sharing system into
 26 | a virtual sensor network that can be used for sensing mobility in the city. Hence, it is expected that most of important
 27 | events in the city could be detected via monitoring these data.
 28 | 
 29 | =========================================
 30 | Data Set
 31 | =========================================
 32 | Bike-sharing rental process is highly correlated to the environmental and seasonal settings. For instance, weather conditions,
 33 | precipitation, day of week, season, hour of the day, etc. can affect the rental behaviors. The core data set is related to  
 34 | the two-year historical log corresponding to years 2011 and 2012 from Capital Bikeshare system, Washington D.C., USA which is 
 35 | publicly available in http://capitalbikeshare.com/system-data. We aggregated the data on two hourly and daily basis and then 
 36 | extracted and added the corresponding weather and seasonal information. Weather information are extracted from http://www.freemeteo.com. 
 37 | 
 38 | =========================================
 39 | Associated tasks
 40 | =========================================
 41 | 
 42 | 	- Regression: 
 43 | 		Prediction of bike rental count hourly or daily based on the environmental and seasonal settings.
 44 | 	
 45 | 	- Event and Anomaly Detection:  
 46 | 		Count of rented bikes are also correlated to some events in the town which easily are traceable via search engines.
 47 | 		For instance, query like "2012-10-30 washington d.c." in Google returns related results to Hurricane Sandy. Some of the important events are 
 48 | 		identified in [1]. Therefore the data can be used for validation of anomaly or event detection algorithms as well.
 49 | 
 50 | 
 51 | =========================================
 52 | Files
 53 | =========================================
 54 | 
 55 | 	- Readme.txt
 56 | 	- hour.csv : bike sharing counts aggregated on hourly basis. Records: 17379 hours
 57 | 	- day.csv - bike sharing counts aggregated on daily basis. Records: 731 days
 58 | 
 59 | 	
 60 | =========================================
 61 | Dataset characteristics
 62 | =========================================	
 63 | Both hour.csv and day.csv have the following fields, except hr which is not available in day.csv
 64 | 	
 65 | 	- instant: record index
 66 | 	- dteday : date
 67 | 	- season : season (1:springer, 2:summer, 3:fall, 4:winter)
 68 | 	- yr : year (0: 2011, 1:2012)
 69 | 	- mnth : month ( 1 to 12)
 70 | 	- hr : hour (0 to 23)
 71 | 	- holiday : whether day is holiday or not (extracted from http://dchr.dc.gov/page/holiday-schedule)
 72 | 	- weekday : day of the week
 73 | 	- workingday : if day is neither weekend nor holiday is 1, otherwise is 0.
 74 | 	+ weathersit : 
 75 | 		- 1: Clear, Few clouds, Partly cloudy, Partly cloudy
 76 | 		- 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
 77 | 		- 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
 78 | 		- 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
 79 | 	- temp : Normalized temperature in Celsius. The values are divided by 41 (max)
 80 | 	- atemp: Normalized feeling temperature in Celsius. The values are divided by 50 (max)
 81 | 	- hum: Normalized humidity. The values are divided by 100 (max)
 82 | 	- windspeed: Normalized wind speed. The values are divided by 67 (max)
 83 | 	- casual: count of casual users
 84 | 	- registered: count of registered users
 85 | 	- cnt: count of total rental bikes including both casual and registered
 86 | 	
 87 | =========================================
 88 | License
 89 | =========================================
 90 | Use of this dataset in publications must be cited to the following publication:
 91 | 
 92 | [1] Fanaee-T, Hadi, and Gama, Joao, "Event labeling combining ensemble detectors and background knowledge", Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg, doi:10.1007/s13748-013-0040-3.
 93 | 
 94 | @article{
 95 | 	year={2013},
 96 | 	issn={2192-6352},
 97 | 	journal={Progress in Artificial Intelligence},
 98 | 	doi={10.1007/s13748-013-0040-3},
 99 | 	title={Event labeling combining ensemble detectors and background knowledge},
100 | 	url={http://dx.doi.org/10.1007/s13748-013-0040-3},
101 | 	publisher={Springer Berlin Heidelberg},
102 | 	keywords={Event labeling; Event detection; Ensemble learning; Background knowledge},
103 | 	author={Fanaee-T, Hadi and Gama, Joao},
104 | 	pages={1-15}
105 | }
106 | 
107 | =========================================
108 | Contact
109 | =========================================
110 | 	
111 | For further information about this dataset please contact Hadi Fanaee-T (hadi.fanaee@fe.up.pt)
112 | 


--------------------------------------------------------------------------------
/machine_learning/datasets/housing/README.md:
--------------------------------------------------------------------------------
 1 | # California Housing
 2 | 
 3 | ## Source
 4 | This dataset is a modified version of the California Housing dataset available from [Luís Torgo's page](http://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html) (University of Porto). Luís Torgo obtained it from the StatLib repository (which is closed now). The dataset may also be downloaded from StatLib mirrors.
 5 | 
 6 | This dataset appeared in a 1997 paper titled *Sparse Spatial Autoregressions* by Pace, R. Kelley and Ronald Barry, published in the *Statistics and Probability Letters* journal. They built it using the 1990 California census data. It contains one row per census block group. A block group is the smallest geographical unit for which the U.S. Census Bureau publishes sample data (a block group typically has a population of 600 to 3,000 people).
 7 | 
 8 | ## Tweaks
 9 | The dataset in this directory is almost identical to the original, with two differences:
10 | 
11 | * 207 values were randomly removed from the `total_bedrooms` column, so we can discuss what to do with missing data.
12 | * An additional categorical attribute called `ocean_proximity` was added, indicating (very roughly) whether each block group is near the ocean, near the Bay area, inland or on an island. This allows discussing what to do with categorical data.
13 | 
14 | Note that the block groups are called "districts" in the Jupyter notebooks, simply because in some contexts the name "block group" was confusing.
15 | 
16 | ## Data description
17 | 
18 |     >>> housing.info()
19 |     <class 'pandas.core.frame.DataFrame'>
20 |     RangeIndex: 20640 entries, 0 to 20639
21 |     Data columns (total 10 columns):
22 |     longitude             20640 non-null float64
23 |     latitude              20640 non-null float64
24 |     housing_median_age    20640 non-null float64
25 |     total_rooms           20640 non-null float64
26 |     total_bedrooms        20433 non-null float64
27 |     population            20640 non-null float64
28 |     households            20640 non-null float64
29 |     median_income         20640 non-null float64
30 |     median_house_value    20640 non-null float64
31 |     ocean_proximity       20640 non-null object
32 |     dtypes: float64(9), object(1)
33 |     memory usage: 1.6+ MB
34 |     
35 |     >>> housing["ocean_proximity"].value_counts()
36 |     <1H OCEAN     9136
37 |     INLAND        6551
38 |     NEAR OCEAN    2658
39 |     NEAR BAY      2290
40 |     ISLAND           5
41 |     Name: ocean_proximity, dtype: int64
42 |     
43 |     >>> housing.describe()
44 |               longitude      latitude  housing_median_age   total_rooms  \
45 |     count  16513.000000  16513.000000        16513.000000  16513.000000   
46 |     mean    -119.575972     35.639693           28.652335   2622.347605   
47 |     std        2.002048      2.138279           12.576306   2138.559393   
48 |     min     -124.350000     32.540000            1.000000      6.000000   
49 |     25%     -121.800000     33.940000           18.000000   1442.000000   
50 |     50%     -118.510000     34.260000           29.000000   2119.000000   
51 |     75%     -118.010000     37.720000           37.000000   3141.000000   
52 |     max     -114.310000     41.950000           52.000000  39320.000000   
53 | 
54 |            total_bedrooms    population    households  median_income  
55 |     count    16355.000000  16513.000000  16513.000000   16513.000000  
56 |     mean       534.885112   1419.525465    496.975050       3.875651  
57 |     std        412.716467   1115.715084    375.737945       1.905088  
58 |     min          2.000000      3.000000      2.000000       0.499900  
59 |     25%        295.000000    784.000000    278.000000       2.566800  
60 |     50%        433.000000   1164.000000    408.000000       3.541400  
61 |     75%        644.000000   1718.000000    602.000000       4.745000  
62 |     max       6210.000000  35682.000000   5358.000000      15.000100
63 |  


--------------------------------------------------------------------------------
/machine_learning/datasets/housing/housing.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/datasets/housing/housing.tgz


--------------------------------------------------------------------------------
/machine_learning/images/autoencoders/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/classification/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/cnn/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/cnn/test_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/cnn/test_image.png


--------------------------------------------------------------------------------
/machine_learning/images/decision_trees/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/decision_trees/iris_tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/decision_trees/iris_tree.png


--------------------------------------------------------------------------------
/machine_learning/images/deep/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/dim_reduction/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/distributed/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/end_to_end_project/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/end_to_end_project/california.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/end_to_end_project/california.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/boosting_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/boosting_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/decision_tree_without_and_with_bagging_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/decision_tree_without_and_with_bagging_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/early_stopping_gbrt_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/early_stopping_gbrt_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/gbrt_learning_rate_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/gbrt_learning_rate_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/gradient_boosting_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/gradient_boosting_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/hard_voting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/hard_voting.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/law_of_large_numbers_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/law_of_large_numbers_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/mnist_feature_importance_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/mnist_feature_importance_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/pasting_bagging.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/pasting_bagging.png


--------------------------------------------------------------------------------
/machine_learning/images/ensembles/training_diverse_classifiers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/training_diverse_classifiers.png


--------------------------------------------------------------------------------
/machine_learning/images/fundamentals/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/rl/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/rnn/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/svm/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/tensorflow/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/training_linear_models/README:
--------------------------------------------------------------------------------
1 | Images generated by the notebooks
2 | 


--------------------------------------------------------------------------------
/machine_learning/images/training_linear_models/gradient_descent_paths_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/gradient_descent_paths_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/training_linear_models/gradient_descent_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/gradient_descent_plot.png


--------------------------------------------------------------------------------
/machine_learning/images/training_linear_models/sgd_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/sgd_plot.png


--------------------------------------------------------------------------------
/machine_learning/naive_bayes.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## A simple example of Naive Bayes"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stdout",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "[3 4]\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "#Import Library of Gaussian Naive Bayes model\n",
 25 |     "from sklearn.naive_bayes import GaussianNB\n",
 26 |     "import numpy as np\n",
 27 |     "\n",
 28 |     "#assigning predictor and target variables\n",
 29 |     "X= np.array([[-3,7],[1,5], [1,2], [-2,0], [2,3], [-4,0], [-1,1], [1,1], [-2,2], [2,7], [-4,1], [-2,7]])\n",
 30 |     "y = np.array([3, 3, 3, 3, 4, 3, 3, 4, 3, 4, 4, 4])\n",
 31 |     "#Create a Gaussian Classifier\n",
 32 |     "model = GaussianNB()\n",
 33 |     "\n",
 34 |     "# Train the model using the training sets \n",
 35 |     "model.fit(X, y)\n",
 36 |     "\n",
 37 |     "#Predict Output \n",
 38 |     "predicted= model.predict([[1,2],[3,4]])\n",
 39 |     "print(predicted)\n"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "## Naive Bayes on the Iris Dataset"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 2,
 52 |    "metadata": {},
 53 |    "outputs": [
 54 |     {
 55 |      "name": "stdout",
 56 |      "output_type": "stream",
 57 |      "text": [
 58 |       "Number of mislabeled points out of a total 150 points : 6\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "from sklearn import datasets\n",
 64 |     "iris = datasets.load_iris()\n",
 65 |     "from sklearn.naive_bayes import GaussianNB\n",
 66 |     "gnb = GaussianNB()\n",
 67 |     "y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)\n",
 68 |     "print(\"Number of mislabeled points out of a total %d points : %d\" % (iris.data.shape[0],(iris.target != y_pred).sum()))"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 3,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "data": {
 78 |       "text/plain": [
 79 |        "0.95999999999999996"
 80 |       ]
 81 |      },
 82 |      "execution_count": 3,
 83 |      "metadata": {},
 84 |      "output_type": "execute_result"
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "from sklearn.metrics import accuracy_score\n",
 89 |     "accuracy_score(y_pred, iris.target)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "## Iris dataset using random shuffling and split"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 14,
102 |    "metadata": {},
103 |    "outputs": [
104 |     {
105 |      "data": {
106 |       "text/plain": [
107 |        "0.95999999999999996"
108 |       ]
109 |      },
110 |      "execution_count": 14,
111 |      "metadata": {},
112 |      "output_type": "execute_result"
113 |     }
114 |    ],
115 |    "source": [
116 |     "import numpy as np\n",
117 |     "seq = np.random.permutation(150)\n",
118 |     "X = iris.data[seq]\n",
119 |     "y = iris.target[seq]\n",
120 |     "X_train, X_test, y_train, y_test = X[:100], X[100:], y[:100], y[100:]\n",
121 |     "gnb = GaussianNB()\n",
122 |     "y_pred = gnb.fit(X_train, y_train).predict(X_test)\n",
123 |     "\n",
124 |     "from sklearn.metrics import accuracy_score\n",
125 |     "accuracy_score(y_pred, y_test)"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "## Another Example with Categorical Data"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 24,
138 |    "metadata": {},
139 |    "outputs": [
140 |     {
141 |      "data": {
142 |       "text/plain": [
143 |        "array([[0, 0, 1],\n",
144 |        "       [1, 0, 0],\n",
145 |        "       [1, 0, 0],\n",
146 |        "       [0, 1, 0],\n",
147 |        "       [0, 0, 1]])"
148 |       ]
149 |      },
150 |      "execution_count": 24,
151 |      "metadata": {},
152 |      "output_type": "execute_result"
153 |     }
154 |    ],
155 |    "source": [
156 |     "X = np.array([[\"Sunny\"],[\"Overcast\"], [\"Overcast\"], [\"Rainy\"], [\"Sunny\"]])\n",
157 |     "y = np.array([1, 0, 1, 0, 0])\n",
158 |     "\n",
159 |     "from sklearn.preprocessing import LabelBinarizer\n",
160 |     "enc = LabelBinarizer()\n",
161 |     "# encoder = OneHotEncoder()\n",
162 |     "# housing_cat_1hot = encoder.fit_transform(housing_cat_encoded.reshape(-1,1))\n",
163 |     "\n",
164 |     "X1 = enc.fit_transform(X.reshape(-1,1))\n",
165 |     "gnb = GaussianNB()\n",
166 |     "gnb.fit(X1, y)\n",
167 |     "\n",
168 |     "X1"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 29,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "data": {
178 |       "text/plain": [
179 |        "array([0])"
180 |       ]
181 |      },
182 |      "execution_count": 29,
183 |      "metadata": {},
184 |      "output_type": "execute_result"
185 |     }
186 |    ],
187 |    "source": [
188 |     "# Rainy\n",
189 |     "gnb.predict([[0, 1, 0]])"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 30,
195 |    "metadata": {},
196 |    "outputs": [
197 |     {
198 |      "data": {
199 |       "text/plain": [
200 |        "array([1])"
201 |       ]
202 |      },
203 |      "execution_count": 30,
204 |      "metadata": {},
205 |      "output_type": "execute_result"
206 |     }
207 |    ],
208 |    "source": [
209 |     "# Sunny\n",
210 |     "gnb.predict([[0, 0, 1]])"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 31,
216 |    "metadata": {},
217 |    "outputs": [
218 |     {
219 |      "data": {
220 |       "text/plain": [
221 |        "array([1])"
222 |       ]
223 |      },
224 |      "execution_count": 31,
225 |      "metadata": {},
226 |      "output_type": "execute_result"
227 |     }
228 |    ],
229 |    "source": [
230 |     "# Overcast\n",
231 |     "gnb.predict([[1, 0, 0]])"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 32,
237 |    "metadata": {},
238 |    "outputs": [
239 |     {
240 |      "data": {
241 |       "text/plain": [
242 |        "array(['Overcast', 'Rainy', 'Sunny'],\n",
243 |        "      dtype='<U8')"
244 |       ]
245 |      },
246 |      "execution_count": 32,
247 |      "metadata": {},
248 |      "output_type": "execute_result"
249 |     }
250 |    ],
251 |    "source": [
252 |     "enc.classes_"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 33,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "data": {
262 |       "text/plain": [
263 |        "array([[  3.36351842e-05,   9.99966365e-01]])"
264 |       ]
265 |      },
266 |      "execution_count": 33,
267 |      "metadata": {},
268 |      "output_type": "execute_result"
269 |     }
270 |    ],
271 |    "source": [
272 |     "gnb.predict_proba([[1, 0, 0]])"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": null,
278 |    "metadata": {
279 |     "collapsed": true
280 |    },
281 |    "outputs": [],
282 |    "source": []
283 |   }
284 |  ],
285 |  "metadata": {
286 |   "kernelspec": {
287 |    "display_name": "Python 3",
288 |    "language": "python",
289 |    "name": "python3"
290 |   },
291 |   "language_info": {
292 |    "codemirror_mode": {
293 |     "name": "ipython",
294 |     "version": 3
295 |    },
296 |    "file_extension": ".py",
297 |    "mimetype": "text/x-python",
298 |    "name": "python",
299 |    "nbconvert_exporter": "python",
300 |    "pygments_lexer": "ipython3",
301 |    "version": "3.6.3"
302 |   }
303 |  },
304 |  "nbformat": 4,
305 |  "nbformat_minor": 2
306 | }
307 | 


--------------------------------------------------------------------------------
/machine_learning/sklearn_text_analyser.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "## Based on http://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html#tutorial-setup\n",
 10 |     "\n",
 11 |     "from sklearn.datasets import fetch_20newsgroups"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 3,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "categories = ['alt.atheism', 'soc.religion.christian','comp.graphics', 'sci.med']"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 4,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "from sklearn.datasets import fetch_20newsgroups"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 5,
 35 |    "metadata": {},
 36 |    "outputs": [
 37 |     {
 38 |      "name": "stderr",
 39 |      "output_type": "stream",
 40 |      "text": [
 41 |       "Downloading 20news dataset. This may take a few minutes.\n",
 42 |       "Downloading dataset from https://ndownloader.figshare.com/files/5975967 (14 MB)\n"
 43 |      ]
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "twenty_train = fetch_20newsgroups(subset='train',categories=categories, shuffle=True, random_state=42)"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 6,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "data": {
 57 |       "text/plain": [
 58 |        "['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']"
 59 |       ]
 60 |      },
 61 |      "execution_count": 6,
 62 |      "metadata": {},
 63 |      "output_type": "execute_result"
 64 |     }
 65 |    ],
 66 |    "source": [
 67 |     "twenty_train.target_names"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 7,
 73 |    "metadata": {},
 74 |    "outputs": [
 75 |     {
 76 |      "name": "stdout",
 77 |      "output_type": "stream",
 78 |      "text": [
 79 |       "From: sd345@city.ac.uk (Michael Collier)\n",
 80 |       "Subject: Converting images to HP LaserJet III?\n",
 81 |       "Nntp-Posting-Host: hampton\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "print(\"\\n\".join(twenty_train.data[0].split(\"\\n\")[:3]))"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 8,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "comp.graphics\n"
 99 |      ]
100 |     }
101 |    ],
102 |    "source": [
103 |     "print(twenty_train.target_names[twenty_train.target[0]])"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 9,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "from sklearn.feature_extraction.text import CountVectorizer"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 10,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "count_vect = CountVectorizer()"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 11,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "X_train_counts = count_vect.fit_transform(twenty_train.data)"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 12,
136 |    "metadata": {},
137 |    "outputs": [
138 |     {
139 |      "data": {
140 |       "text/plain": [
141 |        "(2257, 35788)"
142 |       ]
143 |      },
144 |      "execution_count": 12,
145 |      "metadata": {},
146 |      "output_type": "execute_result"
147 |     }
148 |    ],
149 |    "source": [
150 |     "X_train_counts = count_vect.fit_transform(twenty_train.data)\n",
151 |     "X_train_counts.shape"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 13,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "data": {
161 |       "text/plain": [
162 |        "4690"
163 |       ]
164 |      },
165 |      "execution_count": 13,
166 |      "metadata": {},
167 |      "output_type": "execute_result"
168 |     }
169 |    ],
170 |    "source": [
171 |     "count_vect.vocabulary_.get(u'algorithm')"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "metadata": {
177 |     "collapsed": true
178 |    },
179 |    "source": [
180 |     "From occurrences to frequencies"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 14,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "data": {
190 |       "text/plain": [
191 |        "(2257, 35788)"
192 |       ]
193 |      },
194 |      "execution_count": 14,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "from sklearn.feature_extraction.text import TfidfTransformer\n",
201 |     "tf_transformer = TfidfTransformer(use_idf=False).fit(X_train_counts)\n",
202 |     "X_train_tf = tf_transformer.transform(X_train_counts)\n",
203 |     "X_train_tf.shape\n"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "Or we can call fit and transform using fit_transform"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 15,
216 |    "metadata": {},
217 |    "outputs": [
218 |     {
219 |      "data": {
220 |       "text/plain": [
221 |        "(2257, 35788)"
222 |       ]
223 |      },
224 |      "execution_count": 15,
225 |      "metadata": {},
226 |      "output_type": "execute_result"
227 |     }
228 |    ],
229 |    "source": [
230 |     "tfidf_transformer = TfidfTransformer()\n",
231 |     "X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)\n",
232 |     "X_train_tfidf.shape"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "metadata": {},
238 |    "source": [
239 |     "Training a classifier"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": 16,
245 |    "metadata": {},
246 |    "outputs": [],
247 |    "source": [
248 |     "from sklearn.naive_bayes import MultinomialNB\n",
249 |     "clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "Predicting on a new document"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 17,
262 |    "metadata": {},
263 |    "outputs": [
264 |     {
265 |      "name": "stdout",
266 |      "output_type": "stream",
267 |      "text": [
268 |       "'God is love' => soc.religion.christian\n",
269 |       "'OpenGL on the GPU is fast' => comp.graphics\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "docs_new = ['God is love', 'OpenGL on the GPU is fast']\n",
275 |     "X_new_counts = count_vect.transform(docs_new)\n",
276 |     "X_new_tfidf = tfidf_transformer.transform(X_new_counts)\n",
277 |     "\n",
278 |     "predicted = clf.predict(X_new_tfidf)\n",
279 |     "\n",
280 |     "for doc, category in zip(docs_new, predicted):\n",
281 |     "    print('%r => %s' % (doc, twenty_train.target_names[category]))"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "Building a pipeline"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": 18,
294 |    "metadata": {},
295 |    "outputs": [],
296 |    "source": [
297 |     "from sklearn.pipeline import Pipeline\n",
298 |     "text_clf = Pipeline([('vect', CountVectorizer()),\n",
299 |     "                     ('tfidf', TfidfTransformer()),\n",
300 |     "                     ('clf', MultinomialNB()),\n",
301 |     "])"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 19,
307 |    "metadata": {},
308 |    "outputs": [
309 |     {
310 |      "data": {
311 |       "text/plain": [
312 |        "Pipeline(memory=None,\n",
313 |        "     steps=[('vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
314 |        "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
315 |        "        lowercase=True, max_df=1.0, max_features=None, min_df=1,\n",
316 |        "        ngram_range=(1, 1), preprocessor=None, stop_words=None,\n",
317 |        "        strip...inear_tf=False, use_idf=True)), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])"
318 |       ]
319 |      },
320 |      "execution_count": 19,
321 |      "metadata": {},
322 |      "output_type": "execute_result"
323 |     }
324 |    ],
325 |    "source": [
326 |     "text_clf.fit(twenty_train.data, twenty_train.target)  "
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "Evaluation of the performance on the test set"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 20,
339 |    "metadata": {},
340 |    "outputs": [
341 |     {
342 |      "data": {
343 |       "text/plain": [
344 |        "0.8348868175765646"
345 |       ]
346 |      },
347 |      "execution_count": 20,
348 |      "metadata": {},
349 |      "output_type": "execute_result"
350 |     }
351 |    ],
352 |    "source": [
353 |     "import numpy as np\n",
354 |     "twenty_test = fetch_20newsgroups(subset='test',\n",
355 |     "    categories=categories, shuffle=True, random_state=42)\n",
356 |     "docs_test = twenty_test.data\n",
357 |     "predicted = text_clf.predict(docs_test)\n",
358 |     "np.mean(predicted == twenty_test.target)  "
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "metadata": {},
364 |    "source": [
365 |     "Now let us use a linear support vector machine (SVM)"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "code",
370 |    "execution_count": 21,
371 |    "metadata": {},
372 |    "outputs": [
373 |     {
374 |      "data": {
375 |       "text/plain": [
376 |        "0.9127829560585885"
377 |       ]
378 |      },
379 |      "execution_count": 21,
380 |      "metadata": {},
381 |      "output_type": "execute_result"
382 |     }
383 |    ],
384 |    "source": [
385 |     "from sklearn.linear_model import SGDClassifier\n",
386 |     "text_clf = Pipeline([('vect', CountVectorizer()),\n",
387 |     "                     ('tfidf', TfidfTransformer()),\n",
388 |     "                     ('clf', SGDClassifier(loss='hinge', penalty='l2',\n",
389 |     "                                           alpha=1e-3, random_state=42,\n",
390 |     "                                           max_iter=5, tol=None)),\n",
391 |     "])\n",
392 |     "text_clf.fit(twenty_train.data, twenty_train.target)  \n",
393 |     "\n",
394 |     "predicted = text_clf.predict(docs_test)\n",
395 |     "np.mean(predicted == twenty_test.target)  "
396 |    ]
397 |   },
398 |   {
399 |    "cell_type": "markdown",
400 |    "metadata": {},
401 |    "source": [
402 |     "scikit-learn further provides utilities for more detailed performance analysis of the results:\n",
403 |     "\n"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": 22,
409 |    "metadata": {},
410 |    "outputs": [
411 |     {
412 |      "name": "stdout",
413 |      "output_type": "stream",
414 |      "text": [
415 |       "                        precision    recall  f1-score   support\n",
416 |       "\n",
417 |       "           alt.atheism       0.95      0.81      0.87       319\n",
418 |       "         comp.graphics       0.88      0.97      0.92       389\n",
419 |       "               sci.med       0.94      0.90      0.92       396\n",
420 |       "soc.religion.christian       0.90      0.95      0.93       398\n",
421 |       "\n",
422 |       "           avg / total       0.92      0.91      0.91      1502\n",
423 |       "\n"
424 |      ]
425 |     },
426 |     {
427 |      "data": {
428 |       "text/plain": [
429 |        "array([[258,  11,  15,  35],\n",
430 |        "       [  4, 379,   3,   3],\n",
431 |        "       [  5,  33, 355,   3],\n",
432 |        "       [  5,  10,   4, 379]])"
433 |       ]
434 |      },
435 |      "execution_count": 22,
436 |      "metadata": {},
437 |      "output_type": "execute_result"
438 |     }
439 |    ],
440 |    "source": [
441 |     "from sklearn import metrics\n",
442 |     "print(metrics.classification_report(twenty_test.target, predicted,\n",
443 |     "    target_names=twenty_test.target_names))\n",
444 |     "\n",
445 |     "metrics.confusion_matrix(twenty_test.target, predicted)"
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "markdown",
450 |    "metadata": {},
451 |    "source": [
452 |     "Parameter tuning using grid search"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": 23,
458 |    "metadata": {},
459 |    "outputs": [],
460 |    "source": [
461 |     "from sklearn.model_selection import GridSearchCV\n",
462 |     "parameters = {'vect__ngram_range': [(1, 1), (1, 2)],\n",
463 |     "              'tfidf__use_idf': (True, False),\n",
464 |     "              'clf__alpha': (1e-2, 1e-3),\n",
465 |     "}"
466 |    ]
467 |   },
468 |   {
469 |    "cell_type": "code",
470 |    "execution_count": 24,
471 |    "metadata": {},
472 |    "outputs": [],
473 |    "source": [
474 |     "gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)\n"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": 25,
480 |    "metadata": {},
481 |    "outputs": [],
482 |    "source": [
483 |     "gs_clf = gs_clf.fit(twenty_train.data[:400], twenty_train.target[:400])\n"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": 26,
489 |    "metadata": {},
490 |    "outputs": [
491 |     {
492 |      "data": {
493 |       "text/plain": [
494 |        "'soc.religion.christian'"
495 |       ]
496 |      },
497 |      "execution_count": 26,
498 |      "metadata": {},
499 |      "output_type": "execute_result"
500 |     }
501 |    ],
502 |    "source": [
503 |     "twenty_train.target_names[gs_clf.predict(['God is love'])[0]]\n"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "code",
508 |    "execution_count": 27,
509 |    "metadata": {},
510 |    "outputs": [
511 |     {
512 |      "data": {
513 |       "text/plain": [
514 |        "0.9"
515 |       ]
516 |      },
517 |      "execution_count": 27,
518 |      "metadata": {},
519 |      "output_type": "execute_result"
520 |     }
521 |    ],
522 |    "source": [
523 |     "gs_clf.best_score_ "
524 |    ]
525 |   },
526 |   {
527 |    "cell_type": "code",
528 |    "execution_count": 28,
529 |    "metadata": {},
530 |    "outputs": [
531 |     {
532 |      "name": "stdout",
533 |      "output_type": "stream",
534 |      "text": [
535 |       "clf__alpha: 0.001\n",
536 |       "tfidf__use_idf: True\n",
537 |       "vect__ngram_range: (1, 1)\n"
538 |      ]
539 |     }
540 |    ],
541 |    "source": [
542 |     "for param_name in sorted(parameters.keys()):\n",
543 |     "    print(\"%s: %r\" % (param_name, gs_clf.best_params_[param_name]))"
544 |    ]
545 |   },
546 |   {
547 |    "cell_type": "code",
548 |    "execution_count": null,
549 |    "metadata": {},
550 |    "outputs": [],
551 |    "source": []
552 |   }
553 |  ],
554 |  "metadata": {
555 |   "kernelspec": {
556 |    "display_name": "Python 3",
557 |    "language": "python",
558 |    "name": "python3"
559 |   },
560 |   "language_info": {
561 |    "codemirror_mode": {
562 |     "name": "ipython",
563 |     "version": 3
564 |    },
565 |    "file_extension": ".py",
566 |    "mimetype": "text/x-python",
567 |    "name": "python",
568 |    "nbconvert_exporter": "python",
569 |    "pygments_lexer": "ipython3",
570 |    "version": "3.6.3"
571 |   }
572 |  },
573 |  "nbformat": 4,
574 |  "nbformat_minor": 2
575 | }
576 | 


--------------------------------------------------------------------------------
/projects/autoquiz/README.MD:
--------------------------------------------------------------------------------
1 | Here we use NLP to automatically create fill-in-the-blank questions.
2 | 


--------------------------------------------------------------------------------
/projects/autoquiz/auto_create_quiz.py.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 13,
  6 |    "metadata": {
  7 |     "scrolled": true
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "Requirement already up-to-date: nltk in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages\n",
 15 |       "Requirement already up-to-date: six in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from nltk)\n",
 16 |       "Requirement already up-to-date: textblob in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages\n",
 17 |       "Requirement already up-to-date: nltk>=3.1 in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from textblob)\n",
 18 |       "Requirement already up-to-date: six in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from nltk>=3.1->textblob)\n",
 19 |       "[nltk_data] Downloading package brown to /usr/local/share/nltk_data...\n",
 20 |       "[nltk_data]   Package brown is already up-to-date!\n",
 21 |       "[nltk_data] Downloading package punkt to /usr/local/share/nltk_data...\n",
 22 |       "[nltk_data]   Package punkt is already up-to-date!\n",
 23 |       "[nltk_data] Downloading package wordnet to\n",
 24 |       "[nltk_data]     /usr/local/share/nltk_data...\n",
 25 |       "[nltk_data]   Package wordnet is already up-to-date!\n",
 26 |       "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
 27 |       "[nltk_data]     /usr/local/share/nltk_data...\n",
 28 |       "[nltk_data]   Package averaged_perceptron_tagger is already up-to-\n",
 29 |       "[nltk_data]       date!\n",
 30 |       "[nltk_data] Downloading package conll2000 to\n",
 31 |       "[nltk_data]     /usr/local/share/nltk_data...\n",
 32 |       "[nltk_data]   Package conll2000 is already up-to-date!\n",
 33 |       "[nltk_data] Downloading package movie_reviews to\n",
 34 |       "[nltk_data]     /usr/local/share/nltk_data...\n",
 35 |       "[nltk_data]   Package movie_reviews is already up-to-date!\n",
 36 |       "Finished.\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "# With ! we can run the unix commands from the jupyter notebook\n",
 42 |     "# nltk is a great natural language processing library in Python\n",
 43 |     "!pip install -U nltk\n",
 44 |     "\n",
 45 |     "# Let's install textblob\n",
 46 |     "# textblob is a simple wrapper over NLTK\n",
 47 |     "!pip install -U textblob\n",
 48 |     "!python -m textblob.download_corpora"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 14,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "# Import TextBlob module\n",
 58 |     "from textblob import TextBlob"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 15,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "# This is the text that we are going to use. \n",
 68 |     "# This text is from wikipedia on World War 2 - https://en.wikipedia.org/wiki/World_War_II\n",
 69 |     "# Note: triple quotes are used for defining multi line string\n",
 70 |     "ww2 = '''\n",
 71 |     "World War II (often abbreviated to WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier. It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the Axis. It was the most widespread war in history, and directly involved more than 100 million people from over 30 countries. In a state of total war, the major participants threw their entire economic, industrial, and scientific capabilities behind the war effort, erasing the distinction between civilian and military resources.\n",
 72 |     "\n",
 73 |     "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the Soviet Union and China. It included massacres, the deliberate genocide of the Holocaust, strategic bombing, starvation, disease and the first use of nuclear weapons in history.[1][2][3][4]\n",
 74 |     "\n",
 75 |     "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of China in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom. Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, Germany conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan. Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, Finland, Romania and the Baltic states. The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with campaigns including the North Africa and East Africa campaigns, the aerial Battle of Britain, the Blitz bombing campaign, and the Balkan Campaign, as well as the long-running Battle of the Atlantic. On 22 June 1941, the European Axis powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the Axis military forces into a war of attrition. In December 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n",
 76 |     "\n",
 77 |     "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and Germany and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union. In 1943, with a series of German defeats on the Eastern Front, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all fronts. In 1944, the Western Allies invaded German-occupied France, while the Soviet Union regained all of its territorial losses and invaded Germany and its allies. During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western Pacific islands.\n",
 78 |     "\n",
 79 |     "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 May 1945. Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of Hiroshima and Nagasaki on 6 August and 9 August respectively. With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of Manchuria, Japan formally surrendered on 2 September 1945. Thus ended the war in Asia, cementing the total victory of the Allies.\n",
 80 |     "\n",
 81 |     "World War II changed the political alignment and social structure of the world. The United Nations (UN) was established to foster international co-operation and prevent future conflicts. The victorious great powers—China, France, the Soviet Union, the United Kingdom, and the United States—became the permanent members of the United Nations Security Council.[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the Cold War, which lasted for the next 46 years. Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and Asia began. Most countries whose industries had been damaged moved towards economic recovery. Political integration, especially in Europe, emerged as an effort to end pre-war enmities and to create a common identity.[8]\n",
 82 |     "'''\n",
 83 |     "\n",
 84 |     "\n",
 85 |     "# Uncomment the code below and run it if you are using Python 2\n",
 86 |     "# ww2 = unicode(ww2, 'utf-8')"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 16,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "ww2b = TextBlob(ww2)\n",
 96 |     "sposs = {}\n",
 97 |     "for sentence in ww2b.sentences:\n",
 98 |     "    \n",
 99 |     "    # We are going to prepare the dictionary of parts-of-speech as the key and value is a list of words:\n",
100 |     "    # {part-of-speech: [word1, word2]}\n",
101 |     "    # We are basically grouping the words based on the parts-of-speech\n",
102 |     "    poss = {}\n",
103 |     "    sposs[sentence.string] = poss;\n",
104 |     "    for t in sentence.tags:\n",
105 |     "        tag = t[1]\n",
106 |     "        if tag not in poss:\n",
107 |     "            poss[tag] = []\n",
108 |     "        poss[tag].append(t[0])\n",
109 |     "\n",
110 |     "\n",
111 |     "# Uncomment the code below and run it if you are using Python 2\n",
112 |     "# ww2b = TextBlob(ww2)\n",
113 |     "# sposs = {}\n",
114 |     "# for sentence in ww2b.sentences:\n",
115 |     "    \n",
116 |     "#     # We are going to prepare the dictionary of parts-of-speech as the key and value is a list of words:\n",
117 |     "#     # {part-of-speech: [word1, word2]}\n",
118 |     "#     # We are basically grouping the words based on the parts-of-speech\n",
119 |     "    \n",
120 |     "#     poss = {}\n",
121 |     "#     sposs[sentence.string] = poss;\n",
122 |     "#     for t in sentence.tags:\n",
123 |     "#         tag = t[1].encode('utf-8')\n",
124 |     "#         if tag not in poss:\n",
125 |     "#             poss[tag] = []\n",
126 |     "#         poss[tag].append(t[0].encode('utf-8'))\n"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 17,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "import random\n",
136 |     "import re\n",
137 |     "\n",
138 |     "# Create the blank in string\n",
139 |     "def replaceIC(word, sentence):\n",
140 |     "    insensitive_hippo = re.compile(re.escape(word), re.IGNORECASE)\n",
141 |     "    return insensitive_hippo.sub('__________________', sentence)\n",
142 |     "\n",
143 |     "# For a sentence create a blank space.\n",
144 |     "# It first tries to randomly select a proper noun\n",
145 |     "# and if the proper noun is not found, it selects a noun randomly.\n",
146 |     "def removeWord(sentence, poss):\n",
147 |     "    words = None\n",
148 |     "    if 'NNP' in poss:\n",
149 |     "        words = poss['NNP']\n",
150 |     "    elif 'NN' in poss:\n",
151 |     "        words = poss['NN']\n",
152 |     "    else:\n",
153 |     "        print(\"NN and NNP not found\")\n",
154 |     "        return (None, sentence, None)\n",
155 |     "    if len(words) > 0:\n",
156 |     "        word = random.choice(words)\n",
157 |     "        replaced = replaceIC(word, sentence)\n",
158 |     "        return (word, sentence, replaced)\n",
159 |     "    else:\n",
160 |     "        print(\"words are empty\")\n",
161 |     "        return (None, sentence, None)"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 18,
167 |    "metadata": {
168 |     "scrolled": true
169 |    },
170 |    "outputs": [
171 |     {
172 |      "name": "stdout",
173 |      "output_type": "stream",
174 |      "text": [
175 |       "\n",
176 |       "World War __________________ (often abbreviated to WW__________________ or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier.\n",
177 |       "\n",
178 |       "===============\n",
179 |       "II\n",
180 |       "===============\n",
181 |       "\n",
182 |       "\n",
183 |       "It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the __________________.\n",
184 |       "\n",
185 |       "===============\n",
186 |       "Axis\n",
187 |       "===============\n",
188 |       "\n",
189 |       "\n",
190 |       "It was the most widespread __________________ in history, and directly involved more than 100 million people from over 30 countries.\n",
191 |       "\n",
192 |       "===============\n",
193 |       "war\n",
194 |       "===============\n",
195 |       "\n",
196 |       "\n",
197 |       "In a state of total __________________, the major participants threw their entire economic, industrial, and scientific capabilities behind the __________________ effort, erasing the distinction between civilian and military resources.\n",
198 |       "\n",
199 |       "===============\n",
200 |       "war\n",
201 |       "===============\n",
202 |       "\n",
203 |       "\n",
204 |       "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the __________________ Union and China.\n",
205 |       "\n",
206 |       "===============\n",
207 |       "Soviet\n",
208 |       "===============\n",
209 |       "\n",
210 |       "\n",
211 |       "It included massacres, the deliberate genocide of the __________________, strategic bombing, starvation, disease and the first use of nuclear weapons in history.\n",
212 |       "\n",
213 |       "===============\n",
214 |       "Holocaust\n",
215 |       "===============\n",
216 |       "\n",
217 |       "\n",
218 |       "[1][2][3][4]\n",
219 |       "\n",
220 |       "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of __________________ in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom.\n",
221 |       "\n",
222 |       "===============\n",
223 |       "China\n",
224 |       "===============\n",
225 |       "\n",
226 |       "\n",
227 |       "Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, __________________ conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan.\n",
228 |       "\n",
229 |       "===============\n",
230 |       "Germany\n",
231 |       "===============\n",
232 |       "\n",
233 |       "\n",
234 |       "Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, __________________, Romania and the Baltic states.\n",
235 |       "\n",
236 |       "===============\n",
237 |       "Finland\n",
238 |       "===============\n",
239 |       "\n",
240 |       "\n",
241 |       "The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with __________________s including the North Africa and East Africa __________________s, the aerial Battle of Britain, the Blitz bombing __________________, and the Balkan __________________, as well as the long-running Battle of the Atlantic.\n",
242 |       "\n",
243 |       "===============\n",
244 |       "Campaign\n",
245 |       "===============\n",
246 |       "\n",
247 |       "\n",
248 |       "On 22 June 1941, the European __________________ powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the __________________ military forces into a war of attrition.\n",
249 |       "\n",
250 |       "===============\n",
251 |       "Axis\n",
252 |       "===============\n",
253 |       "\n",
254 |       "\n",
255 |       "In __________________ 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n",
256 |       "\n",
257 |       "===============\n",
258 |       "December\n",
259 |       "===============\n",
260 |       "\n",
261 |       "\n",
262 |       "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and __________________ and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union.\n",
263 |       "\n",
264 |       "===============\n",
265 |       "Germany\n",
266 |       "===============\n",
267 |       "\n",
268 |       "\n",
269 |       "In 1943, with a series of German defeats on the Eastern __________________, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all __________________s.\n",
270 |       "\n",
271 |       "===============\n",
272 |       "Front\n",
273 |       "===============\n",
274 |       "\n",
275 |       "\n",
276 |       "In 1944, the Western Allies invaded German-occupied France, while the __________________ Union regained all of its territorial losses and invaded Germany and its allies.\n",
277 |       "\n",
278 |       "===============\n",
279 |       "Soviet\n",
280 |       "===============\n",
281 |       "\n",
282 |       "\n",
283 |       "During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western __________________ islands.\n",
284 |       "\n",
285 |       "===============\n",
286 |       "Pacific\n",
287 |       "===============\n",
288 |       "\n",
289 |       "\n",
290 |       "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 __________________ 1945.\n",
291 |       "\n",
292 |       "===============\n",
293 |       "May\n",
294 |       "===============\n",
295 |       "\n",
296 |       "\n",
297 |       "Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of __________________ and Nagasaki on 6 August and 9 August respectively.\n",
298 |       "\n",
299 |       "===============\n",
300 |       "Hiroshima\n",
301 |       "===============\n",
302 |       "\n",
303 |       "\n",
304 |       "With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of __________________, Japan formally surrendered on 2 September 1945.\n",
305 |       "\n",
306 |       "===============\n",
307 |       "Manchuria\n",
308 |       "===============\n",
309 |       "\n",
310 |       "\n",
311 |       "Thus ended the war in __________________, cementing the total victory of the Allies.\n",
312 |       "\n",
313 |       "===============\n",
314 |       "Asia\n",
315 |       "===============\n",
316 |       "\n",
317 |       "\n",
318 |       "__________________ War II changed the political alignment and social structure of the __________________.\n",
319 |       "\n",
320 |       "===============\n",
321 |       "World\n",
322 |       "===============\n",
323 |       "\n",
324 |       "\n",
325 |       "The United __________________ (UN) was established to foster international co-operation and prevent future conflicts.\n",
326 |       "\n",
327 |       "===============\n",
328 |       "Nations\n",
329 |       "===============\n",
330 |       "\n",
331 |       "\n",
332 |       "The victorious great powers—China, France, the Soviet Union, the United __________________, and the United States—became the permanent members of the United Nations Security Council.\n",
333 |       "\n",
334 |       "===============\n",
335 |       "Kingdom\n",
336 |       "===============\n",
337 |       "\n",
338 |       "\n",
339 |       "[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the __________________ War, which lasted for the next 46 years.\n",
340 |       "\n",
341 |       "===============\n",
342 |       "Cold\n",
343 |       "===============\n",
344 |       "\n",
345 |       "\n",
346 |       "Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and __________________ began.\n",
347 |       "\n",
348 |       "===============\n",
349 |       "Asia\n",
350 |       "===============\n",
351 |       "\n",
352 |       "\n",
353 |       "Most countries whose industries had been damaged moved towards economic __________________.\n",
354 |       "\n",
355 |       "===============\n",
356 |       "recovery\n",
357 |       "===============\n",
358 |       "\n",
359 |       "\n",
360 |       "Political integration, especially in __________________, emerged as an effort to end pre-war enmities and to create a common identity.\n",
361 |       "\n",
362 |       "===============\n",
363 |       "Europe\n",
364 |       "===============\n",
365 |       "\n",
366 |       "\n",
367 |       "NN and NNP not found\n",
368 |       "Founded none for \n",
369 |       "[8]\n"
370 |      ]
371 |     }
372 |    ],
373 |    "source": [
374 |     "# Iterate over the sentences\n",
375 |     "for sentence in sposs.keys():\n",
376 |     "    poss = sposs[sentence]\n",
377 |     "    (word, osentence, replaced) = removeWord(sentence, poss)\n",
378 |     "    if replaced is None:\n",
379 |     "        print (\"Founded none for \")\n",
380 |     "        print(sentence)\n",
381 |     "    else:\n",
382 |     "        print(replaced)\n",
383 |     "        print (\"\\n===============\")\n",
384 |     "        print(word)\n",
385 |     "        print (\"===============\")\n",
386 |     "        print(\"\\n\")"
387 |    ]
388 |   },
389 |   {
390 |    "cell_type": "code",
391 |    "execution_count": null,
392 |    "metadata": {},
393 |    "outputs": [],
394 |    "source": []
395 |   }
396 |  ],
397 |  "metadata": {
398 |   "kernelspec": {
399 |    "display_name": "Python 3",
400 |    "language": "python",
401 |    "name": "python3"
402 |   },
403 |   "language_info": {
404 |    "codemirror_mode": {
405 |     "name": "ipython",
406 |     "version": 3
407 |    },
408 |    "file_extension": ".py",
409 |    "mimetype": "text/x-python",
410 |    "name": "python",
411 |    "nbconvert_exporter": "python",
412 |    "pygments_lexer": "ipython3",
413 |    "version": "3.6.3"
414 |   },
415 |   "toc": {
416 |    "nav_menu": {},
417 |    "number_sections": true,
418 |    "sideBar": true,
419 |    "skip_h1_title": false,
420 |    "toc_cell": false,
421 |    "toc_position": {},
422 |    "toc_section_display": "block",
423 |    "toc_window_display": false
424 |   }
425 |  },
426 |  "nbformat": 4,
427 |  "nbformat_minor": 2
428 | }
429 | 


--------------------------------------------------------------------------------
/projects/deploy_mnist/README.md:
--------------------------------------------------------------------------------
 1 | The purpose of this project is to show how to move your machine learning models into production. In this project, we'll train the MNIST model, save the model to a file, load the model from the file in the Flask app and predict the digit for new images. Since input images in MNIST are 28x28 greyscale images, the images used for predictions have to be processed: they should be converted to greyscale and resized to 28x28 pixels. Because of this, you may not get high accuracy in predictions, but you will learn how to move your model to production (which is the sole objective of this project).
 2 | 
 3 | We'll use Flask for exposing the model using the REST API for predictions. Flask is a micro web framework written in Python. It's lightweight and easy to learn.
 4 | 
 5 | # Steps
 6 | 
 7 | ## Clone the repository
 8 | ```
 9 | git clone https://github.com/cloudxlab/ml.git
10 | ```
11 | 
12 | ## Set the Python path - On CloudxLab, the default installation is python2
13 | 
14 | ```
15 | export PATH=/usr/local/anaconda/bin/:$PATH
16 | ```
17 | 
18 | ## Create virtual environment
19 | 
20 | ```
21 | cd ml/projects/deploy_mnist/
22 | virtualenv -p python3 venv
23 | ```
24 | 
25 | ## Activate virtual environment
26 | 
27 | ```
28 | source venv/bin/activate
29 | ```
30 | 
31 | ## Install Flask and the other requirements
32 | ```
33 | pip install -r requirements.txt
34 | ```
35 | 
36 | ## Train the model
37 | 
38 | The trained model will be saved in trained_models directory
39 | ```
40 | mkdir -p trained_models
41 | python train_mnist_model.py
42 | ```
43 | 
44 | ## Start the flask server for predictions
45 | 
46 | For the API code, see the file `predictions.py` under the `flask_app` directory. Run the server on port 4041. If the port is already in use, use any other port in the range 4040 to 4060, as on CloudxLab only these ports are open for public access.
47 | 
48 | ```
49 | cd flask_app
50 | export LC_ALL=en_US.utf-8
51 | export LANG=en_US.utf-8
52 | export FLASK_APP=predictions.py
53 | flask run --host 0.0.0.0 --port 4041
54 | ```
55 | 
56 | ## Predict the digit for the new image
57 | 
58 | We will use the test images for predictions. Log in to another console and run the commands below.
59 | ```
60 | cd ml/projects/deploy_mnist/
61 | curl -F 'file=@test-images/7.png' 127.0.0.1:4041/predict
62 | ```
63 | 
64 | The REST API will return a JSON object like the one below:
65 | 
66 | ```{"digit":7}```
67 | 
68 | ## Public API
69 | 
70 | Your Flask server is running on the CloudxLab web console. Let's say your web console is e.cloudxlab.com; then the endpoint URL will be http://e.cloudxlab.com:4041/predict
71 | 
72 | You can call this REST API by using the above-mentioned endpoint URL.
73 | 
74 | Replace 4041 with the port number on which your server is running.
75 | 
76 | ## Next Steps
77 | 
78 | The above Flask server runs in development mode. For production usage, you should run the server using Nginx and uWSGI. For details, please follow this documentation: http://flask.pocoo.org/docs/1.0/deploying/
79 | 


--------------------------------------------------------------------------------
/projects/deploy_mnist/flask_app/predictions.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.externals import joblib
 3 | from PIL import Image
 4 | from flask import Flask, jsonify, request
 5 | 
 6 | # Create flask app
 7 | app = Flask(__name__)
 8 | 
 9 | # Load the previously trained model from the file
10 | model = joblib.load("../trained_models/mnist_model.pkl")
11 | 
12 | # /predict is the end point
13 | @app.route('/predict', methods=["POST"])
14 | def predict_image():
15 | 
16 |     # Read the image uploaded by the curl command
17 |     requested_img = request.files['file']
18 | 
19 |     '''
20 |     Convert the uploaded image to greyscale.
21 |     Since in MNIST the training images are greyscaled hence we will have to convert the uploaded image to greyscale
22 |     '''
23 |     greyscale_img = Image.open(requested_img).convert('L')
24 | 
25 |     '''
26 |     Resize the uploaded image to 28x28 pixels.
27 |     Since in MNIST the training images are of 28x28 pixels hence we will have to resize the uploaded image to 28x28 pixels.
28 |     '''
29 |     resized_image = greyscale_img.resize((28,28))
30 | 
31 |     # Convert the image to an array
32 |     img = np.asarray(resized_image)
33 | 
34 |     # Reshape the image to (784, 1)
35 |     img = img.reshape(784,)
36 | 
37 |     # Predict the digit using the trained model
38 |     pred = model.predict(img.reshape(1, -1))
39 | 
40 |     # Get the digit
41 |     result = int(pred.tolist()[0])
42 | 
43 |     # Return the JSON response
44 |     return jsonify({"digit": result})
45 | 


--------------------------------------------------------------------------------
/projects/deploy_mnist/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask==1.0.2
2 | numpy==1.16.2
3 | #Pillow==2.2.1
4 | scikit-learn==0.20.3
5 | pillow==6.0.0
6 | 


--------------------------------------------------------------------------------
/projects/deploy_mnist/test-images/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/2.png


--------------------------------------------------------------------------------
/projects/deploy_mnist/test-images/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/5.png


--------------------------------------------------------------------------------
/projects/deploy_mnist/test-images/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/7.png


--------------------------------------------------------------------------------
/projects/deploy_mnist/train_mnist_model.py:
--------------------------------------------------------------------------------
 1 | #from sklearn.datasets import fetch_mldata
 2 | from sklearn.datasets import fetch_openml
 3 | import numpy as np
 4 | from sklearn.linear_model import SGDClassifier
 5 | from sklearn.metrics import accuracy_score
 6 | from sklearn.externals import joblib
 7 | 
 8 | np.random.seed(42)
 9 | #mnist = fetch_mldata("MNIST original")
10 | mnist = fetch_openml('mnist_784', version=1, cache=True)
11 | mnist.target = mnist.target.astype(np.int8)
12 | X, y = mnist["data"], mnist["target"]
13 | 
14 | X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
15 | 
16 | shuffle_index = np.random.permutation(60000)
17 | X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
18 | 
19 | # Train SGDClassifier
20 | sgd_clf = SGDClassifier(random_state=42, max_iter=10)
21 | sgd_clf.fit(X_train, y_train)
22 | 
23 | # Print the accuracy of SGDClassifier
24 | y_train_predict = sgd_clf.predict(X_train)
25 | sgd_accuracy = accuracy_score(y_train, y_train_predict)
26 | print("Accuracy is %s " % sgd_accuracy)
27 | 
28 | # Dump the model to the file
29 | joblib.dump(sgd_clf, "trained_models/mnist_model.pkl")
30 | 


--------------------------------------------------------------------------------
/python/.ipynb_checkpoints/Python - Numpy-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {
 18 |     "collapsed": true
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "a = np.array([1, 2, 3])"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "type(a)"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": true
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "b = np.array((3, 4, 5))"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": null,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "type(b)"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "x = np.zeros((3,4))  \n",
 61 |     "type(x.dtype)"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "x.shape"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "y = np.ones( (3,4), dtype=np.int16 )  \n",
 80 |     "y"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "print(x.dtype)\n",
 90 |     "print(y.dtype)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {
 97 |     "collapsed": true
 98 |    },
 99 |    "outputs": [],
100 |    "source": []
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": null,
105 |    "metadata": {},
106 |    "outputs": [],
107 |    "source": [
108 |     "np.full( (3,4), 0.11 )  "
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "#array([[ 3.14159265 ,  3.14159265 ,  3.14159265 ,  3.14159265 ],\n",
118 |     "       [ 3.14159265 ,  3.14159265 ,  3.14159265 ,  3.14159265 ],\n",
119 |     "       [ 3.14159265 ,  3.14159265 ,  3.14159265 ,  3.14159265 ]])"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": null,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "np.arange( 10, 30, 5 )"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "%timeit x"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {},
144 |    "outputs": [],
145 |    "source": []
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "np.arange( 0, 2, 0.3 ) "
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {
160 |     "collapsed": true
161 |    },
162 |    "outputs": [],
163 |    "source": [
164 |     "x = [\n",
165 |     "        [\n",
166 |     "            [1],\n",
167 |     "            [1], \n",
168 |     "            [1],\n",
169 |     "            [1]\n",
170 |     "        ],\n",
171 |     "        [\n",
172 |     "            [1],\n",
173 |     "            [1], \n",
174 |     "            [1],\n",
175 |     "            [1]\n",
176 |     "        ]\n",
177 |     "]"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": [
186 |     "np.array(x).shape"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": null,
192 |    "metadata": {},
193 |    "outputs": [],
194 |    "source": [
195 |     "np.linspace(0, 5/3, 6)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": null,
201 |    "metadata": {},
202 |    "outputs": [],
203 |    "source": [
204 |     "# Make a 2x3 matrix having random floats between 0 and 1:\n",
205 |     "np.random.rand(2,3)"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "code",
210 |    "execution_count": null,
211 |    "metadata": {
212 |     "collapsed": true
213 |    },
214 |    "outputs": [],
215 |    "source": [
216 |     "x = np.empty((2,3))"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": null,
222 |    "metadata": {},
223 |    "outputs": [],
224 |    "source": [
225 |     "x"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {},
232 |    "outputs": [],
233 |    "source": [
234 |     "x.ndim"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": null,
240 |    "metadata": {},
241 |    "outputs": [],
242 |    "source": [
243 |     "x.shape"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": [
252 |     "x.size\n"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": null,
258 |    "metadata": {},
259 |    "outputs": [],
260 |    "source": [
261 |     "x[0]"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": null,
267 |    "metadata": {
268 |     "collapsed": true
269 |    },
270 |    "outputs": [],
271 |    "source": []
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {
277 |     "collapsed": true
278 |    },
279 |    "outputs": [],
280 |    "source": [
281 |     "c = np.arange(1, 5)"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {},
288 |    "outputs": [],
289 |    "source": [
290 |     "c"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": null,
296 |    "metadata": {},
297 |    "outputs": [],
298 |    "source": [
299 |     "c.dtype"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {},
306 |    "outputs": [],
307 |    "source": [
308 |     "c.itemsize"
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "code",
313 |    "execution_count": null,
314 |    "metadata": {},
315 |    "outputs": [],
316 |    "source": [
317 |     "a = np.arange(6)\n",
318 |     "print(a)"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": null,
324 |    "metadata": {},
325 |    "outputs": [],
326 |    "source": [
327 |     "b = a.reshape(2, 3)\n",
328 |     "print(b)"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "code",
333 |    "execution_count": null,
334 |    "metadata": {},
335 |    "outputs": [],
336 |    "source": [
337 |     "a = np.array([1, 5, 3, 19, 13, 7, 3])\n",
338 |     "a[3]"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": null,
344 |    "metadata": {},
345 |    "outputs": [],
346 |    "source": [
347 |     "a[2:5]"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": null,
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "print(a[2::2])\n",
357 |     "print(a[2::3])\n",
358 |     "print(a[::-1])"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": null,
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": [
367 |     "a = np.array([1, 5, 3, 19, 13, 7, 3])\n",
368 |     "a[1:3] = -1\n",
369 |     "print(a)"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "code",
374 |    "execution_count": null,
375 |    "metadata": {
376 |     "collapsed": true
377 |    },
378 |    "outputs": [],
379 |    "source": [
380 |     "b = [1, 2, 5, 7, 8]"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "code",
385 |    "execution_count": null,
386 |    "metadata": {},
387 |    "outputs": [],
388 |    "source": [
389 |     "b[1:3] = -1\n"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": null,
395 |    "metadata": {},
396 |    "outputs": [],
397 |    "source": [
398 |     "a = np.array([1, 2, 5, 7, 8])\n",
399 |     "a_slice = a[1:5]\n",
400 |     "a_slice[1] = 1000\n",
401 |     "print(a)"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": null,
407 |    "metadata": {},
408 |    "outputs": [],
409 |    "source": [
410 |     "another_slice = a[2:6].copy()\n",
411 |     "another_slice[1] = 23333\n",
412 |     "print(another_slice)\n",
413 |     "print(a)"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": null,
419 |    "metadata": {
420 |     "collapsed": true
421 |    },
422 |    "outputs": [],
423 |    "source": [
424 |     "x = np.random.rand(5,8)"
425 |    ]
426 |   },
427 |   {
428 |    "cell_type": "code",
429 |    "execution_count": null,
430 |    "metadata": {},
431 |    "outputs": [],
432 |    "source": [
433 |     "x"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "code",
438 |    "execution_count": null,
439 |    "metadata": {},
440 |    "outputs": [],
441 |    "source": [
442 |     "x[1]"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "code",
447 |    "execution_count": null,
448 |    "metadata": {},
449 |    "outputs": [],
450 |    "source": [
451 |     "x[1, 1]"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "code",
456 |    "execution_count": null,
457 |    "metadata": {},
458 |    "outputs": [],
459 |    "source": [
460 |     "x[0:2, 0:2]"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "code",
465 |    "execution_count": null,
466 |    "metadata": {
467 |     "collapsed": true
468 |    },
469 |    "outputs": [],
470 |    "source": [
471 |     "x = np.random.rand(5,5, 5)"
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "code",
476 |    "execution_count": null,
477 |    "metadata": {},
478 |    "outputs": [],
479 |    "source": [
480 |     "x"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "code",
485 |    "execution_count": null,
486 |    "metadata": {},
487 |    "outputs": [],
488 |    "source": [
489 |     "x[0:2, 0:2, 0:2]"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": null,
495 |    "metadata": {
496 |     "collapsed": true
497 |    },
498 |    "outputs": [],
499 |    "source": [
500 |     "a = np.arange(12).reshape(3, 4)"
501 |    ]
502 |   },
503 |   {
504 |    "cell_type": "code",
505 |    "execution_count": null,
506 |    "metadata": {},
507 |    "outputs": [],
508 |    "source": [
509 |     "a"
510 |    ]
511 |   },
512 |   {
513 |    "cell_type": "code",
514 |    "execution_count": null,
515 |    "metadata": {
516 |     "collapsed": true
517 |    },
518 |    "outputs": [],
519 |    "source": [
520 |     "rows_on = np.array([ True, False, True])"
521 |    ]
522 |   },
523 |   {
524 |    "cell_type": "code",
525 |    "execution_count": null,
526 |    "metadata": {},
527 |    "outputs": [],
528 |    "source": [
529 |     "rows_on"
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "code",
534 |    "execution_count": null,
535 |    "metadata": {},
536 |    "outputs": [],
537 |    "source": [
538 |     "a"
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "code",
543 |    "execution_count": null,
544 |    "metadata": {},
545 |    "outputs": [],
546 |    "source": [
547 |     "a[rows_on]"
548 |    ]
549 |   },
550 |   {
551 |    "cell_type": "code",
552 |    "execution_count": null,
553 |    "metadata": {},
554 |    "outputs": [],
555 |    "source": [
556 |     "a = np.array( [20, 30, 40, 50] )\n",
557 |     "b = np.arange( 4 )\n",
558 |     "c = a + b\n",
559 |     "c"
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": null,
565 |    "metadata": {},
566 |    "outputs": [],
567 |    "source": [
568 |     "c = a - b\n",
569 |     "c"
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "code",
574 |    "execution_count": null,
575 |    "metadata": {},
576 |    "outputs": [],
577 |    "source": [
578 |     "A = np.array( [[1,1],\n",
579 |     "               [0,1]] )\n",
580 |     "\n",
581 |     "B = np.array( [[2,0],\n",
582 |     "               [3,4]] )\n",
583 |     "print(A*B)                         # element wise product\n",
584 |     "\n",
585 |     "print([[1*2, 1*0],\n",
586 |     "       [0*3, 1*4]])"
587 |    ]
588 |   },
589 |   {
590 |    "cell_type": "code",
591 |    "execution_count": null,
592 |    "metadata": {},
593 |    "outputs": [],
594 |    "source": [
595 |     "print(np.dot(A, B))\n",
596 |     "print([[1*2+ 1*3, 1*0+ 1*4],\n",
597 |     "       [0*2+1*3, 0*0+1*4]])"
598 |    ]
599 |   },
600 |   {
601 |    "cell_type": "code",
602 |    "execution_count": null,
603 |    "metadata": {},
604 |    "outputs": [],
605 |    "source": [
606 |     "a = np.array( [20, 30, 40, 50] )\n",
607 |     "b = np.arange(1, 5)\n",
608 |     "c = a / b\n",
609 |     "print(c)"
610 |    ]
611 |   },
612 |   {
613 |    "cell_type": "code",
614 |    "execution_count": null,
615 |    "metadata": {},
616 |    "outputs": [],
617 |    "source": [
618 |     "c"
619 |    ]
620 |   },
621 |   {
622 |    "cell_type": "code",
623 |    "execution_count": null,
624 |    "metadata": {},
625 |    "outputs": [],
626 |    "source": [
627 |     "a = np.array( [20, 30, 40, 50] )\n",
628 |     "b = np.arange(1, 5)\n",
629 |     "c = a // b\n",
630 |     "c\n"
631 |    ]
632 |   },
633 |   {
634 |    "cell_type": "code",
635 |    "execution_count": null,
636 |    "metadata": {},
637 |    "outputs": [],
638 |    "source": [
639 |     "# The modulus operator can be applied to NumPy arrays as shown below; it is applied element-wise.\n",
640 |     "a = np.array( [20, 30, 40, 50] )\n",
641 |     "b = np.arange(1, 5)\n",
642 |     "c = a % b\n",
643 |     "c\n"
644 |    ]
645 |   },
646 |   {
647 |    "cell_type": "code",
648 |    "execution_count": null,
649 |    "metadata": {},
650 |    "outputs": [],
651 |    "source": [
652 |     "a = np.array( [20, 30, 40, 50] )\n",
653 |     "b = np.arange(1, 5)\n",
654 |     "c = a ** b\n",
655 |     "c\n",
656 |     "\n"
657 |    ]
658 |   },
659 |   {
660 |    "cell_type": "code",
661 |    "execution_count": null,
662 |    "metadata": {},
663 |    "outputs": [],
664 |    "source": [
665 |     "m = np.array([20, -5, 30, 40])\n",
666 |     "m < [15, 16, 35, 36]"
667 |    ]
668 |   },
669 |   {
670 |    "cell_type": "code",
671 |    "execution_count": null,
672 |    "metadata": {},
673 |    "outputs": [],
674 |    "source": [
675 |     "m < 25 "
676 |    ]
677 |   },
678 |   {
679 |    "cell_type": "code",
680 |    "execution_count": null,
681 |    "metadata": {},
682 |    "outputs": [],
683 |    "source": [
684 |     "m[m < 25]"
685 |    ]
686 |   },
687 |   {
688 |    "cell_type": "code",
689 |    "execution_count": null,
690 |    "metadata": {
691 |     "collapsed": true
692 |    },
693 |    "outputs": [],
694 |    "source": []
695 |   },
696 |   {
697 |    "cell_type": "code",
698 |    "execution_count": null,
699 |    "metadata": {},
700 |    "outputs": [],
701 |    "source": [
702 |     "h = np.arange(5).reshape(1, 1, 5)\n",
703 |     "print(h)\n",
704 |     "print(h.shape)"
705 |    ]
706 |   },
707 |   {
708 |    "cell_type": "code",
709 |    "execution_count": null,
710 |    "metadata": {},
711 |    "outputs": [],
712 |    "source": [
713 |     "b = np.array([10, 20, 30, 40, 50])\n",
714 |     "b.reshape(1,1,5)"
715 |    ]
716 |   },
717 |   {
718 |    "cell_type": "code",
719 |    "execution_count": null,
720 |    "metadata": {},
721 |    "outputs": [],
722 |    "source": [
723 |     "x = h + [10, 20, 30, 40, 50]\n",
724 |     "print(x)\n",
725 |     "print(x.shape)"
726 |    ]
727 |   },
728 |   {
729 |    "cell_type": "code",
730 |    "execution_count": null,
731 |    "metadata": {},
732 |    "outputs": [],
733 |    "source": [
734 |     "h + [[[10, 20, 30, 40, 50]]]"
735 |    ]
736 |   },
737 |   {
738 |    "cell_type": "code",
739 |    "execution_count": null,
740 |    "metadata": {},
741 |    "outputs": [],
742 |    "source": [
743 |     "h + b.reshape(1,1,5)"
744 |    ]
745 |   },
746 |   {
747 |    "cell_type": "code",
748 |    "execution_count": null,
749 |    "metadata": {
750 |     "collapsed": true
751 |    },
752 |    "outputs": [],
753 |    "source": [
754 |     "k = np.arange(6).reshape(2, 3)"
755 |    ]
756 |   },
757 |   {
758 |    "cell_type": "code",
759 |    "execution_count": null,
760 |    "metadata": {},
761 |    "outputs": [],
762 |    "source": [
763 |     "k\n"
764 |    ]
765 |   },
766 |   {
767 |    "cell_type": "code",
768 |    "execution_count": null,
769 |    "metadata": {},
770 |    "outputs": [],
771 |    "source": [
772 |     "k + [100, 200, 300] "
773 |    ]
774 |   },
775 |   {
776 |    "cell_type": "code",
777 |    "execution_count": null,
778 |    "metadata": {},
779 |    "outputs": [],
780 |    "source": [
781 |     " k + [33, 44]"
782 |    ]
783 |   },
784 |   {
785 |    "cell_type": "code",
786 |    "execution_count": null,
787 |    "metadata": {},
788 |    "outputs": [],
789 |    "source": [
790 |     "a = np.array([[-2.5, 3.1, 7], [10, 11, 12]])\n",
791 |     "print(\"mean =\", a.mean())"
792 |    ]
793 |   },
794 |   {
795 |    "cell_type": "code",
796 |    "execution_count": null,
797 |    "metadata": {},
798 |    "outputs": [],
799 |    "source": [
800 |     "np.prod(a)"
801 |    ]
802 |   },
803 |   {
804 |    "cell_type": "code",
805 |    "execution_count": null,
806 |    "metadata": {},
807 |    "outputs": [],
808 |    "source": [
809 |     "print(-2.5* 3.1* 7*10*11*12)"
810 |    ]
811 |   },
812 |   {
813 |    "cell_type": "code",
814 |    "execution_count": null,
815 |    "metadata": {},
816 |    "outputs": [],
817 |    "source": [
818 |     "import math\n",
819 |     "a = np.array([1,3,4])\n",
820 |     "m = np.mean(a)\n",
821 |     "print(m)\n",
822 |     "print(a -m)\n",
823 |     "x = (a-m)**2\n",
824 |     "print(x)\n",
825 |     "s = np.sum(x)\n",
826 |     "print(s)\n",
827 |     "v = np.var(a)\n",
828 |     "print(v)\n",
829 |     "print(v*len(a))\n",
830 |     "print(np.std(a))\n",
831 |     "print(math.sqrt(s/len(a)))"
832 |    ]
833 |   },
834 |   {
835 |    "cell_type": "code",
836 |    "execution_count": null,
837 |    "metadata": {},
838 |    "outputs": [],
839 |    "source": [
840 |     "c=np.arange(24).reshape(2,3,4)\n",
841 |     "c\n"
842 |    ]
843 |   },
844 |   {
845 |    "cell_type": "code",
846 |    "execution_count": null,
847 |    "metadata": {},
848 |    "outputs": [],
849 |    "source": [
850 |     "c.sum(axis=0)  # sum across matrices"
851 |    ]
852 |   },
853 |   {
854 |    "cell_type": "code",
855 |    "execution_count": null,
856 |    "metadata": {
857 |     "collapsed": true
858 |    },
859 |    "outputs": [],
860 |    "source": []
861 |   }
862 |  ],
863 |  "metadata": {
864 |   "kernelspec": {
865 |    "display_name": "Python 3",
866 |    "language": "python",
867 |    "name": "python3"
868 |   },
869 |   "language_info": {
870 |    "codemirror_mode": {
871 |     "name": "ipython",
872 |     "version": 3
873 |    },
874 |    "file_extension": ".py",
875 |    "mimetype": "text/x-python",
876 |    "name": "python",
877 |    "nbconvert_exporter": "python",
878 |    "pygments_lexer": "ipython3",
879 |    "version": "3.6.3"
880 |   }
881 |  },
882 |  "nbformat": 4,
883 |  "nbformat_minor": 2
884 | }
885 | 


--------------------------------------------------------------------------------
/python/Lambda Operator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Lambda operator"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     " f = lambda x, y : x + y"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [
 26 |     {
 27 |      "data": {
 28 |       "text/plain": [
 29 |        "2"
 30 |       ]
 31 |      },
 32 |      "execution_count": 2,
 33 |      "metadata": {},
 34 |      "output_type": "execute_result"
 35 |     }
 36 |    ],
 37 |    "source": [
 38 |     "f(1,1)"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [
 46 |     {
 47 |      "data": {
 48 |       "text/plain": [
 49 |        "6"
 50 |       ]
 51 |      },
 52 |      "execution_count": 3,
 53 |      "metadata": {},
 54 |      "output_type": "execute_result"
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "f(2,4)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {
 65 |     "collapsed": true
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 5,
 75 |    "metadata": {},
 76 |    "outputs": [
 77 |     {
 78 |      "name": "stdout",
 79 |      "output_type": "stream",
 80 |      "text": [
 81 |       "[18, 9, 24, 12, 27]\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "# filter() function\n",
 87 |     "\n",
 88 |     "divisible_by_3 = filter(lambda x: x % 3 == 0, foo)\n",
 89 |     "print(list(divisible_by_3))"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 6,
 95 |    "metadata": {},
 96 |    "outputs": [
 97 |     {
 98 |      "name": "stdout",
 99 |      "output_type": "stream",
100 |      "text": [
101 |       "[14, 46, 28, 54, 44, 58, 26, 34, 64]\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "# map() function\n",
107 |     "\n",
108 |     "lambda_map = map(lambda x: x * 2 + 10, foo)\n",
109 |     "print(list(lambda_map))"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": 7,
115 |    "metadata": {},
116 |    "outputs": [
117 |     {
118 |      "name": "stdout",
119 |      "output_type": "stream",
120 |      "text": [
121 |       "139\n"
122 |      ]
123 |     }
124 |    ],
125 |    "source": [
126 |     "# reduce() function\n",
127 |     "\n",
128 |     "import functools\n",
129 |     "lambda_reduce = functools.reduce(lambda x, y: x + y, foo)\n",
130 |     "print(lambda_reduce)"
131 |    ]
132 |   }
133 |  ],
134 |  "metadata": {
135 |   "kernelspec": {
136 |    "display_name": "Python 3",
137 |    "language": "python",
138 |    "name": "python3"
139 |   },
140 |   "language_info": {
141 |    "codemirror_mode": {
142 |     "name": "ipython",
143 |     "version": 3
144 |    },
145 |    "file_extension": ".py",
146 |    "mimetype": "text/x-python",
147 |    "name": "python",
148 |    "nbconvert_exporter": "python",
149 |    "pygments_lexer": "ipython3",
150 |    "version": "3.6.3"
151 |   }
152 |  },
153 |  "nbformat": 4,
154 |  "nbformat_minor": 2
155 | }
156 | 


--------------------------------------------------------------------------------
/python/README:
--------------------------------------------------------------------------------
1 | For the Python class
2 | 


--------------------------------------------------------------------------------
/python/__pycache__/mylib.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/python/__pycache__/mylib.cpython-36.pyc


--------------------------------------------------------------------------------
/python/hello.py:
--------------------------------------------------------------------------------
1 | print("hello")
2 | print(3+4)
3 | 


--------------------------------------------------------------------------------
/python/simpleexp.py:
--------------------------------------------------------------------------------
1 | x = 10
2 | x = x + 2
3 | print(x)
4 | 


--------------------------------------------------------------------------------
/python/solutions/Python_Project_1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "## Python Project 1\n",
 10 |     "import os, pathlib, re\n",
 11 |     "\n",
 12 |     "# Defining the function\n",
 13 |     "def getEmailCounts(path1='../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/', field='To', pattern='([<].*@[^>]*)'):\n",
 14 |     "    \"\"\"Walk path1 and print, for every line starting with field + ':', the first match of pattern minus its leading character.\"\"\"\n",
 15 |     "    emails_dict=dict()\n",
 16 |     "    # Looping through all the files present under the path directory\n",
 17 |     "    for path, subdirs, files in os.walk(path1):\n",
 18 |     "        for name in files:\n",
 19 |     "            # Skipping the sql lite file, all other files are email files\n",
 20 |     "            if(name.endswith('sqlite')):\n",
 21 |     "                continue\n",
 22 |     "            # The with block closes each file once it has been read (the handle used to stay open)\n",
 23 |     "            with open(pathlib.PurePath(path, name)) as hand:\n",
 24 |     "                for line in hand:\n",
 25 |     "                    # Applying the condition to find the details as requested in the project\n",
 26 |     "                    if line.startswith(field+\":\"):\n",
 27 |     "                        matches=re.findall(pattern,line)\n",
 28 |     "                        if matches:  # a header line without a match is skipped instead of raising IndexError\n",
 29 |     "                            email=matches[0][1:]\n",
 30 |     "                            print(email)\n",
 31 |     "                            #emails_dict[email]=emails_dict.get(email,0)+1\n",
 32 |     "    #emails_dict=dict(sorted(emails_dict.items(), key=lambda x:x[1],reverse=True))\n",
 33 |     "\n",
 34 |     "# Calling the defined function\n",
 35 |     "getEmailCounts('../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/', 'To', '([ <].*@[^>\\n ]*)')"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "Series([], Name: fromEmails, dtype: int64)\n",
 48 |       "Series([], Name: toEmails, dtype: int64)\n",
 49 |       "Series([], Name: fromEmails, dtype: int64)\n",
 50 |       "Series([], Name: fromEmails, dtype: int64)\n"
 51 |      ]
 52 |     },
 53 |     {
 54 |      "ename": "IndexError",
 55 |      "evalue": "index 0 is out of bounds for axis 0 with size 0",
 56 |      "output_type": "error",
 57 |      "traceback": [
 58 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 59 |       "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
 60 |       "\u001b[1;32m<ipython-input-2-4a4553ae223c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     75\u001b[0m \u001b[1;31m# Calling the function to execute project 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m \u001b[0mgetStats\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'pratik@cloudxlab.com'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
 61 |       "\u001b[1;32m<ipython-input-2-4a4553ae223c>\u001b[0m in \u001b[0;36mgetStats\u001b[1;34m(path1, emailidPersonal)\u001b[0m\n\u001b[0;32m     50\u001b[0m     \u001b[1;31m#print('--------------')\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     51\u001b[0m     \u001b[1;31m# Printing the results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Top hour slot at which emails were sent: '\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtopTimes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     53\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtopFrom\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m>\u001b[0m\u001b[1;36m1\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mtopFrom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0memailidPersonal\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     54\u001b[0m         \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Top sender of emails: '\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtopFrom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 62 |       "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   1687\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1688\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1689\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mgetitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1691\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 63 |       "\u001b[1;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0"
 64 |      ]
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "## Python Project 2\n",
 69 |     "import os, pathlib, re\n",
 70 |     "import pandas as pd\n",
 71 |     "# Creating the function to get the maximum time slot, the maximum recipient, the maximum sender and best friend\n",
 72 |     "def getStats(path1='../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/',emailidPersonal='pratik@cloudxlab.com'):\n",
 73 |     "    data = pd.DataFrame(columns=['fromEmails', 'toEmails', 'dates','hrs','emails'])\n",
 74 |     "    # Looping through all the files present under the path directory\n",
 75 |     "    for path, subdirs, files in os.walk(path1):\n",
 76 |     "        index=0\n",
 77 |     "        for name in files:\n",
 78 |     "            # Skipping the sql lite file, all other files are email files\n",
 79 |     "            if(name.endswith('sqlite')):\n",
 80 |     "                continue\n",
 81 |     "            hand=open(pathlib.PurePath(path, name))\n",
 82 |     "            FromEmail=''\n",
 83 |     "            ToEmail=''\n",
 84 |     "            dateExt=''\n",
 85 |     "            pattern1 = '([<].*@[^>\\n ]*)' # pattern to regex the email when name also present\n",
 86 |     "            pattern2 = '([ ].*@[^>\\n ]*)' # pattern to regex the email when name not present\n",
 87 |     "            # Looping through the lines and extracting the email ids, the time and the hour slot\n",
 88 |     "            for line in hand:\n",
 89 |     "                if line.startswith(\"To:\"):\n",
 90 |     "                    if (len(re.findall('[<]',line)))>0:\n",
 91 |     "                        ToEmail=re.findall(pattern1,line)[0][1:]\n",
 92 |     "                    else:\n",
 93 |     "                        ToEmail=re.findall(pattern2,line)[0][1:]\n",
 94 |     "                elif line.startswith(\"From:\"):\n",
 95 |     "                    if (len(re.findall('[<]',line))):\n",
 96 |     "                        FromEmail=re.findall(pattern1,line)[0][1:]\n",
 97 |     "                    else:\n",
 98 |     "                        FromEmail=re.findall(pattern2,line)[0][1:]\n",
 99 |     "                elif line.startswith(\"Date:\"):\n",
100 |     "                    dateExt=re.findall(\"[ ].*[ \\n]\",line)[0].strip()\n",
101 |     "                    hourSlot=dateExt.split(\" \")[4].split(\":\")[0]\n",
102 |     "            # Appending to the data frame\n",
103 |     "            index+=1\n",
104 |     "            data.loc[index] = [FromEmail, ToEmail, dateExt, hourSlot, FromEmail]\n",
105 |     "            index+=1\n",
106 |     "            data.loc[index] = [FromEmail, ToEmail, dateExt, hourSlot, ToEmail]\n",
107 |     "    #print(data)\n",
108 |     "    # Sorting to get the maximums on the top\n",
109 |     "    topTimes=data[(data[\"fromEmails\"] != emailidPersonal)].groupby(['hrs']).agg('count')['fromEmails'].sort_values(ascending =False)\n",
110 |     "    topFrom=data.groupby(['fromEmails']).agg('count')['toEmails'].sort_values(ascending =False)\n",
111 |     "    topTo=data.groupby(['toEmails']).agg('count')['fromEmails'].sort_values(ascending =False)\n",
112 |     "    topConvo=data.groupby(['emails']).agg('count')['fromEmails'].sort_values(ascending =False)\n",
113 |     "    print(topTimes[0:5])\n",
114 |     "    print(topFrom[0:5])\n",
115 |     "    print(topTo[0:5])\n",
116 |     "    print(topConvo[0:5])\n",
117 |     "    #print('--------------')\n",
118 |     "    # Printing the results\n",
119 |     "    print('Top hour slot at which emails were sent: '+topTimes.index[0])\n",
120 |     "    if len(topFrom)>1 and topFrom.index[0] == (emailidPersonal) :\n",
121 |     "        print('Top sender of emails: '+topFrom.index[1])\n",
122 |     "    elif len(topFrom)>0:\n",
123 |     "        print('Top sender of emails: '+topFrom.index[0])\n",
124 |     "    else:\n",
125 |     "        print('No emails were received by the user')\n",
126 |     "        \n",
127 |     "    if len(topTo)>1 and topTo.index[0] == (emailidPersonal) :\n",
128 |     "        print('Top recepient of emails: '+topTo.index[1])\n",
129 |     "    elif len(topTo)>0:\n",
130 |     "        print('Top recepient of emails: '+topTo.index[0])\n",
131 |     "    else:\n",
132 |     "        print('No emails sent')\n",
133 |     "        \n",
134 |     "        \n",
135 |     "    if len(topConvo)>1 and topConvo.index[0] == (emailidPersonal):\n",
136 |     "        print('Top friend: '+topConvo.index[1])\n",
137 |     "    elif len(topConvo)>0:\n",
138 |     "        print('Top friend: '+topConvo.index[0])\n",
139 |     "    else:\n",
140 |     "        print('No top friend')\n",
141 |     "    \n",
142 |     "# Calling the function to execute project 2\n",
143 |     "getStats('../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/','pratik@cloudxlab.com')"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": null,
149 |    "metadata": {
150 |     "collapsed": true
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "# Preparation of the email backup (%cd persists for the following lines; a !cd only affects its own subshell)\n",
155 |     "!git clone https://github.com/jay0lee/got-your-back.git\n",
156 |     "%cd got-your-back\n",
157 |     "!touch nobrowser.txt\n",
158 |     "!python3 gyb.py --email pratik@cloudxlab.com --action backup\n"
159 |    ]
160 |   }
161 |  ],
162 |  "metadata": {
163 |   "kernelspec": {
164 |    "display_name": "Python 3",
165 |    "language": "python",
166 |    "name": "python3"
167 |   },
168 |   "language_info": {
169 |    "codemirror_mode": {
170 |     "name": "ipython",
171 |     "version": 3
172 |    },
173 |    "file_extension": ".py",
174 |    "mimetype": "text/x-python",
175 |    "name": "python",
176 |    "nbconvert_exporter": "python",
177 |    "pygments_lexer": "ipython3",
178 |    "version": "3.6.3"
179 |   }
180 |  },
181 |  "nbformat": 4,
182 |  "nbformat_minor": 2
183 | }
184 | 


--------------------------------------------------------------------------------