├── .gitignore ├── Python_feb_21_2021_classes_objects_lamda_functions.ipynb ├── README.md ├── Sentiment Analysis with LLMs.ipynb ├── deep_learning ├── BackProp.ipynb ├── README.MD ├── autoencoders.ipynb ├── cnn_with_keras.ipynb ├── convolutional_neural_networks.ipynb ├── data │ ├── fashion │ │ ├── t10k-images-idx3-ubyte.gz │ │ ├── t10k-labels-idx1-ubyte.gz │ │ ├── train-images-idx3-ubyte.gz │ │ └── train-labels-idx1-ubyte.gz │ └── mnist │ │ ├── t10k-images-idx3-ubyte.gz │ │ ├── t10k-labels-idx1-ubyte.gz │ │ ├── train-images-idx3-ubyte.gz │ │ └── train-labels-idx1-ubyte.gz ├── images │ ├── ann │ │ ├── README │ │ ├── activation_functions_plot.png │ │ └── perceptron_iris_plot.png │ └── autoencoders │ │ └── linear_autoencoder_pca_plot.png ├── introduction_to_artificial_neural_networks.ipynb ├── model_ckps │ └── README.txt ├── recurrent_neural_networks.ipynb ├── reinforcement_learning.ipynb ├── rnn_keras.ipynb ├── simple.txt ├── tensorflow.ipynb ├── tensorflow_keras_regression.ipynb └── training_deep_neural_nets.ipynb ├── exp └── Optimizer_2.ipynb ├── machine_learning ├── .ipynb_checkpoints │ ├── classification-checkpoint.ipynb │ ├── end_to_end_project-checkpoint.ipynb │ ├── end_to_end_project_bootcamp-checkpoint.ipynb │ └── training_linear_models-checkpoint.ipynb ├── Bikes_solution.ipynb ├── Natural_Language_Processing.ipynb ├── README.MD ├── Unsupervised Learning.ipynb ├── classification.ipynb ├── datasets │ ├── bike_sharing │ │ ├── Readme.txt │ │ ├── day.csv │ │ └── hour.csv │ └── housing │ │ ├── README.md │ │ ├── housing.csv │ │ └── housing.tgz ├── decision_trees.ipynb ├── dimensionality_reduction-Copy.ipynb ├── dimensionality_reduction.ipynb ├── end_to_end_project.ipynb ├── end_to_end_project_bootcamp.ipynb ├── ensemble_and_randomforest.ipynb ├── images │ ├── autoencoders │ │ └── README │ ├── classification │ │ └── README │ ├── cnn │ │ ├── README │ │ └── test_image.png │ ├── decision_trees │ │ ├── README │ │ └── iris_tree.png │ ├── deep │ │ └── README │ ├── dim_reduction │ │ └── README │ ├── distributed │ │ └── README │ ├── end_to_end_project │ │ ├── README │ │ └── california.png │ ├── ensembles │ │ ├── README │ │ ├── boosting_plot.png │ │ ├── decision_tree_without_and_with_bagging_plot.png │ │ ├── early_stopping_gbrt_plot.png │ │ ├── gbrt_learning_rate_plot.png │ │ ├── gradient_boosting_plot.png │ │ ├── hard_voting.png │ │ ├── law_of_large_numbers_plot.png │ │ ├── mnist_feature_importance_plot.png │ │ ├── pasting_bagging.png │ │ └── training_diverse_classifiers.png │ ├── fundamentals │ │ └── README │ ├── rl │ │ └── README │ ├── rnn │ │ └── README │ ├── svm │ │ └── README │ ├── tensorflow │ │ └── README │ └── training_linear_models │ │ ├── README │ │ ├── gradient_descent_paths_plot.png │ │ ├── gradient_descent_plot.png │ │ └── sgd_plot.png ├── math_linear_algebra.ipynb ├── naive_bayes.ipynb ├── sklearn_text_analyser.ipynb ├── support_vector_machines.ipynb └── training_linear_models.ipynb ├── projects ├── Fashion-MNIST │ ├── Fashion-MNIST-DL-Keras.ipynb │ └── Fashion-MNIST-ML.ipynb ├── autoquiz │ ├── README.MD │ └── auto_create_quiz.py.ipynb ├── deploy_mnist │ ├── README.md │ ├── flask_app │ │ └── predictions.py │ ├── requirements.txt │ ├── test-images │ │ ├── 2.png │ │ ├── 5.png │ │ └── 7.png │ └── train_mnist_model.py └── sui_2_sandeepgiri9034.ipynb └── python ├── .ipynb_checkpoints └── Python - Numpy-checkpoint.ipynb ├── Lambda Operator.ipynb ├── Python - Numpy.ipynb ├── Python - Pandas.ipynb ├── Python - Part I.ipynb ├── Python - Part II.ipynb ├── README ├── __pycache__ └── mylib.cpython-36.pyc ├── dataset └── housing.csv ├── hello.py ├── mbox-short.txt ├── mbox.txt ├── python-hands-sessions.ipynb ├── simpleexp.py ├── solutions └── Python_Project_1.ipynb └── stock_analysis_yfinance_pandas.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | projects/deploy_mnist/venv/ 2 | projects/deploy_mnist/__pycache__/ 3 | projects/deploy_mnist/trained_models/*.pkl 4 | projects/deploy_mnist/flask_app/__pycache__/ 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository contains machine learning projects and notebooks for the course of [CloudxLab](https://CloudxLab.com/) 2 | Feel free to checkout and explore. 3 | 4 | ### NOTE ABOUT THE Notebook Diff: 5 | 6 | Usually the jupyter notebooks are unfriendly to diff. So, we have installed nbdime(See https://nbdime.readthedocs.io/en/latest/) 7 | 8 | All you need to do to enable in git this: 9 | export PATH=/usr/local/anaconda/bin:$PATH 10 | cd ml 11 | nbdime config-git --enable 12 | 13 | To know more about us [click here](https://CloudxLab.com/) 14 | -------------------------------------------------------------------------------- /deep_learning/README.MD: -------------------------------------------------------------------------------- 1 | 2 | Please note that some of the notebooks are based on the repository of this book: https://github.com/ageron/handson-ml 3 | -------------------------------------------------------------------------------- /deep_learning/autoencoders.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "**Autoencoders**" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Setup" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "First, let's make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures:" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# To support both python 2 and python 3\n", 31 | "from __future__ import division, print_function, unicode_literals\n", 32 | "\n", 33 | "# Common imports\n", 34 | "import numpy as np\n", 35 | "import os\n", 36 | "import sys\n", 37 | "\n", 38 | "# to make this notebook's output stable across runs\n", 39 | "def reset_graph(seed=42):\n", 40 | " tf.reset_default_graph()\n", 41 | " tf.set_random_seed(seed)\n", 42 | " np.random.seed(seed)\n", 43 | "\n", 44 | "# To plot pretty figures\n", 45 | "%matplotlib inline\n", 46 | "import matplotlib\n", 47 | "import matplotlib.pyplot as plt\n", 48 | "plt.rcParams['axes.labelsize'] = 14\n", 49 | "plt.rcParams['xtick.labelsize'] = 12\n", 50 | "plt.rcParams['ytick.labelsize'] = 12\n", 51 | "\n", 52 | "# Where to save the figures\n", 53 | "PROJECT_ROOT_DIR = \".\"\n", 54 | "CHAPTER_ID = \"autoencoders\"\n", 55 | "\n", 56 | "def save_fig(fig_id, tight_layout=True):\n", 57 | " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", 58 | " print(\"Saving figure\", fig_id)\n", 59 | " if tight_layout:\n", 60 | " plt.tight_layout()\n", 61 | " plt.savefig(path, format='png', dpi=300)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "# PCA with a linear Autoencoder" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "Build 3D dataset:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 2, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import numpy.random as rnd\n", 85 | "\n", 86 | "rnd.seed(4)\n", 87 | "m = 200\n", 88 | "w1, w2 = 0.1, 0.3\n", 89 | "noise = 0.1\n", 90 | "\n", 91 | "angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5\n", 92 | "data = np.empty((m, 3))\n", 93 | "data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2\n", 94 | "data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2\n", 95 | "data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Normalize the data:" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "(200, 3)" 114 | ] 115 | }, 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "data.shape" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "from sklearn.preprocessing import StandardScaler\n", 132 | "scaler = StandardScaler()\n", 133 | "X_train = scaler.fit_transform(data[:100])\n", 134 | "X_test = scaler.transform(data[100:])" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Now let's build the Autoencoder..." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "name": "stderr", 151 | "output_type": "stream", 152 | "text": [ 153 | "/usr/local/anaconda/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 154 | " from ._conv import register_converters as _register_converters\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "import tensorflow as tf\n", 160 | "\n", 161 | "reset_graph()\n", 162 | "\n", 163 | "n_inputs = 3\n", 164 | "n_hidden = 2 # codings\n", 165 | "n_outputs = n_inputs\n", 166 | "\n", 167 | "learning_rate = 0.01\n", 168 | "\n", 169 | "X = tf.placeholder(tf.float32, shape=[None, n_inputs])\n", 170 | "hidden = tf.layers.dense(X, n_hidden)\n", 171 | "outputs = tf.layers.dense(hidden, n_outputs)\n", 172 | "\n", 173 | "reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))\n", 174 | "\n", 175 | "optimizer = tf.train.AdamOptimizer(learning_rate)\n", 176 | "training_op = optimizer.minimize(reconstruction_loss)\n", 177 | "\n", 178 | "init = tf.global_variables_initializer()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "n_iterations = 1000\n", 188 | "codings = hidden\n", 189 | "\n", 190 | "with tf.Session() as sess:\n", 191 | " init.run()\n", 192 | " for iteration in range(n_iterations):\n", 193 | " training_op.run(feed_dict={X: X_train})\n", 194 | " codings_val = codings.eval(feed_dict={X: X_test})" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 12, 200 | "metadata": {}, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "array([-1.9345188, 2.0936997], dtype=float32)" 206 | ] 207 | }, 208 | "execution_count": 12, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "codings_val[0]" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 13, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "Saving figure linear_autoencoder_pca_plot\n" 227 | ] 228 | }, 229 | { 230 | "data": { 231 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAE8RJREFUeJzt3W+MXNV5x/Hvs7PrxQ0hCgb5TWT8IiClKQ3BfrNRKrbBagSK2gjUUiWOSexgk+BI0LRSLIFq4shIfpFaKEAx4o9NUiIUQCFJUVUIlqBeKTVySERT0SZgoIgG3JBgF//dpy/O3O74ev7cmb1nzp07v480Wu/MnZkzu57fnvOcM+eauyMiEsNE6gaISH0pYEQkGgWMiESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRTKZuwCDOO+88X7lyZepmiIyt55577i13P7/XcSMZMCtXrmT//v2pmyEytszsYJHjNEQSkWgUMCISjQJGRKJRwNTA3Bzcdlv4KlIlI1nklQVzc3D55XD8OCxZAk89BTMzqVslEqgHMwK69VD27g3hcupU+Lp377BbJ9KZejBDNjcXQmB29syeRrvbevVQZmfD9dnts7PDeBUixShgOugWBIt5zE5h0Xrb5CR84Quwbl37Hkpre2ZmwuOU3VaRMihg2ohV1+gWFq23nToFd98Nu3fDzp29eygzMwoWqSbVYNqIVdfIhjONxplhkd1mFr53D8996FAIuG3bygs6zTrJsKgH00asuka34Ux22549cN99Idyy5y6zh6JZJxkmBUwbMesa3cIiuy2rvcSoqfSq6YiUSQHTQcq6Rszn1qyTDNPYB0yZs0UxZp7KplknGaakAWNm08CdwBrgXOCXwBZ3f2IYz19mPWKUahuadZJhST2LNAm8ClwGvA+4GXjYzFYO48nLnC1azGMNe1an6PNptkkWK2kPxt2PAFtbrvqhmb0ErAJejv38ZdYjBn2sTgvsYvUwiva0ei0K1BBLiqhUDcbMlgMXAS+0uW0jsBFgxYoVpTxfmfWI1sdatmyhB9PrMTstsIs1xCo6i9TpuNbgaTRg/fq4gSgjzt0rcQGmgCeBu3sdu2rVKq+qffvcly51bzTC1337ih1v5h6W14X7bt+etn2djtu+PVyXtdWs2OvMP/b27f3dR6oF2O8F3teV6MGY2QTwIHAc2JyiDflu/6DDgH7XmXRbYBdD0V5bp+OyoeDRo1nE9LeeZpSK4VKCIikU8wIYcD/wNLC0yH3K7sHk/1rffXd/vZBuj1XHv+z79rlff7379HT/r7O1BxSzpyZxMUI9mLuADwFr3P3dFA3I9zoeeWTw1a6LqesMY/q4jALtYlYca6HfeEm9DuYCYBNwDHjDsk/6wSZ3/86w2pH/T3/JJfDjH8PExGBvgqquM+lnBqlIcAz6Oq+9NnxVcbj+Uk9THyQMkZLKzwDdeCPMz4dZkp076/MmKFIfilkjmZuDyy6DEydgaioEjNRb6oV2lTEzA1u2hO0Rjh8PATM/DwcO1GexWbftIjIxt+DcsSOEC4SvGzZosV/dVaEGUymtw6XJydNndkZ9xqNIfShmjeT110///he/CI/fqcalGafRpx5MzsxMGBZdfjlcccXCArg6bKhdpLaShVCZG1xlNmw487rjx8MUfTva0Hz01bYHM+hsydxcqMFkPZhGI1w/6jMe/fQGYhWpN24MX7dtg9de6328ZpxGXy0DZjFd69a/mgDXXQcrVoz+526qstFUFjKbNi1c99GPtj9WW0uMvloOkRbTtc4XQtetC8VfGO1iY5EC77AcOhSWAED4euhQ52Oz4rvCZTTVsgezmK51u7+adSg2Vqk3MDsL09Ma+oyDWgZMtzdTa20GOm/A3bo1wdatcOxYmLYuMryo6nYGVVkAWKWwk7hqGTDQ/s2U33vFvfsUdHZ8Fi5FVvbWobczDFUJO4mrljWYTvK1mRMnutdpsuOzcFmzpndgaGpVZMFYBUy+0Dk11b3o2Xr89HQYKvX6q1ulYqpIarUdIrWTH/tD9zrAILUC1RdEFljY2mG0rF692vfv35+6GZUt5orEZmbPufvqXseNVQ+mTCrmivQ2VjWYMqmYK9KbAmZAKuaK9KYhUot+aioq5or0poBpGqSmosViIt0lHyKZ2WYz229mx8zsgVTtWGxNRTuviZypCj2Y14FvAJ8ElqZqxGI+IKkZJZH2kgeMuz8KYGargQ+kasdiaipV2WtFpGqSB0xRMc5NnTdoTUU7r4m0NzIB4+67gF0QVvImbs5pivR+Os1QaTWw1NnIBEzVdev9dKrRqHYjdZd8FmmUFZ056jRDpdXAUnfJezBmNtlsRwNomNlZwEl3P5m2Zd310/voVKNR7UbqLnnAADcDf9vy/VrgVmBrktYU1O/MUbvzMWs1sNRd8oBx961UPEzaKdr7yPd08udj1mpgqbPkATOqivY+tEZGxpkCZhGK9D5UZ5FxpoCJrLWns2zZwkyRejEyDhQwQ5CFida8yLjROpgh0ZoXGUcKmCHRDngyjgoNkcxsCXAYmOpwyGPuflVpraohrXmRcVS0BjMFrG9z/U3ApcAPSmtRjWnNi4ybQgHj7keAb7deZ2Y7COHyVXe/P0LbRGTE9T2LZGYG3A7cANzg7neW3ioRqYW+irxmNkHYk+XLwIYsXMxs2szuMbNfmdk7ZvaimX0lQntFZIQU7sGYWQPYDVwDrHX3h3KP8wbwJ8CvgD8E/snM/tvdHy6xvSIyQgr1YMxsCvgu8OfANblwwd2PuPst7v6f7j7v7j8FHgc+XnqLRWRk9AwYM5sGHgU+BVyVbdLd4z5TwB8BP1t0C0VkZBUZIu0hhMsDwPvNbG3u9sfd/Xe5674FvNO8r4iMqa4B05wxuqL57eebl1bzwHtz9/kmMAN8wt2Pl9JKERlJXQPG3R04p+iDmdlO4HJCuLy1yLaJyIgr7dPUZnY78Angj939zbIeV0RGVykfdjSzC4CvAB8EXjKzw83LEwXue66ZPWZmR8zsoJl9pow2iUh6pfRg3P0gYAPe/Q7gOLAcuAT4kZk97+4vlNE2EUkn6XYNZvYe4GrgFnc/7O7PEtbPfC5lu0SkHKn3g7mIcA6kF1uuex74cP5AM9toZvvNbP+bb6rEIzIKUgfM2UB+Dc1vyU19Qzg3tbuvdvfV559//lAaVwVFzx4pUkWp9+Q9zJnT4OcQFumNPZ27WkZd6h7Mi8CkmV3Yct1HABV4Wdw+vur5SBUk7cG4+xEzexT4upl9kTCL9GfAx1K2qyoGPaeSej5SFal7MBD2llkK/Bp4CPiSpqiDbB/fbdv6CwmdwUCqInUNBnf/H+DTqdtRVfl9fOfmem8crrNJSlUkDxgprujQZ2YGdu6ERx6Bq6/W8EjSUcCMkHZDn3bhMTcHN94YjnnmGbj4YoWMpFGFGowUVPTkbarBSFWoBzNCip68rZ8aTJGajsigFDAjpsjJ24oGkaazJTYFTE0VCaKiNR2RQakGM8aK1nREBqUezBgrOpQSGZQCZswVGUqJDEpDJBGJRgEzpvRpaxkGDZHGkKanZVjUgxlDWukrw6IezJhoXbGrT1vLsChgxkC7IZGmp2UYNEQaA61DomPHYOvWcP2WLQoXiUsBMwayIdHEBMzPw5NPhh5N0RkkzTjJoFKfeG1z81xHx8zsgZRtqbNsxe6aNQsh06u4m4XKrl0hjG65pb9QEoH0NZjXgW8AnyTsyyuRzMyEodEzz7Qv7rYWgWGhZmMWAqk1lDSskqJSn1XgUQAzWw18IGVbxkGnzx7li8DXXrtQs5mYCB+GNNOMk/QvdQ+mMDPbCGwEWLFiReLWjK52nz3Kr4uB06exd+6EQ4c04yT9G5mAcfddwC6A1atXe+Lm1Ep+Xcy6deGiaWxZrGgBY2Z7gcs63Pwv7v7xWM8t/ek0dFKwyGJFCxh3n4312FK+frdtiLGXr/YHrp+kQyQzm2y2oQE0zOws4KS7n0zZLukuxocl9QHMekq90O5m4F3ga8Da5r9vTtoi6SnGhyX1Acx6Sj1NvRXYmrIN0r8YH5bUBzDraWRmkaQ6Yuzlq/2B60kBI5Wh/YHrRwEjfVNBVopKXeSVEaSCrBSlgJG+6YRtUpSGSNI3FWSHa5QXICpgZCApCrKj/EYbVJF6V5V/LgoYqZy5OdizJ/x73brwphnXwnK7elfr6676z0UBI5UyNxf+EmfbRtx/Pzz9dO83Wl31WoC4mJ/LMHo+ChiplL174cSJhe+zN032Rjt2LGx+9fbbYUvPKg4LytRtk7C9e2HZssFWQA+r56OAkUqZnYWpqdM3vsreWDt3wubNcPIk7NgRdtubnq7esKBs+XpXPhwG2RBsWD1CBYxUysxM+M+er8FAeBPNz4M3txvLb14+7EJnquJqPhwOHQqnoMk2ai/SnmF99ksBI5XTaYaqdZg0Px96MEuWhGHCsAudKYur7cKh3/YMa6mBAkZGRuubYtmyhWFBigJwjOcs2iNqFw633dZ/e4ax1EABIyOl05ti2Fs99DvE6BUenXogne6X/znMzsLkZOjZTU4O9jOIMeRTwMjIi93db/fG6+c5iwxfOn2+q2jo/PznofjtvlCj6vc1xhjyKWCkFsro7rdO/WbDL+j8xiv6nPnw2LPnzIBo1yMqGjoQZtdOnQr/Pnmy//UwW7cu1LbKHGYqYERY+AveWkCenj79JHSDvvFaw2NyEu67Lzxea2Dl60ud1ri0C51XXgmhkpmY6H89TL5wXtYwM1nAmNk0cCewBjgX+CWwxd2fSNUmGV/ZG3d+Pnyf/SWHYrWWbvWL1vD4yU/g+98Pw5h8YGVfe61xaW3PsmVw660Lw6JGA+64o//1MFm4rFkTejN1qMFMAq8Szp30CnAl8LCZXezuLydsl4yhdlPgk813R6+FbEXqF9n3+TDotfQ/W+PS+jhPPbWwTujAgYXeixlcdx1s3Nj/687aXma4QMKAcfcjnL7h9w/N7CVgFfByijbJ+GrtZbz9dvh64ADcc0/vomfRKeu9e08Pg/Xrzzyu6OzU7t0LQ65GI1y3ZElYmNjPbFDsAnllajBmthy4CHihw+0b0bmpJYLWN+TsbOiNHD260NPoVXspGgrtTtGb164Wk12faQ00CL2WFSt6F6U7iboext2TX4Ap4Eng7iLHr1q1ykXKsG+f+9Kl7o1G+Hr99eHf2YSvWbh+375w2b49fG33OJ1uG/S41na1Ht/ttu3bF9rfaITvYwD2e4H3avJzU5vZBPAgcBzYHKs9Iu3khzew0MtoNMIwJutpdOoZ9Dsk6XVMr2njbsOaqp1fKum5qc3MgHuB5cCV7n6ix11EStVu2LJu3Zlv3k5L8QddoNYplIpOG3cKqqptZ5q6BnMX8CFgjbu/m7gtMoY6vSGLFl97FXjbBUm3UCpj2rhK55dKuQ7mAmATcAx4I3RmANjk7t9J1S4ZP0XekJ2CqNuQpFOQdAul2NPGw5ZymvogYD0PFKmIdkHUbUjSKUi6hVLVhjiLlXqIJDLyeu1fkw+SXiFSpSHOYilgRCLpFiR1CpFuFDAiEY1LkHSiU8eK1Ey2N+/cXOqWqAcjUitVOxGbejAiNdJpk6pUFDAiNZLNXDUaNf+ogIgMX9XW0ShgRGqmSjNXGiKJSDQKGBGJRgEjItGYD3KWpsTM7E3gYOp25JwHvJW6EUOm1zwe2r3mC9z9/F53HMmAqSIz2+/uq1O3Y5j0msfDYl6zhkgiEo0CRkSiUcCUZ1fqBiSg1zweBn7NqsGISDTqwYhINAoYEYlGASMi0ShgSmJm02Z2r5kdNLN3zOynZnZF6nbFYGbnmtljZnak+Xo/k7pNMY3T77YdM7vQzI6a2bf7va8CpjyTwKuE0+W+D7gZeNjMViZsUyx3EE71uxz4LHCXmX04bZOiGqffbTt3AP86yB01ixSRmf0MuNXdH0ndlrKY2XuA3wB/4O4vNq97EPgvd/9a0sYNUR1/t+2Y2V8CVwH/BnzQ3df2c3/1YCIxs+XARcALqdtSsouAk1m4ND0P1LkHc5oa/25PY2bnAF8H/mrQx1DARGBmU8B3gN3u/u+p21Oys4Hf5a77LfDeBG0Zupr/bvO2Afe6+2uDPoACpiAz22tm3uHybMtxE8CDhBrF5mQNjucwcE7uunOAdxK0ZajG4Hf7/8zsEmAN8HeLeRxtmVmQu8/2OsbMDLiXUPy80t1PxG5XAi8Ck2Z2obv/R/O6j1D/4cI4/G5bzQIrgVfCS+dsoGFmv+/ulxZ9EBV5S2Rmfw9cAqxx98Op2xOLmX0XcOCLhNf7j8DH3L22ITMuv9uMmf0ep/dU/5oQOF9y9zeLPo6GSCUxswuATYT/hG+Y2eHm5bOJmxbDl4GlwK+Bhwj/6eocLuP0uwXA3f/X3d/ILoSh8dF+wgXUgxGRiNSDEZFoFDAiEo0CRkSiUcCISDQKGBGJRgEjItEoYEQkGgWMiESjgBGRaBQwEoWZLTGz410+gf5o6jZKfPo0tcQyBaxvc/1NwKXAD4bbHElBn0WSoTGzHcDfAF9192+mbo/Epx6MRNfcS+V24AbgBne/M3GTZEhUg5GomrvA7SJs8bChNVzM7C/M7Nnm1gcvp2qjxKMejERjZg1gN3ANsNbdH8od8hvgW4Rd4m4acvNkCBQwEkVzc+x/AP4UuMbdz5g1cvd/bh776SE3T4ZEASOlM7Np4HuETaOvcvcfJW6SJKKAkRj2AJ8CHgDeb2b5k3U97u75U59IDSlgpFTNGaPsvM2fb15azTMm51ASBYyUzMPCqvx5k2RMKWAkmeYs01TzYmZ2FiGjjqVtmZRFASMpfQ64v+X7d4GDhPPvSA3oowIiEo1W8opINAoYEYlGASMi0ShgRCQaBYyIRKOAEZFoFDAiEs3/AQcsVH7eWduMAAAAAElFTkSuQmCC\n", 232 | "text/plain": [ 233 | "
" 234 | ] 235 | }, 236 | "metadata": {}, 237 | "output_type": "display_data" 238 | } 239 | ], 240 | "source": [ 241 | "fig = plt.figure(figsize=(4,3))\n", 242 | "plt.plot(codings_val[:,0], codings_val[:, 1], \"b.\")\n", 243 | "plt.xlabel(\"$z_1$\", fontsize=18)\n", 244 | "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", 245 | "save_fig(\"linear_autoencoder_pca_plot\")\n", 246 | "plt.show()" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 14, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "Saving figure linear_autoencoder_pca_plot\n" 259 | ] 260 | }, 261 | { 262 | "data": { 263 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEwJJREFUeJzt3V2IXOd9x/Hvf18kG8eGeC2ci7AWofFNUZNYe7NtitexW5MQkuCFGlx5Y7u1TCMbZEKhApuuIyNBLoIwdqjWxLbUNoZQycbBlKYx2gtXe7PCcSBQHGgjFUzaWG0SS01WWu2/F8+ezuhoXs7Mnue8zPl9YBjpzJnZR6uZ3zzvx9wdEZEYxsougIiMLgWMiESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRKGBEJJqJsgswjFtuucV37txZdjFEGuvMmTMfuPuOfufVMmB27tzJ6upq2cUQaSwzO5vlPDWRRCQaBYyIRKOAEZFoFDAim1ZW4PDhcC/5qGUnr0jeVlbg7rvh0iXYtg3eegtmZ8suVf2pBiMCLC+HcLlyJdwvL+f32k2uGakGIwLMzYWaS1KDmZvL53WbXjNSwIgQPvRvvRVqLnNzWw+BlZXwWufOXVszUsCIyNDaay3j4zCx+SnLs2ZUFwoYGSlJzWHQWkivpswgr7myAouLsLYGGxvh2KOPDvzPGBkKGBkZW+nv6NTJOzs7WPAk5ybhYhZqL5/5DOzfH17j2LFm9cNoFElqIctIzFZGgpJO3vHxq5sy3V4zCZOnnw73SdhcutSqubiH2zvvxBuhqjrVYKTystZM0iNBU1MhlLI0bbp18nYbXeoUPMm5v/1tCBYIj0OcEao6UMBI4QbtJ+nWfElrD4mpqVazJGtzaXb22nMGCZ7k3OPH4eWXYX09PLawEG7p1xi2v6hW3L3UG/A4sAqsAa9kec7u3btd6un0affrr3cfHw/3p0/3f87Ro+F8M/dt27I959Ch8BwI94cObb3saadPh9ftVJ5ejyWP9/o99Ht+2YBVz/BZrUIN5n3gWeBe4PqSyyJb1O9bOWttpP31nnii1dTIeqXjWBPn2nWq8WR5DHr/HkZpcl7pAePuJwHMbAb4eMnFkS3I8sHo9sHvFEzJkO/ly63nr69nm6yW98S5bmUcVqffwyhOzis9YLIys73AXoDp6emSSyOddBtxaf9QdvrgdwomaA35ttda0rWRXh/6frWIQeRZq0jKfOQInD/f+vckrz8xEUazoP6dwrUJGHdfApYAZmZmMlaUpUidRnGyfCg7BdO5c63RmLExmJmBO+4InaVlNCUGbdp1063Mhw+3Xh/C5Lzp6fp3ANcmYKT60rWT9g/l2lpo7szPXzu60ymYnnmmVXOZmAjf9ukPWl4f+iympkLQuW+tVtGtlnfu3NW1lvYgrTMFjOQq3SzZtq01s/WHP4RTp8KHa2Oj9QE7cODaYFpfD883g0ceyTbvJVZTYmUlhOKVKyFkOoVdVuky//KXcOed4bUnJ0PNZVTCBSoQMGY2sVmOcWDczK4D1t19vdySyVYlNZrFxRAuyQzXsbEQHO2h0CmYkg/hwkLv1489l6R9hq5Z6DcZVnquzr59rTC9fDk0i0YlXIBKzINZBDx1W+z1HM2DqZf0nI+jR/vP8ajSPJBh5u5kceiQ+9hYsqDAfXKyGv/eLMg4D8Y868SCCpmZmXFdF6mauo3q1H3Waozyty+OHBuDF16AvXvzee3YzOyMu8/0PU8BI3kZpQliRalr8GYNmNL7YKQ6sr7Zu51X5KjOqMhzrk4VKWAEyF776HVeUaM6Uh/aD0aA7Hup9DovGSE5eFDNIwlUgxEge+2j33mjXuWXwShgBMg+p6SouScyGhQwDZburE3XPrqtcFa4SFYKmBHXa15Kr07dXiucNQwtWSlgRlivEOk3pNytM1fD0DIIBcwI6LVZU7LQ8NKlsFdsct7UVFhXMzbWubO2W2euhqFlEAqYmuu3WdPGRgiR8fHWRtQTE2H1y8ZGON5pdXC3zlx18I6uGP1rCpia69WUScLlnnvgE5+AF19sbZUAIWR6rQ7utsu+gmX0xFrmoYCpuSxNmcXFcOzYsdaWjO4hbNTUaYa8N2PPSgFTc4M0ZdqPgZo6TbCycu11mgbZjH2rtJq6YTSPpTmSZk/7lSbHx8NSjgMHOp+f9b2h1dRyDW2n0CxJsycJl/QugtB5smWeFDANou0UmqW92TM+HvY2LvqqDAqYBtF2Cs3Sb91YEV84CpgG0ULF5unV7Ok32TIPCpgayLNjVvNYBFqXYuk12TIPCpiKy6OdrJEjScvzUiy9KGAqrNN6okHbyRo5kk6K6o9TwFRU+yUtkin/w7wRNHIknRTVH6eAqaj2KmyynmhxcfA3gkaOpJsi+uMUMBWVDoZhwgU0ciRBuh+uqH45BUxF5RkMGjlqtnQ/3JEjYQSpiH45BUyFKRgkD+l+uBMniuuX03WRCrKyAocPh3uRIiXN7fHxcD8/H+7HxsIQ9dRUvJ+tGkwBNFQsZerW3H788VCL2b8fdu3SRLva0lCxlC3d3D5/PoxQDju/KqvSm0hmdrOZvWZmF83srJk9UHaZ8pauomqoWMqWfk9OTcVpwlehBvMCcAm4Ffg08KaZvevuPym3WPlJqqjHj5ddEpGgvdk0NRVvVKnUGoyZ3QDMA0+7+wV3fxt4A3iwzHLFcuxY2Hj77rvV2Svlm50NO9udP9954/g8lF2DuR1Yd/f32o69C9xZUnkGlnXCUrfd/zUBTsoWc7Z3poAxs23ABWCyyymvuft9Q/z8jwC/Th37FXBjhzLsBfYCTE9PD/Gj8jfI6FD6P3FqSiNLUox+X4IxZ3tnrcFMAo90OP4kcAfw/SF//gXgptSxm4AP0ye6+xKwBGHT7yF/Xq4GGR1K/ydqZEmKsLIS3m+XL8PkZPf3WaxJnZkCxt0vAn/XfszMvkkIl6+7+8tD/vz3gAkz+6S7/3Tz2KeAWnTwDlq1TP8nahGixHb8eHiPQevywUV+kQ3cB2NmBjwH7AP2ufu3h/3h7n7RzE4C3zCzPyeMIn0Z+P1hX7NIW6laahGiNIK7Z74RRp1eBK4AD7cd3755/N8IzZv3gCcyvubNwOvAReAc8EC/5+zevdtFpL/Tp923b3c3C/enT3c/79Ch7o+nAaue4fOduQZjZuPAMeB+YI+7v9r28ATwc+CPN0Pm94B/MrP/dPfv9Qm4/wa+krUcIpLd7CycOtW7phxzKUvWUaRJ4LvAl4D73f1k++Me+miebjv0IzN7A/gs0DNgRCSufh24MQcc+k60M7PtwEngi8B96XDp8pxJ4A+BH2+5hCISVcylLFlqMMcJ4fIK8FEz25N6/A13T89leZ7QF9OoyfHavV/qKOaAg7l3n1KyOWLUceLbpg3gRnf/37bnfAu4B/icu3+QX1FbZmZmfHV1NcZLD01bMkiTmNkZd5/pd17PJtJmh/FN7m5dbuOpcDkC/BFwd6xwqapuSwFEqi7mZmi5rUUys+eAzwF3ufsv8nrdutDu/VJHsWveuaymNrPbgCeA3wH+3cwubN7+MY/Xr4OkHXvwYOs/SdtkStXFrnnnUoNx97OA5fFaddGpQ7d9OFB9MlKWQQYbYte8y96uoZayhIcWM0oZlpZg376wFeb27f2/2GIvWVHADCFLeKhPRoq2shI28l5fD39fW8v2xRbz8jgKmCFkCQ8tZpSiLS+HL73E2Fj5X2wKmCFkDQ9dOE2KNDcXmkVra2FW7vPPl//+U8CIjICVlbDXy733wsc+BgsL5YcLKGCGohEiqZJk17pkY6nt20PAVEHp10WqI83alSpZXg5bYiaq9J5UwAxBF1KTKpmbC/vtJqr0nlQTaQgaIZIqmZ0N78Xkwn5V6X+BPqupq6qKq6lFmiSX1dQiIlvR+IDRgkSReBrdB9M+3DwxAQ8/3Gq/anc6ka1rdMC0DzdfuQJHj4YL1B85Avv3a56LyFY1uomUDDfb5kYT7iFUTpzQPBeRPDQ6YJLh5sceu3pey/y85rlItdS1r7DRTSRoLUhcWLi6z2XXLvXBSDXUeWlK4wMmkV75rJXQUhV13rys0U0kkTqo89IU1WBEKq7OS1MUMCIVlZ6LVadgSShgRCqozh277dQHI1JB/fYcqsuwdak1GDN7HHgI2AW86u4PlVkekaqYmwuduhsb4b69Y7dOtZuyazDvA88CL5VcDpHKSWaYW+qShnXaUbHUgHH3k+7+OnA+79cetgpZl6qnjI5O77nl5XB9I/dw3x4idRq2rk0nr5ntBfYCTE9P9zx32CpknaqeMhq6ved6XXurTsPWZTeRMnP3JXefcfeZHTt29Dx32CpknaqeMhq6veeSEDl4sPMX3ewsHDhQ7XCBiDUYM1sG7uzy8L+4+2dj/exhL9uqy71K0frVVKoeIP1ECxh3n4v12v0MW4WsU9VTRsOov+dK3fTbzCYIIffXwMeBR4F1d1/v9byiNv3WrnYinWXd9LvsTt6nCOGS2AM8AyyWUpo26vAV2bqyh6kX3d1St8UYP2vQ4Wd1+IpsXdk1mEIMUxtRh6/I1jUiYIbdsOerXw33VbpSnkidNCJgBq2NpGs8CwtFlFJk9NRmot1W9Ju0lKb+FynKqC9NaUQNBgabtKT+FylCE0YqG1GDaZflG2PQGo/IMJpQU25MDQYG+8YYhWnaUm1NqCk3KmDqfPkHGT2jvkwAGhYwTfjGkHoZ9ZpyowKmCd8YIlXSqICB0f/GkOJoMWx/jQsYkTw0YYg5D40bphbJQxOGmPPQuIAZ9ZmTUow6bbxdpkY1kTpVa0HtaBmcBgyyaVTApKu1x4/DsWOd29FLS3DiBMzPw969pRZbKkoDBv01KmDS82Cg88S7pSV47LHw+A9+EO4VMiKDa1QfTHqN0cJC53b0iRNXPy/9dxHJplE1GLi2WtupHT0/36q5JH8XkcE1LmDSOrWjk+aQ+mBEtqbRAdNtJubKCpw/D4uL6sQT2YrGBky3mZjp40eOhLDRUKTI4BobMN22bmg/vrYG+/aBu6aDiwyjUaNI7brNxGw/Pj4OGxuaDi4yrMbWYLrNxGw/PjUF+/dr/xiRYTU2YKAVKknNpD1kkj/v2qXp4CLDanTAZFlyr+ngIsNrbB8MaMm9SGyNDpitLrnX1g8ivTW6iZTu0E33xfSiHc1E+mt0wEArFAYNC10CRaS/0ppIZrbdzL5jZmfN7EMz+5GZfb6MsgzTF6MdzUT6K7MGMwH8B3AncA74AvA9M9vl7j8rsiDDXC9JO5qJ9GfuXnYZ/p+Z/Rh4xt177sAyMzPjq6uruf7sZOHj1BS88044trBw7SJIBYoImNkZd5/pd15l+mDM7FbgduAnXR7fC+wFmJ6ezv3nJ4Fx111hDRLASy+1+lbUqSsyuEoMU5vZJPD3wDF3/9dO57j7krvPuPvMjh07opQj6YtJXL7c6o/RnBmRwUULGDNbNjPvcnu77bwx4G+BS8DjscqTRdIXk5ic7LwIUp26ItlEayK5+1y/c8zMgO8AtwJfcPfLscqTxewsnDoVrjYAV/fBqFNXZHCldvKa2d8AnwbucfcLWZ8Xo5NXRLLL2slb5jyY24DHCAHzczO7sHn707LKJCL5Km0Uyd3PAlbWzxeR+CoxiiQio6lSE+2yMrNfAGfLLseAbgE+KLsQFaPfSWd1+L3c5u5954vUMmDqyMxWs3SKNYl+J52N0u9FTSQRiUYBIyLRKGCKs1R2ASpIv5PORub3oj4YEYlGNRgRiUYBIyLRKGBEJBoFTIHM7HEzWzWzNTN7pezylMXMbjaz18zs4uaezA+UXaayjep7ozI72jXE+8CzwL3A9SWXpUwvEPb/uZWw2PVNM3vX3TvuZtgQI/neUA2mQO5+0t1fB86XXZaymNkNwDzwtLtfcPe3gTeAB8stWblG9b2hgJGi3Q6su/t7bcfeBX63pPJIRAoYKdpHgF+njv0KuLGEskhkCpicZN2DWLgA3JQ6dhPwYQllkcjUyZuTLHsQCwDvARNm9kl3/+nmsU/R5XI1Um+qwRTIzCbM7DpgHBg3s+vMrFEh7+4XgZPAN8zsBjP7A+DLhCtLNNaovjcUMMV6CvgN8FfAns0/P1VqicrxNcJQ7H8BrwJ/0fAhahjR94YWO4pINKrBiEg0ChgRiUYBIyLRKGBEJBoFjIhEo4ARkWgUMCISjQJGRKJRwIhINAoYicLMtpnZpR4rzE+WXUaJr/aLqaSyJoFHOhx/ErgD+H6xxZEyaC2SFMbMvgn8JfB1d/9W2eWR+FSDkejMzIDngH3APnf/dslFkoKoD0aiMrMxwrWWvwb8WXu4mNmfmNnbZnbBzH5WVhklHtVgJBozGweOAfcDe9z91dQp/wM8T7h8yZMFF08KoICRKMxsEvgu8CXgfne/ZtTI3f9589yvFFw8KYgCRnJnZtuBfwDuAe5z9zdLLpKURAEjMRwHvgi8AnzUzPakHn/D3dOXLpERpICRXG2OGH1+868Pbd7abaBrIDWGAkZy5WFiVfq6R9JQChgpzeYo0+TmzTYv2+HuvlZuySQvChgp04PAy21//w1wFthZSmkkd1oqICLRaCaviESjgBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLR/B9Eh1SALGTF8QAAAABJRU5ErkJggg==\n", 264 | "text/plain": [ 265 | "
" 266 | ] 267 | }, 268 | "metadata": {}, 269 | "output_type": "display_data" 270 | } 271 | ], 272 | "source": [ 273 | "fig = plt.figure(figsize=(4,3))\n", 274 | "plt.plot(X_train[:,0], X_train[:, 1], \"b.\")\n", 275 | "plt.xlabel(\"$z_1$\", fontsize=18)\n", 276 | "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", 277 | "save_fig(\"linear_autoencoder_pca_plot\")\n", 278 | "plt.show()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 16, 284 | "metadata": {}, 285 | "outputs": [], 286 | "source": [ 287 | "from sklearn.decomposition import PCA" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 17, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "pca = PCA(n_components=2)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 19, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "X_2 = pca.fit_transform(X_train)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 21, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stdout", 315 | "output_type": "stream", 316 | "text": [ 317 | "Saving figure linear_autoencoder_pca_plot\n" 318 | ] 319 | }, 320 | { 321 | "data": { 322 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFXVJREFUeJzt3XuMXGd9xvHvb3dtx4pJlWzS/FM5FhJR1RBCwwq0XMSKi1EqIIigptBgQ8BG4LRKykWJStRAqkQgEZlgLlk3TuyQBNImCBC0BUWsGuqV0LoQoqAqVEBcKKTJhpujxsbeX/94Z/Dx2Zmdc2bOO+f2fKTRZGbPjt/MnnnmvZ33NXdHRCSGibILICLNpYARkWgUMCISjQJGRKJRwIhINAoYEYlGASMi0ShgRCQaBYyIRDNVdgFiO/vss33Lli1lF0OkUQ4dOvSUu58z6LjGB8yWLVtYWloquxgijWJmj2c5Tk0kEYlGASMi0ShgRCQaBYxU3uIi3HxzuJd6aXwnr9Tb4iK8+tVw7BisXw8PPgizs2WXSrJSDUYqbWEhhMuJE+F+YaHsEkkeChiptLm5UHOZnAz3c3Nll0jyKD1gzOwqM1sys6Nmducax73DzE6Y2ZHEbW58JZUyzM6GZtGNN6p5VEdV6IP5H+DvgdcBGwccu+juL49fJCnC4mJo0szNjRYMs7MKlroqPWDc/QEAM5sB/qjk4khB0p2zu3fD8vLoYSP1UnrA5PSnZvYU8DRwF3Czux9PH2RmO4GdAJs3bx5vCQU4tXP26FHYtQvcNRLUNqX3weTwb8DzgT8ELgPeCnyw14HuPu/uM+4+c845A6/HkgiSnbOTk7CyopGgNqpNwLj7j9z9x+6+4u6PAB8F3lJ2udooy8S3ZOfsnj2wYYNGgtqobk2kJAes7EK0TZ6Jb8nO2QsvLKbDV+ql9IAxs6lOOSaBSTM7DTie7lsxs0uA/3D3J8zsj4HrgX8ce4FbLj3x7cCBbMGhkaB2Kj1ggA8Df5d4fAXwETPbB/wA+BN3Pwy8GrjTzDYBTwCfB24ad2Hbrtu3cuwYTE3Bvn0hbNR5K72UHjDufgNwQ58fb0oc9wHgA2MoUuONMj+l27eysACHD8Pevad23ipgJKn0gJHxKuLiwW5zZ3ER9u8/WZs5fDg8p5CRrtqMIkkxirx4sFub2bEjzHHZuzeEl5ZVkC4FTMsUffHg7Cxs3hwCS/NcJE1NpJZJ9qEUNWSc7PjVPBdJUsC0UNFDxjFCS5pBASOF0DwX6UV9MBVWxbVoq1gmqS7VYCqq6LVoi1ibRevjSl6qwVRUkcPJ3WC4/vrRhpG1Pq7kpYCpqCKHk4sKBq2PK3mpiVRRRY7MFDWMrNEiycvcvewyRDUzM+NLS0tlF6Mww/alFLU+rgiAmR1y95lBx6kGU3HJYIDhO1k1jCxlUMBENkrNIT1qs3376r4UhYZUmQImolGHddOds6Ap+VIvGkWKaNTRm/SozbZt2oRM6kU1mIhGHb3pN2qjYJG6UMBEVMSwrjpnpc5KbyJl3Zu6c+w1ZvYLM/uNme0zsw1jKubQZmfhuusUEtJOpQcMJ/em3rfWQWb2OuBawuLf5wHPBT4SvXQVpAsOpS5KbyLl2Jt6O3C7uz/aOf5G4G5C6JRqnJPYdMGh1EnpAZPDBcCXE48fBs41s2l3X04eOM69qce9yXuvkSkFjFRVnQJmE/DrxOPufz8HOCVg3H0emIdwqUDMQsXc5L1XzUjLU0qd1ClgjgBnJB53//u3JZTl95If+ImJEDQrK6PXLvo1hXTBodRJnQLmUeAi4L7O44uAJ9LNo3FLfuCnp+Hqq4upXazVFNLQtdRF6QGTdW9q4ABh69i7CSNPHwbuHGdZ+4mxybuaQtIEpQcMGfemdvd/MbOPA98CNgL3p36vEoqsXWzfHu63bVONRepJ68FUkIaipeqyrgdThYl2kqK1b6UpFDAVkJ6Zq7VvpSmq0AfTav2aQxqKliZQwJTswAF49tkwOS85HK2haGkCNZFKtLgId9wRwgVCk0jNIWkSBUyJFhbgeGe2jxlceaVqLdIsCpgSJTtzTzstzHcRaRL1wRQsz9IN6syVplPAFGiYCXLqzJUmU8AUqN8EOdVQpK0UMAVKX6A4Pa0p/9Ju6uQtULdPpbtv0fKypvxLu6kGU7B0n4qWXJA2yxQwZraesKLcuj6HfMnd31xYqWogvSl9r34WjRJJ22WtwawDruzx/DXAxcBXCytRDSRHi6amwkzcEyd697NolEjaLFPAuPszwOeTz3UWf7oYeL+73xGhbJWVHC1aWQnPpa8lEpEh+mDMzIBbgV3ALnf/TOGlqrjkaFG6BqN+FqmKce7X1U+ugDGzCeA2QnPpXd2aS2cL1z2EXRfPAX4OfMrdP5XhNc8Cbge2Ak8B17n7PT2OuwH4W+Bo4ukXuPuP8vw/FCHdtwLl/yFFkqqyKmLmgDGzSWA/cDlwhbvfm3qdXxBC4kfAC4B/NbMn3P2+VS92qk8Dx4BzgRcCXzOzh7s7OKZ80d2vyFrmmNJ9KwoWqZJhNuiLUePJOoq0DrgHeCNweXe7165OH831iae+Z2ZfAV7OyW1Ger3u6cBlwPPd/Qjw7c7vvZ0KbAkrUld5d6WIVeMZONGu0/x5AHg98OZ0uPT5nXXAK4DvDzj0fMIWJY8lnnuYsE1sL28ws6fN7FEze+8a//5OM1sys6Unn3xyUHFFSpNeLrUo6Umfg8Ii1jrQWWowBwjhcidwppmlmyhfcfffpJ7bQ9hx8cCA194EpH/314TtYNPuI2wH+wTwEuB+M/tVqqkGjHfrWJFhFVlr6NW8yTNFItY+XGsGTGfE6JLOw3d0bkkrpMLAzG4BZoFXufuxAf9+ejtYOo9XbQfr7j9IPDxoZp8E3gKsChiROhimn6SXIoIq1qTQNQPGw6ZJ6QDoy8x2E0aSXuXuT2X4lceAKTN7nrv/sPPcRYRtYgdxwLKWTaRqiqo1FBVUMSaFFnaxo5ndCryGEC6ZOj46ncMPAB81s9PN7GXApcBdPV7/UjM704IXA38NfLmo8ouMW95+kn6G3eYmVv9PUiE7O5rZecBPCHNUkntKP+Tul/T8pZO/exawD3gtsAxc6+73mNkrgH92902d4+4lDINvAH4KfMbdbx1Utjru7CiSV94h5lGbVVl3dizkamp3f5whmyvu/jTwph7PP0ToBO4+fuvQBRRpuLzNm6KaVYNoPRiRFpqeDjtZTEzEvcRFASPSMouLcPXV4ULdyUnYvTveTHQFjEhD9evE7TaPVlbCbXk5Xhm0op1IA63ViZscHp+chMOHw/HqgxGRTNaa+t8dHt+xI/TD7N0bwijGcLUCRqSBBs2NmZ2FzZvD1sUxF6VXE0mkgbJM/Y91/VGSAkakobqhsrAAjzwSOnPTF0PGXpReASPSUN2O3qNHw2jRxARs2HBqh2/sRenVByPSUMnhaAj3494AUAEj0lDdPpaJzqc89qzdXtREEomgCiv6J/tYpqdX98GMgwJGpGBVWdEfyt/4T02kihnHGh0SV6z1betINZiKWFyEAwdg377+29BKPQwzv6QKTaoYFDAV0K1SP/ts2CUStA1tneWdX1KlJlXRFDAVsLAQ5iokFxecnNQ2tHWWp+9jXIs/lUF9MEMqsq9kbi4EStKVVzbnJJO1Dbumbh2UHjBmdpaZfcnMnjGzx83sbX2OMzP7mJktd24f62yrMnbdKu311xdzFersLOzZA1NTYa7Cxo2wbVsxZZV62L49XN3cpOYRVKOJlHVv6p2EtXsvImxZ8k3gx8DnxlhWIE6VdudOuPDCZnb0SX/p/pemfbGUGjA596beDnzC3X/a+d1PADsoIWBiXYVa9pwFGb8m979A+TWYfntTv7LHsRd0fpY8ruce1ma2k1DjYfPmzQMLkXeIcBxXoUo7jGPJhDKVHTB59qbe1PlZ8rhNZmae2twpz97Uww4R9qptNHUugxQrfZ40+cuq7IDJvDd1j2PPAI6kwyWvKu0PLM3X7zxp6rlS9ijS7/emTjzXb2/qRzs/G3RcLkUNEWp6uGTRtvOk1BqMuz9jZt29qd9NGEW6FHhpj8MPAH9jZl8njCK9H/jUqGUoqora9La0DC/ZJFrrPGliE7vsJhLA+wh7U/8vYW/q97r7o+m9qYHbgOcCj3Qe/0PnuZEVUUVteltahtOrSdTrPGlqE7v0gMmxN7UDH+rcKmlcbekmftM1Va8m0XXXrf67NXW4uvSAkXya+k3XVFmbzk1tYitgciq79tDUb7qmytp0bmoTWwGTQ8zaQ9bgauo3XZNlbTo3cW6VAiaHUWsP/U6WPMHV1G86Wa0JzWEFTEeWb4phaw+DVqvLG1xNnpjVdHlqJE1oDitgyP5N0av2MOiEybJanZo97ZC3RtKE80IBQ75vimTtIblz3uRkWNNl587er90NF7PVJ4uaPe0wTE217ueFAobhvym6S12urITbrl1hTZfkiZB87akpeOc7w5of6ZNFzZ7mG+Y8q/t5oYBh+G+K7lKXya05099KTfgWkmK08VywES9GrryZmRlfWlqK9vrz86HmsrKyemNxkaYys0PuPjPoONVgRqSlLiW2Os+FUcAUoIkTpKQa6j4XRgETQd1PCqmOus+FKXvBqUZq26JCEk/d90xSDSaCJkyQkmqo+8iTAiaCup8UclIV+tLqPBdGASPSR5F9aVUIqjIoYCJQJ28zaMeJ0ZXayZt1X+rOsTeY2e/M7Eji9txxljermJ28i4tw882j74ctgw3bwZr+G7W507/sGkzWfam7vujuV4ytdEPq18k7ajW5zd+Eo+q+99PTsLyc7W8wTF9ar79Rmzv9SwuYnPtS18rsLOzeDfffD5dddnJZh1HDoe5zIsqSvOp9ZQUmJrJf1pG3g7XfIt9t7fQvswZzPtn3pe56g5k9Dfwc2OPun+11UN69qYu2uAhXXx1OsIceOnkpwajh0OZvwlF03/vkRalFBXS6Vtrvb1TnkaBRlBkwefalBriPsN/0E8BLgPvN7Ffufm/6wDx7U8fQK0yKCAcNfw+n+94nazBFBHS/Wqn+RidFCxgzW6B/beTfgb8i+77UuPsPEg8PmtkngbcAqwKmbL3CZNj2fPr4Yb4J2zpE2pV87/P0wQzSr1ba1tpKL9ECxt3n1vp5pw9mysye5+4/7DydZ79pB2z4Eo6u3we3X5jkOfGK6tAdZ8dwlYMsxodeTdYM3L20G/AFQg3kdOBlhCbSBX2OvRQ4kxAqLwZ+Bmwf9G+86EUv8hgOHnTfuNF9cjLcHzyY/zVuu81969Zwn3bTTeG1IdzfdNNw5SzqdQYp4v2oo4MHw3valv/fLmDJM3zGy77Y8X3ARsK+1PfS2ZcawMxeYWZHEsf+BfBfhCbUAeBj7r5/zOX9vVHnNszPw3veA9/4Rrifnz/15+k5GNPTw81/6TWXI8ZcmnHP9ajKfKDZ2d5bwUpHlhSq862qNZitW0OtonvbunX1Md0azoc+NNq/lfyWjVXTGGcNpq21pSqhJjWY2ur2s9x443D9Gpddtvbj7lD3gw/CLbeEEZATJ8L9DTcM/809bE2jX42h+zyM9n7k0eaZsbWTJYXqfItVgylC1j6YiQn3devCfbfGs359tm/u9Lf9bbf1//bv15/Qr8aQpSYRo49CNZjykbEGU/alAq22c+fqfZS60iMUu3fD7bfDd74Tfn7sWNgtMu/ugMvLvUe41hpt6jccO2jyYKwRrEFD/lUezWobBUxF9foQffe7JwOml14frH5zctIfvLXCot9w7KBh2piXNvQbdtb1WhWTpZpT51uVm0h5HTzovmGDu1m4Tzdv8jZ9sr72Wq+xVjOvjKbMuIbl2w41kZpndha+9a3e1f+1agtZJ5m5n3qf/rd7NUXS11xlmXAYswmjyW/VooAZoyI+WP3CYpQP1uJiGJk6fjyEy4kT2ZozWZpA6fLOz8NVV4XfibVR3fbt4b7XFr0yXgqYMYndN9BriYg85UpeCDg5CYcPh5+t9Tp5Q21xMeyCefx4eHz06OAgyxPK6fd427a1j5f4FDBjEnstl0HNlV7HLyyEIOkuZTAxATMz8PDDsHcv7N+/dhDmvYBzYeHkkgkQgmytUMobylovp3oUMGMSu28gz4cr+cGdnISpzlmwfj1cfDEcOrR6Elu/EMlzEeHcXGgWHT0awmzPnmIDQ/0v1aOAGZPY64Tk+XAlP7gAO3bA5s0nf2f//pOvMz09XNOu31ITed6DvIGhtVgqKMtQU51vTRqmHiTrrNlBw8fJ1xlm2LfI4em2Xq1cdWiYun2yNlcGfdOnXydvs6PIvhAt3lRvCpiWKiqMelFfiHQpYGSgvLUI9YVIlwJGolDTRqDknR1FpNkUMCISjQJGRKIx73XpbIOY2ZPA40P86tnAUwUXp2n0HmXTxPfpPHc/Z9BBjQ+YYZnZkrvPlF2OKtN7lE2b3yc1kUQkGgWMiESjgOlvfvAhraf3KJvWvk/qgxGRaFSDEZFoFDAiEo0CRkSiUcD0YWYbzOx2M3vczH5rZt8zs0vKLlcVmNlZZvYlM3um8/68rewyVY3On0AB098U8N/AK4E/AD4M3GdmW0osU1V8GjgGnAv8JfBZM7ug3CJVjs4fNIqUi5l9H/iIu99fdlnKYmanA78Enu/uj3Weuwv4mbtfW2rhKq6N549qMBmZ2bnA+cCjZZelZOcDx7vh0vEwoBrMGtp6/ihgMjCzdcDdwH53/8+yy1OyTcBvUs/9GnhOCWWphTafP60NGDNbMDPvc/t24rgJ4C5Cn8NVpRW4Oo4AZ6SeOwP4bQllqby2nz+tXTLT3ecGHWNmBtxO6Mz8M3f/Xexy1cBjwJSZPc/df9h57iJaVvXPQuePOnnXZGafA14IvMbdj5Rdnqowsy8ADryb8P58HXipuytkEnT+KGD6MrPzgJ8AR4HjiR+9x93vLqVQFWFmZwH7gNcCy8C17n5PuaWqFp0/gQJGRKJpbSeviMSngBGRaBQwIhKNAkZEolHAiEg0ChgRiUYBIyLRKGBEJBoFjIhEo4CRsTKz9WZ2bI0r2R8ou4xSnNZeTS2lWQdc2eP5a4CLga+OtzgSk65FktKZ2ceBDwLvd/dbyi6PFEc1GClNZ72UW4FdwC53/0zJRZKCqQ9GStFZ6W0eeB/wrmS4mNmfm9m3zeyImf2krDLK6FSDkbEzs0lgP3A5cIW735s65JfAHsJKcNeMuXhSIAWMjFVnAex7gDcCl7v7qlEjd/9m59g3jbl4UjAFjIyNmW0A/gl4DfBmd/9ayUWSyBQwMk4HgNcDdwJnmtkVqZ9/xd3TW6JIjSlgZCw6I0bdvZnf0bklraC9lRpHASNj4WHCVXo/JWk4BYxUTmeUaV3nZmZ2GiGjjpZbMslLASNV9HbgjsTj/wMeB7aUUhoZmi4VEJFoNJNXRKJRwIhINAoYEYlGASMi0ShgRCQaBYyIRKOAEZFo/h/I+hWgkh+gbAAAAABJRU5ErkJggg==\n", 323 | "text/plain": [ 324 | "
" 325 | ] 326 | }, 327 | "metadata": {}, 328 | "output_type": "display_data" 329 | } 330 | ], 331 | "source": [ 332 | "fig = plt.figure(figsize=(4,3))\n", 333 | "plt.plot(X_2[:,0], X_2[:, 1], \"b.\")\n", 334 | "plt.xlabel(\"$z_1$\", fontsize=18)\n", 335 | "plt.ylabel(\"$z_2$\", fontsize=18, rotation=0)\n", 336 | "save_fig(\"linear_autoencoder_pca_plot\")\n", 337 | "plt.show()" 338 | ] 339 | }, 340 | { 341 | "attachments": { 342 | "image.png": { 343 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAARgAAADQCAYAAADcQn7hAAAZyklEQVR4Ae2da6wexX3Gn+Pbwc2lCqdH9EMEfGiQooiWxPmSloqkdhvRRmoU1DRJKQRTLk1d1Wm+1BIoJo6gsqrWagupQcXYSZSUhKS5CaWCgERKqsguuQilippyCUI0hio3hO9v9bA7PnPmzOzO7ruz12ek97x7mct/fnvmef8zszsLKIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACLRPYKH9IucvcWlpaXbhhRfOn5FyEAERqEXgyJEjzwNYLku8oSxCH89TXA4fPtxH02STCEyCwMLCwlMxFV0XE0lxREAERKAOAQlMHWpKIwIiEEVAAhOFSZFEQATqEJDA1KHWszTf+AZw220AvxVEoE8EBjnI2yeAXdtCUdm6FThxAti0CXjwQeAtb+naKpUvAhkBeTAD+E8o8lAefjgTl9Ons2/uK4hAXwjIg2n5SlAsKAJvfetaT8N3rsxDYT70XIwHw30FEegLAQlM4Er4GnsgavThIrGwz23YAFxzDXDVVZkYUTxsD8XuAnGb3aKQaEUbp4gikICABMYD1W7sTY5r+LozRizscxST/fuBgweBffvKPRTmYfLxVEeHRKAzAhqD8aC3Gzu9B+43EUx3Zv36TDTs7ow5t5A/vDGbZd2eF17IPJQ9e5obwKWAatapiSuqPMoIyIPxEDKNvelxjaLujDl36BBw991Zl4jeE21p0kNJ5Z15MOqQCEAC4/knMI2dnotp4J5otQ4ViYU5Z8Zemi6bBvu8M5arIAIpCEhgAlRNYw+cTno4ZdmpvLOkQJT5YAlMXmDYZWjKU2kyr1T/URQvzTqloqt8XQJdC8wigDsAbANwLoAfANgF4H7X0BT7TY5HNJlXirraeab0kOxytC0CXc8iUeB+COAyAL8I4CYA9wJoZTUp33hE3X+JefKiOLU5qxNbXmy8usyUbvwEuvZgXgSw28L8ZQBPANgC4EnreJLNJscj6uZlez72DXb0MlIEu7yie3yK4vFcU93KFHVUnv0h0LXAuCTOA3ARgMfdEwCuzz84evSo53T1Q02OR9h5LS2t3DtTJhS252PfYJfqoUW7PHOPj8/GUDxbeHg/z/bt2R3HvjyqXxGlEIF0BDYCeADA/rIitmzZMutrePTR2Wzz5tls/frsm/tFwcRfWOCtddmHaW+9tShV/XOmvDL7QvFoF9MaW2k361tWT9tixmU+VdLY6bXdPQEAUWvW9sWD4VjQxwGcALCjTGBSnHfdfnc/tszQL38ovfF8fDfYhdLMc9yURzuL7rMJxTNdwWPHjMSsPMUd48XYHlBRF22eOiqtCNgEeHP8AQAPAdhsnwhtN+3BuL/W+/dX80Ls3xM3ryq/0ow7hF922nnjjbPZ4mK8p2YY2R5QSk/NlKfvNASG5MF8DMDr86nql0KikvK463Xcd1/2qxx6grnIltAvf1Eac45pY7wAE7/Od13PzC7L2FnnjmPjATX9GIZtn7b7Q6DrLtIFAG4AcBzAcxYWHvuktZ900/2nv+QS4GtfA9atW/tQYowhpgHGxG0zTmz3JFaE6tbz6quzWlOgUgtqm3xV1loCXQsM363S+cvf+E9u7m7lDNDOncCZMwBnSbhcwlgageupcd+tW6wIrf1XKj/CvC+7DDh5Eti4MZt9Kk+lGEMm0PWNdr1hx4a2axfA5RHovlNg+HnssXZvgksJxHhqvuUiTLk+ETLn5v3euzcTF+ZDkbn22vKFyilKbd6EOG8dlX41ga49mNXW9GDPNEKKDG98s5dOSHVvSlvVtj011tP1XmiHXX+zXERT9j377Oqcvve9rDyfJ8WYKb2p1ZZoLxUBeTAOWTY6dou4Uv/ll2frstiDvU70Qe2ywbIxh8SFlTEi1OQCVwYSPRY3UMg5Re8LKb0pX3k61jyB0XowMY3Jh5PpOAZjPBh2Jxia/jX3lZ3yWBVvgCLj827mte963osNgOL1zDPluaX0pspLV4wmCIxSYKo0Jhei/avJc9ddB5x/fvGvvptHH/ftelE8uZ9CRMrqbkTmBs4T5uGNbzRbq7+NN0Vbi7yu1am01ycCo+wi+RpTLHTzq2kGQjmVysFfhiEPNrr14n5XgQPpvAWAgd/cDwWKDPl3IYYhm3Q8nsAoPRjTmPhLXbVr4/vVnMcjir8UaWP66pW2xHDuvD6Li1k3tOr1CeeqM30kMEqBKWpMFAvjcvOCmG37F5LbZp/xd+8Gjh/Ppq1juhd2GSafPlx8u15d2kM7zH1HFJs+MeqSi8ruCYG6zyLZzwnxOZpNm4qfpTHx163Lnh7md9mTwyZN2dPKaZ4QUa4i0A6B2GeRRjkGE9JBd2yGN3sVTUGb+LzhjmMF27aVv5vIpCnKN2SfjovA2AhMSmDM2IwZwOXt6mab59xgx+eYAbtKZe68nUbjCy5R7U+NwCjHYEIX0e37M55vDMakd+OXiQvT1UljytO3CIyNQOcPGtYByjGYw4ejFtSqk310mr4O5kZXQBFFoCaBhYWFIwDeXJZ8Uh5MGYwq5ykufJzATIUP/TmlKnVXXBGIJTCpMZhYKDHxNJgbQ0lxpk5AAlPzP0CDuTXBKdmkCKiLZF3uKmMqGsy1wGlTBAIEJDA5mDpjKhSZmJmlAHsdFoHRE+hDF4mvKeGUENflvacr4vOOqVCghvwwZFfcVe64CfTBg+E6Zx8F8PbY15akuCRmTMXMCnE/NtTxfmLzVjwRGDKBPgjM53KAnFN/bVcw5xlT8Xk/6jp1dSVVbp8I9EFgYnk0/m5qt+C6YyrzeD+uDdoXgTERGJLA3AmAHywvL8/6dBFivJ/QDFXoeJ/qJ1tEoC6BIQlM3Tq2kq7I+wmN0YSOt2KwChGBFgj0YRaphWqmKYICETNz5BujoUWh42msVa4i0D6BPngwtIEfrt/PzzkATuWf9olElljF+wiN0YSOR5qgaCLQewJ9EJibAHzYInUlgFsA7LaO9W7T530UzRz53sccM3bTu4rLIBGoQKAPAkMh6bWY+HjGeh+up8O3FNihaOzGjqdtERgigT4IzBC5RS8sVdXTGSQMGS0CAQISmACYmMMx3kespxNTnuKIwNAISGASXzF7nGVpKZs5YpE8riACYycggWnhChsx0Qp4LcBWEb0ioPtgWrocvrGYlopWMSLQGQEJTEvozVhM0WtSWjJFxYhAawRiu0ibAPwcwMaAZZ8H8K7AOR3W60z0PzBRArECQ2HZ7mH0QQBvAvAlzzkdcgjEzDo5SbQrAoMmECswLwL4hFPTvbm4fAjAAeecdkVABETg5WeAqmLgy9r+HsCf5Z87qmag+CIgAtMgUHWQl/G5JssHAFwLwIjLIoC7APwPgJ8B+D6AP58GQtVSBEQgRCC2i8T0fNL5IIA/BMAHEj9lZcp8ngPwO7nI/CqArwL4XwD3WvG0KQIiMCECsR4MB3k/DeAPcoGxxYW4OEZzM4D/BnAGwLcAfBHApRNiqaqKgAg4BGIEht0fLsz9jnwq2izS7WS1apeC9JsAvrPqqHZEQAQmRSCmi3QoFxe+s+g1effIhkRP5af2AQD/mI/FMK2CCIjARAmUCQxnjC7P2bwfAD92YHfoVfYBAH/L+8oA/BaAE8457YqACEyIQJnAcPX+V1fgsQ/A1lxcnq+QTlFFQARGSKBMYKpUmffG0Gt5G4CjVRIqrgiIwDgJxAzyxtT8gvy+l18B8ET+3BKfXbo/IvG5APgsE2eingLwvog0iiICIjAAAk15MBQGjtfUCbfnYzXnAbgEwFcAfBvA43UyUxoREIH+EGjKg6lbo1cAuCK/h4Yez9fz+2f+uG6GSicCItAfAl0LzEX5+4/4aIEJ9F7eYHasb76b+jA/R49qiMfiok0R6C2BrgXmlZ57aH7imfomQD4D9WZ+lpeXewu0acNi3x7ZdLnKTwSaINDUGExdW9gtcqfBuc8HJicf3HcqPfigFguf/D/FwAB07cGwa0SRe53F7dc0wJvRmGcdX3k+1n+UNjsj0LUHw6lpPtv0EQB/ks8i/T6AX++MSI8KNuv4njgBbNoEcD8myPOJoaQ4bRDo2oNhHbm2zGYAP8qXgPhTeTDZpTfvVNqzB6jSPZrH82njn05lTIdA1x4MSf8fgHdOB3m1mrrr+NI7oYDQmzHvW3JzrOv5uPloXwTmJdAHgZm3DpNJH9v1ofDs2wfcdx9wxRVhIZoMOFW0MwISmM7QVy/Y1/XxeTEUop07AY7dPPIIcPHFEpnqtJWiCQJ9GINpoh6TyMN0fcpe3uYTokkAUiV7R0AeTO8uSdggeisc7G1yDCZmTCdskc6IQDEBCUwxn96dpcj4ukW2obFCFDumY+etbRGoQkACU4XWgOLGCJGvK1UmXgNCIFN7QEBjMD24CF2ZEDum05V9Knf4BOTBDP8a1q5BbFeqdgFKOHkCEpiJ/wvEdKUmjkjVn4OAukhzwFNSERCBYgISmGI+oz3LGaTbbgP4rSACqQioi5SKbI/z1fR0jy/OyEyTBzOyCxpTHd/0dEw6xRGBqgTkwVQlNtD49FrMHcBmerrqOjMDrbrM7pCABKZD+G0V7esSxTxy0JZ9Kme8BNRFGu+1PVszu0t0/Diwe3d2ateu8scOzmaiDRGoQUACUwPa0JKYLtG6dcCZM8ADDwBbt8bPINED0ozT0K56P+ztWmB25O86Og7gnn4gGZ8V5o7dbdsAIzIcf6FnEwpGVO68MxOjm2+uJkqhfHV8WgS6HoN5FsBHAbw9X5d3WvRbrC1Fhl0jLkDlG9y1B4FpFj0cxltYyLweej5GlJiXggjEEOhaYPhGAQa+UO21+ba+EhEwnoyZTTJC4Q4CX311JianT2ceDxe4otBUebNBoioo24ER6FpgquDiq2P5gV4dWwXb6rgUFSMs5ow9CEwvhYFiYjwdru/7wgvFC42bvPQtAjaBIQkMXx3LD5aXl2d2JbQ9HwEzCGwE5aqrAH5cT2e+UpR6igRSCgyHEC8LQP13AJcGzulwywRCXSfX02nZLBU3AgIpBSbyPYQjoDiCKvi6TkXVsgeFmxKiFHkW1UHn0hNIKTAx1rN8ftbnn3MAnMo/MekVpwMC7qBwlbdOhsxNkWeoLB1vj0DX98HcBOAlAH8F4Mp8m8cUekzAHRTm/rwhRZ7z2qT08xPoWmB40/qC88lvZJ+/csohDQEzKFz2fqYqpafIs0r5ipuGQNddpDS1Uq5JCYQGhecpNEWe89ijtM0QkMA0w1G5NECg6kBzA0Uqi8QEJDCJAY8xew3IjvGqpqlT12MwaWqlXJMS0IBsUryjylwCM6rL2U5lNCDbDucxlKIu0hiuYst10IBsu8CHfAOiBKbd/5XRlNbFgOyQG1rdCx8z3tVnLhKYulde6ZIRYIM5dCjLng9dUsxiGloygzrM2DfeRR4m9J2LBMZcKX33ggAbDMd4zLIRBw4ADz2UPdnNY1yjht9seHZD64XxCYww413mSXfu26FMgOy47jZZMz3zTMVSAuNS136nBPgPf/LkiglGTExD46LlXPzqxz/O1glO2ThWrOhuiw3f9wYIIw5LS6vX7nEFKGR5W56PBCZ0BXS8EwJsIBs3rngwZhU9NjQufLVjB3DqFLB3b7ba3uJi1gBT/QJ3AsEplHWz6+eKQ50FwebxfBzzCnclMIV4dLJtAmxI/Od3x2BoB1fV49rAs3y5MXudYJ5P7e67LIwX0bYX5YoDufAVNLSHb3+IsYdx7FULuZ8iSGBSUFWecxFwf7FNZqZRsJtEceEbEthI2E0wi5Rzv4nlI0yZoW/Xi2ijTGOL4WCPy1S1h4x9XS9TRlPfEpimSCqf5ATsRkFRMesEu7/o3GfclCFFmbEekc2BYsN9ei4UnCqD4EyXmpMEJuV/ofJunECoUbTh7tuV8XkR9nl3u0w8Qh5IKJ3LgfZs2JB5dvzmftUQKqtqPnZ8CYxNQ9uDJMDGltLd9zW8KmWGxMOG7fOIeN7X9fPZ893vZoPfHJ8yY1R2/mXbMTaW5eE7L4HxUdGxwRFwf9HrVMA0XLv7xXx8jZzHY8t0xYMD2DxmujfMy+cRuem4z+Daw2OcXWP3iIGzbIxL+2IC682X8pmxLXNrQGz6ojIkMEV0dG4yBMwvuGlkHEDmFLj9Erq6Dc8WD3Zf7r47EwN7QJqN2XhhFDgKBL/drp9PdJ5+OhMVc7Foe2wXyVdvlhmb3pQZ+u5SYBYB3AFgG4BzAfwAwC4A94eM1XERSEXANFzOTjGYKXBuu408i7H6r/F+2DDdX35bPL75TeALX8i6Ma5gmXS2h+K7x8W2hyJ0yy0r3SIuY3r77WttWG3typ5dbwoT319Ob8bYshKz3laXAsOyf5i/O+lpAL8L4F4AFwN4sl51lEoE6hEwXobtwdDbYPA1crsU4wVQMNj4fVPWpsG6YuB6CqbBm9kgc4+LKc+IlblP6LHHVrwX3uF83XXA9S+//9SkKP429Ta2NykuLLlLgXkRgL3A95cBPAFgiwSm+J9CZ5snYBouGzgfQ+A3G+9dd4VFw1jhigL3jaCYOPzmcY6PMFAMtm9fG89t8K4AZamBgwezaWmKIL0WBoobHw4t8qZMevNt15tl+ew2cet8dykwrr3nAbgIwOPuiXxf76YOgNHh+QjYDZKNjF2UY8dWuh1uV8YtLVYU3HgUAzfYDd6MxTCO3fBtQeM5ei3nn78ybmJ3sXzelK9MO3/3/Bj2NwJ4AMD+mMps2bJlpiACTRB49NHZbPPm2Wz9+uz7xhuz7UxeZrOFhew44/Fz663Zt1t20Tk7bpV4tl1MZwK3Q+doH+tC+/nN/RQBwOGYtprSg4l9NzWX7fw4gBOcbYsxWnFEoCkCtjdAT4XBDKKy68FujPE0Qp6B7QGVeQI8XxaH+RVNGzO9mXFyuzWul8T9LkNKgYmpGl+69s8A2D3iIK/1oH6XWFT2VAi4DZJiwg+Fx268oVvxKQYh4SliGBIlk5892EzBc4UiJFRF4lNkT6pzKQUmxuaPAXh9PlXNV8gqiECrBEINksft4AqRafCuB8R9O61PSIyImJkbe5zE5Gce5qwzbczybRvserS93aXAXADgBgDHATxnVZzHPmnta1MEkhKIaZCM4+uWhISHBoeExIiImYq2RcnNr+lp46QgPZl3KTBP5e+k9pilQyLQPwI+IQoJD60PCYkrIsYbYpqi/PpHpNyiLgWm3DrFEIEBEPAJD80OCUmZiITyGwCKNSZKYNYg0QERaIZAkZCMSUSKaElgiujonAjMSWAqQhLCpFfHhsjouAgMlAAHlzmtzu+ugzyYrq+AyheBBgmEZq4aLKJSVvJgKuFSZBHoNwHfzFWXFktguqSvskWgYQJm5oqPOfjuAG64uNLs1EUqRaQIIjAcAkUzV13UQgLTBXWVKQIJCfRp5kpdpIQXWlmLwNQJSGCm/h+g+otAQgISmIRwlbUITJ0A12MZYjgKgA9L9in8EoDn+2RQC7aozi1A7kERvuvM1RCWe2DbZEyIWkJwZDRU55Fd0EB1al9ndZECRHVYBERgfgISmPkZKgcREIEAgfyNKoGzOlyVwJGqCUYQX3UewUWMqMIUr3MEFkURAREQAREQAREQAREQAREQAREQAREQAREYE4HF/CVyvAHwZwC+BeDyMVXQqsu5AD4P4MX8hsf3WefGuDmla+u7fq8DcAzAJ3wndawdAq8AsBvAhQA4/f+OXGi4P7bwKQD/AuCVAC4F8BMAbxhbJa36TOnaWtU+u/lvAB6RwJzl0ZuN7wC4ojfWNGMIGxvf4nyRlR3fLf7X1v4UNsd4bX3X7T0A7s1/PCt7MLrRzoe0mWN83zYb4ePNZNebXFinUwC+b1n07ZF7MFZVX94c67V16/lqAB8B8Jfuidh9CUwsqWrxNuavvz0I4L+qJe19bHaLfupYyS7Sq5xjY90d87V1r9mefFzxGfdE7L4EJpYU8DCAWeDzdSsbMmWXgd2IHdbxsWz+HAB/2ezAfQ5sjz2M/dra1+8SANsA/J19UNvdEuDyFwcAPARgc7emJCvdjMFwZsGEQxMYg5nCtTXXk98781nC5wDwwx+WlwD8px1J2+0S+CcA/5HPrrRbcrulfRoAZ5IoNr8xgVkk0p3KtTX/Sb8A4Jetz98A+KzWgDF42v/mAjzsQvF+Aaq9+fxR+6YkL5H3wfxr/gv3NICx3wczpWsb+ufhLRiVZ5FCmem4CIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIiACIjAEAlsyp8oDz2B/rkhVko2VyOwoVp0xRaBaAJcN2W7J/YHAbwJwJc853RIBERABGoT2Js/EFp7hbTaJSuhCIjAaAlwLZV/AHAGwAdGW0tVTAREoHUCXAXuLgCnAVzjlP5uAFwNkEtbPOmc064IiIAIFBJYn68hchLAez0xfxsAV63/CwmMh44OiYAIBAlwkPczAI4DeFcwVnbinRKYEkI6LQIicJYA34TIWSKu4fp7Z4+GNyQwYTaDPqNp6kFfvt4az0XA+WbLewC8BsCVjqVf9Lz6xImiXREQARFYS4AzRnxvUugGOw72ckFpO8iDsWmMaFsezIguZk+qQmFx35vUE9NkRtsEJDBtE1d5NgHOMnEwmB96Pufkng8HhhVEQAREYC4C7/d0pXQ/zFxIlVgEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAEREAERKBpAv8PmhajeRgMBjIAAAAASUVORK5CYII=" 344 | } 345 | }, 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "![image.png](attachment:image.png)" 350 | ] 351 | } 352 | ], 353 | "metadata": { 354 | "kernelspec": { 355 | "display_name": "Python 3", 356 | "language": "python", 357 | "name": "python3" 358 | }, 359 | "language_info": { 360 | "codemirror_mode": { 361 | "name": "ipython", 362 | "version": 3 363 | }, 364 | "file_extension": ".py", 365 | "mimetype": "text/x-python", 366 | "name": "python", 367 | "nbconvert_exporter": "python", 368 | "pygments_lexer": "ipython3", 369 | "version": "3.6.5" 370 | }, 371 | "nav_menu": { 372 | "height": "381px", 373 | "width": "453px" 374 | }, 375 | "toc": { 376 | "base_numbering": 1, 377 | "nav_menu": {}, 378 | "number_sections": true, 379 | "sideBar": true, 380 | "skip_h1_title": false, 381 | "title_cell": "Table of Contents", 382 | "title_sidebar": "Contents", 383 | "toc_cell": false, 384 | "toc_position": {}, 385 | "toc_section_display": "block", 386 | "toc_window_display": false 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 1 391 | } 392 | -------------------------------------------------------------------------------- /deep_learning/cnn_with_keras.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from tensorflow import keras\n", 12 | "import tensorflow as tf\n", 13 | "import numpy as np" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 5, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()\n", 23 | "X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]\n", 24 | "y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]\n", 25 | "\n", 26 | "X_mean = X_train.mean(axis=0, keepdims=True)\n", 27 | "X_std = X_train.std(axis=0, keepdims=True) + 1e-7\n", 28 | "X_train = (X_train - X_mean) / X_std\n", 29 | "X_valid = (X_valid - X_mean) / X_std\n", 30 | "X_test = (X_test - X_mean) / X_std\n", 31 | "\n", 32 | "X_train = X_train[..., np.newaxis]\n", 33 | "X_valid = X_valid[..., np.newaxis]\n", 34 | "X_test = X_test[..., np.newaxis]" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Deep CNN for Fashion MNIST" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 6, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from functools import partial\n", 53 | "\n", 54 | "DefaultConv2D = partial(keras.layers.Conv2D,\n", 55 | " kernel_size=3, activation='relu', padding=\"SAME\")\n", 56 | "\n", 57 | "model = keras.models.Sequential([\n", 58 | " DefaultConv2D(filters=64, kernel_size=7, input_shape=[28, 28, 1]),\n", 59 | " keras.layers.MaxPooling2D(pool_size=2),\n", 60 | " DefaultConv2D(filters=128),\n", 61 | " DefaultConv2D(filters=128),\n", 62 | " keras.layers.MaxPooling2D(pool_size=2),\n", 63 | " DefaultConv2D(filters=256),\n", 64 | " DefaultConv2D(filters=256),\n", 65 | " keras.layers.MaxPooling2D(pool_size=2),\n", 66 | " keras.layers.Flatten(),\n", 67 | " keras.layers.Dense(units=128, activation='relu'),\n", 68 | " keras.layers.Dropout(0.5),\n", 69 | " keras.layers.Dense(units=64, activation='relu'),\n", 70 | " keras.layers.Dropout(0.5),\n", 71 | " keras.layers.Dense(units=10, activation='softmax'),\n", 72 | "])" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Train on 55000 samples, validate on 5000 samples\n", 85 | "Epoch 1/10\n", 86 | "35808/55000 [==================>...........] - ETA: 3:05 - loss: 0.8821 - acc: 0.6827" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"nadam\", metrics=[\"accuracy\"])\n", 92 | "history = model.fit(X_train, y_train, epochs=10, validation_data=[X_valid, y_valid])\n", 93 | "score = model.evaluate(X_test, y_test)\n", 94 | "X_new = X_test[:10] # pretend we have new images\n", 95 | "y_pred = model.predict(X_new)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 2, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Resnet" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "DefaultConv2D = partial(keras.layers.Conv2D, kernel_size=3, strides=1,\n", 114 | " padding=\"SAME\", use_bias=False)\n", 115 | "\n", 116 | "class ResidualUnit(keras.layers.Layer):\n", 117 | " def __init__(self, filters, strides=1, activation=\"relu\", **kwargs):\n", 118 | " super().__init__(**kwargs)\n", 119 | " self.activation = keras.activations.get(activation)\n", 120 | " self.main_layers = [\n", 121 | " DefaultConv2D(filters, strides=strides),\n", 122 | " keras.layers.BatchNormalization(),\n", 123 | " self.activation,\n", 124 | " DefaultConv2D(filters),\n", 125 | " keras.layers.BatchNormalization()]\n", 126 | " self.skip_layers = []\n", 127 | " if strides > 1:\n", 128 | " self.skip_layers = [\n", 129 | " DefaultConv2D(filters, kernel_size=1, strides=strides),\n", 130 | " keras.layers.BatchNormalization()]\n", 131 | "\n", 132 | " def call(self, inputs):\n", 133 | " Z = inputs\n", 134 | " for layer in self.main_layers:\n", 135 | " Z = layer(Z)\n", 136 | " skip_Z = inputs\n", 137 | " for layer in self.skip_layers:\n", 138 | " skip_Z = layer(skip_Z)\n", 139 | " return self.activation(Z + skip_Z)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "\n", 149 | "model = keras.models.Sequential()\n", 150 | "model.add(DefaultConv2D(64, kernel_size=7, strides=2,\n", 151 | " input_shape=[224, 224, 3]))\n", 152 | "model.add(keras.layers.BatchNormalization())\n", 153 | "model.add(keras.layers.Activation(\"relu\"))\n", 154 | "model.add(keras.layers.MaxPool2D(pool_size=3, strides=2, padding=\"SAME\"))\n", 155 | "prev_filters = 64\n", 156 | "for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:\n", 157 | " strides = 1 if filters == prev_filters else 2\n", 158 | " model.add(ResidualUnit(filters, strides=strides))\n", 159 | " prev_filters = filters\n", 160 | "model.add(keras.layers.GlobalAvgPool2D())\n", 161 | "model.add(keras.layers.Flatten())\n", 162 | "model.add(keras.layers.Dense(10, activation=\"softmax\"))" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "model.summary()" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "Python 3", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.6.5" 199 | }, 200 | "toc": { 201 | "base_numbering": 1, 202 | "nav_menu": {}, 203 | "number_sections": true, 204 | "sideBar": true, 205 | "skip_h1_title": false, 206 | "title_cell": "Table of Contents", 207 | "title_sidebar": "Contents", 208 | "toc_cell": false, 209 | "toc_position": {}, 210 | "toc_section_display": true, 211 | "toc_window_display": false 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /deep_learning/data/fashion/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/fashion/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/fashion/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/fashion/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/fashion/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/mnist/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/mnist/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/mnist/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/data/mnist/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/data/mnist/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /deep_learning/images/ann/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /deep_learning/images/ann/activation_functions_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/ann/activation_functions_plot.png -------------------------------------------------------------------------------- /deep_learning/images/ann/perceptron_iris_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/ann/perceptron_iris_plot.png -------------------------------------------------------------------------------- /deep_learning/images/autoencoders/linear_autoencoder_pca_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/deep_learning/images/autoencoders/linear_autoencoder_pca_plot.png -------------------------------------------------------------------------------- /deep_learning/model_ckps/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains model checkpoints 2 | -------------------------------------------------------------------------------- /deep_learning/simple.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | -------------------------------------------------------------------------------- /exp/Optimizer_2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Python ≥3.5 is required\n", 10 | "import sys\n", 11 | "assert sys.version_info >= (3, 5)\n", 12 | "\n", 13 | "# Scikit-Learn ≥0.20 is required\n", 14 | "import sklearn\n", 15 | "assert sklearn.__version__ >= \"0.20\"\n", 16 | "\n", 17 | "try:\n", 18 | " # %tensorflow_version only exists in Colab.\n", 19 | " %tensorflow_version 2.x\n", 20 | "except Exception:\n", 21 | " pass\n", 22 | "\n", 23 | "# TensorFlow ≥2.0 is required\n", 24 | "import tensorflow as tf\n", 25 | "from tensorflow import keras\n", 26 | "assert tf.__version__ >= \"2.0\"\n", 27 | "\n", 28 | "# Common imports\n", 29 | "import numpy as np\n", 30 | "import os\n", 31 | "\n", 32 | "# to make this notebook's output stable across runs\n", 33 | "np.random.seed(42)\n", 34 | "tf.random.set_seed(42)\n", 35 | "\n", 36 | "# To plot pretty figures\n", 37 | "%matplotlib inline\n", 38 | "import matplotlib as mpl\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "mpl.rc('axes', labelsize=14)\n", 41 | "mpl.rc('xtick', labelsize=12)\n", 42 | "mpl.rc('ytick', labelsize=12)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 7, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from sklearn.datasets import fetch_california_housing\n", 52 | "from sklearn.model_selection import train_test_split\n", 53 | "from sklearn.preprocessing import StandardScaler\n", 54 | "\n", 55 | "housing = fetch_california_housing()\n", 56 | "X_train_full, X_test, y_train_full, y_test = train_test_split(\n", 57 | " housing.data, housing.target.reshape(-1, 1), random_state=42)\n", 58 | "X_train, X_valid, y_train, y_valid = train_test_split(\n", 59 | " X_train_full, y_train_full, random_state=42)\n", 60 | "\n", 61 | "scaler = StandardScaler()\n", 62 | "X_train_scaled = scaler.fit_transform(X_train)\n", 63 | "X_valid_scaled = scaler.transform(X_valid)\n", 64 | "X_test_scaled = scaler.transform(X_test)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 8, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "X_new_scaled = X_test_scaled" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 93, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "class MySGDOptimizer(keras.optimizers.Optimizer):\n", 83 | " def __init__(self, learning_rate=0.001, momentum=0.9, name=\"MySGDOptimizer\", **kwargs):\n", 84 | " \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n", 85 | " super().__init__(name, **kwargs)\n", 86 | " self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate)) # handle lr=learning_rate\n", 87 | " self._set_hyper(\"decay\", self._initial_decay) # \n", 88 | " self._set_hyper(\"momentum\", momentum)\n", 89 | " \n", 90 | " def _create_slots(self, var_list):\n", 91 | " \"\"\"For each model variable, create the optimizer variable associated with it.\n", 92 | " TensorFlow calls these optimizer variables \"slots\".\n", 93 | " For momentum optimization, we need one momentum slot per model variable.\n", 94 | " \"\"\"\n", 95 | " for var in var_list:\n", 96 | " self.add_slot(var, \"momentum\")\n", 97 | "\n", 98 | " @tf.function\n", 99 | " def _resource_apply_dense(self, grad, var):\n", 100 | " \"\"\"Update the slots and perform one optimization step for one model variable\n", 101 | " \"\"\"\n", 102 | " var_dtype = var.dtype.base_dtype\n", 103 | " lr_t = self._decayed_lr(var_dtype) # handle learning rate decay\n", 104 | "# momentum_var = self.get_slot(var, \"momentum\")\n", 105 | "# momentum_hyper = self._get_hyper(\"momentum\", var_dtype)\n", 106 | "# momentum_var.assign(momentum_var * momentum_hyper - (1. - momentum_hyper)* grad)\n", 107 | " var.assign_sub(grad * lr_t)\n", 108 | "\n", 109 | " def _resource_apply_sparse(self, grad, var):\n", 110 | " raise NotImplementedError\n", 111 | "\n", 112 | " def get_config(self):\n", 113 | " base_config = super().get_config()\n", 114 | " return {\n", 115 | " **base_config,\n", 116 | " \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n", 117 | " \"decay\": self._serialize_hyperparameter(\"decay\"),\n", 118 | " \"momentum\": self._serialize_hyperparameter(\"momentum\"),\n", 119 | " }" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 94, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "keras.backend.clear_session()\n", 129 | "np.random.seed(42)\n", 130 | "tf.random.set_seed(42)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 96, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Train on 11610 samples\n", 143 | "Epoch 1/20\n", 144 | "11610/11610 [==============================] - 1s 81us/sample - loss: 3.9043\n", 145 | "Epoch 2/20\n", 146 | "11610/11610 [==============================] - 0s 38us/sample - loss: 1.5243\n", 147 | "Epoch 3/20\n", 148 | "11610/11610 [==============================] - 0s 37us/sample - loss: 1.0021\n", 149 | "Epoch 4/20\n", 150 | "11610/11610 [==============================] - 0s 37us/sample - loss: 0.8606\n", 151 | "Epoch 5/20\n", 152 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.8038\n", 153 | "Epoch 6/20\n", 154 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.7715\n", 155 | "Epoch 7/20\n", 156 | "11610/11610 [==============================] - 0s 39us/sample - loss: 0.7457\n", 157 | "Epoch 8/20\n", 158 | "11610/11610 [==============================] - 0s 40us/sample - loss: 0.7228\n", 159 | "Epoch 9/20\n", 160 | "11610/11610 [==============================] - 0s 40us/sample - loss: 0.7012\n", 161 | "Epoch 10/20\n", 162 | "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6834\n", 163 | "Epoch 11/20\n", 164 | "11610/11610 [==============================] - 0s 43us/sample - loss: 0.6681\n", 165 | "Epoch 12/20\n", 166 | "11610/11610 [==============================] - 0s 40us/sample - loss: 0.6529\n", 167 | "Epoch 13/20\n", 168 | "11610/11610 [==============================] - 0s 42us/sample - loss: 0.6394\n", 169 | "Epoch 14/20\n", 170 | "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6280\n", 171 | "Epoch 15/20\n", 172 | "11610/11610 [==============================] - 0s 41us/sample - loss: 0.6175\n", 173 | "Epoch 16/20\n", 174 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.6091\n", 175 | "Epoch 17/20\n", 176 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5996\n", 177 | "Epoch 18/20\n", 178 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5928\n", 179 | "Epoch 19/20\n", 180 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5862\n", 181 | "Epoch 20/20\n", 182 | "11610/11610 [==============================] - 0s 38us/sample - loss: 0.5782\n" 183 | ] 184 | }, 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "" 189 | ] 190 | }, 191 | "execution_count": 96, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n", 198 | "model.compile(loss=\"mse\", optimizer=MySGDOptimizer(learning_rate=0.001))\n", 199 | "model.fit(X_train_scaled, y_train, epochs=20)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 102, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "class SGOptimizer(keras.optimizers.Optimizer):\n", 209 | " def __init__(self, learning_rate=0.01, name=\"SGOptimizer\", **kwargs):\n", 210 | " \"\"\"Call super().__init__() and use _set_hyper() to store hyperparameters\"\"\"\n", 211 | " super().__init__(name, **kwargs)\n", 212 | " self._set_hyper(\"learning_rate\", kwargs.get(\"lr\", learning_rate)) # handle lr=learning_rate\n", 213 | " self._is_first = True\n", 214 | " \n", 215 | " def _create_slots(self, var_list):\n", 216 | " \"\"\"For each model variable, create the optimizer variable associated with it.\n", 217 | " TensorFlow calls these optimizer variables \"slots\".\n", 218 | " For momentum optimization, we need one momentum slot per model variable.\n", 219 | " \"\"\"\n", 220 | " for var in var_list:\n", 221 | " self.add_slot(var, \"pv\") #previous variable i.e. weight or bias\n", 222 | " for var in var_list:\n", 223 | " self.add_slot(var, \"pg\") #previous gradient\n", 224 | "\n", 225 | "\n", 226 | " @tf.function\n", 227 | " def _resource_apply_dense(self, grad, var):\n", 228 | " \"\"\"Update the slots and perform one optimization step for one model variable\n", 229 | " \"\"\"\n", 230 | " var_dtype = var.dtype.base_dtype\n", 231 | " lr_t = self._decayed_lr(var_dtype) # handle learning rate decay\n", 232 | " new_var_m = var - grad * lr_t\n", 233 | " pv_var = self.get_slot(var, \"pv\")\n", 234 | " pg_var = self.get_slot(var, \"pg\")\n", 235 | " \n", 236 | " if self._is_first:\n", 237 | " self._is_first = False\n", 238 | " new_var = new_var_m\n", 239 | " else:\n", 240 | " cond = grad*pg_var >= 0\n", 241 | " print(cond)\n", 242 | " avg_weights = (pv_var + var)/2.0\n", 243 | " new_var = tf.where(cond, new_var_m, avg_weights)\n", 244 | " pv_var.assign(var)\n", 245 | " pg_var.assign(grad)\n", 246 | " var.assign(new_var)\n", 247 | "\n", 248 | " def _resource_apply_sparse(self, grad, var):\n", 249 | " raise NotImplementedError\n", 250 | "\n", 251 | " def get_config(self):\n", 252 | " base_config = super().get_config()\n", 253 | " return {\n", 254 | " **base_config,\n", 255 | " \"learning_rate\": self._serialize_hyperparameter(\"learning_rate\"),\n", 256 | " }" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 103, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "keras.backend.clear_session()\n", 266 | "np.random.seed(42)\n", 267 | "tf.random.set_seed(42)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 104, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "Train on 11610 samples\n", 280 | "Epoch 1/50\n", 281 | "Tensor(\"GreaterEqual:0\", shape=(1,), dtype=bool)\n", 282 | "11610/11610 [==============================] - 1s 95us/sample - loss: 3.7333\n", 283 | "Epoch 2/50\n", 284 | "11610/11610 [==============================] - 1s 47us/sample - loss: 1.4848\n", 285 | "Epoch 3/50\n", 286 | "11610/11610 [==============================] - 1s 48us/sample - loss: 0.9218\n", 287 | "Epoch 4/50\n", 288 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.7634\n", 289 | "Epoch 5/50\n", 290 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.7067\n", 291 | "Epoch 6/50\n", 292 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.6801\n", 293 | "Epoch 7/50\n", 294 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.6624\n", 295 | "Epoch 8/50\n", 296 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6482\n", 297 | "Epoch 9/50\n", 298 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.6354\n", 299 | "Epoch 10/50\n", 300 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6252\n", 301 | "Epoch 11/50\n", 302 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.6166\n", 303 | "Epoch 12/50\n", 304 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.6077\n", 305 | "Epoch 13/50\n", 306 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5999\n", 307 | "Epoch 14/50\n", 308 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5934\n", 309 | "Epoch 15/50\n", 310 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5872\n", 311 | "Epoch 16/50\n", 312 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5826\n", 313 | "Epoch 17/50\n", 314 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5766\n", 315 | "Epoch 18/50\n", 316 | "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5725\n", 317 | "Epoch 19/50\n", 318 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5687\n", 319 | "Epoch 20/50\n", 320 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5633\n", 321 | "Epoch 21/50\n", 322 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5612\n", 323 | "Epoch 22/50\n", 324 | "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5579\n", 325 | "Epoch 23/50\n", 326 | "11610/11610 [==============================] - 1s 48us/sample - loss: 0.5572\n", 327 | "Epoch 24/50\n", 328 | "11610/11610 [==============================] - 1s 48us/sample - loss: 0.5537\n", 329 | "Epoch 25/50\n", 330 | "11610/11610 [==============================] - 1s 49us/sample - loss: 0.5510\n", 331 | "Epoch 26/50\n", 332 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5490\n", 333 | "Epoch 27/50\n", 334 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5459\n", 335 | "Epoch 28/50\n", 336 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5452\n", 337 | "Epoch 29/50\n", 338 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5433\n", 339 | "Epoch 30/50\n", 340 | "11610/11610 [==============================] - 1s 44us/sample - loss: 0.5428\n", 341 | "Epoch 31/50\n", 342 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5423\n", 343 | "Epoch 32/50\n", 344 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5405\n", 345 | "Epoch 33/50\n", 346 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5384\n", 347 | "Epoch 34/50\n", 348 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5392\n", 349 | "Epoch 35/50\n", 350 | "11610/11610 [==============================] - 1s 51us/sample - loss: 0.5379\n", 351 | "Epoch 36/50\n", 352 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5367\n", 353 | "Epoch 37/50\n", 354 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5354\n", 355 | "Epoch 38/50\n", 356 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5356\n", 357 | "Epoch 39/50\n", 358 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5339\n", 359 | "Epoch 40/50\n", 360 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5349\n", 361 | "Epoch 41/50\n", 362 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5337\n", 363 | "Epoch 42/50\n", 364 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5334\n", 365 | "Epoch 43/50\n", 366 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5326\n", 367 | "Epoch 44/50\n", 368 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5322\n", 369 | "Epoch 45/50\n", 370 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5329\n", 371 | "Epoch 46/50\n", 372 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5325\n", 373 | "Epoch 47/50\n", 374 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5306\n", 375 | "Epoch 48/50\n", 376 | "11610/11610 [==============================] - 1s 45us/sample - loss: 0.5317\n", 377 | "Epoch 49/50\n", 378 | "11610/11610 [==============================] - 1s 47us/sample - loss: 0.5311\n", 379 | "Epoch 50/50\n", 380 | "11610/11610 [==============================] - 1s 46us/sample - loss: 0.5312\n" 381 | ] 382 | }, 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "" 387 | ] 388 | }, 389 | "execution_count": 104, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "model = keras.models.Sequential([keras.layers.Dense(1, input_shape=[8])])\n", 396 | "model.compile(loss=\"mse\", optimizer=SGOptimizer(learning_rate=0.001))\n", 397 | "model.fit(X_train_scaled, y_train, epochs=50)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 58, 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "data": { 407 | "text/plain": [ 408 | "" 409 | ] 410 | }, 411 | "execution_count": 58, 412 | "metadata": {}, 413 | "output_type": "execute_result" 414 | } 415 | ], 416 | "source": [ 417 | "a = tf.Variable([-1,2,2,0])\n", 418 | "b = tf.Variable([5,6,7,8])\n", 419 | "cond = a*b >= 0\n", 420 | "cond" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 59, 426 | "metadata": {}, 427 | "outputs": [ 428 | { 429 | "data": { 430 | "text/plain": [ 431 | "" 432 | ] 433 | }, 434 | "execution_count": 59, 435 | "metadata": {}, 436 | "output_type": "execute_result" 437 | } 438 | ], 439 | "source": [ 440 | "tf.where(cond, a, b)" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [] 449 | } 450 | ], 451 | "metadata": { 452 | "kernelspec": { 453 | "display_name": "tensorflow2", 454 | "language": "python", 455 | "name": "tensorflow2" 456 | }, 457 | "language_info": { 458 | "codemirror_mode": { 459 | "name": "ipython", 460 | "version": 3 461 | }, 462 | "file_extension": ".py", 463 | "mimetype": "text/x-python", 464 | "name": "python", 465 | "nbconvert_exporter": "python", 466 | "pygments_lexer": "ipython3", 467 | "version": "3.6.8" 468 | }, 469 | "toc": { 470 | "base_numbering": 1, 471 | "nav_menu": {}, 472 | "number_sections": true, 473 | "sideBar": true, 474 | "skip_h1_title": false, 475 | "title_cell": "Table of Contents", 476 | "title_sidebar": "Contents", 477 | "toc_cell": false, 478 | "toc_position": {}, 479 | "toc_section_display": true, 480 | "toc_window_display": false 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 2 485 | } 486 | -------------------------------------------------------------------------------- /machine_learning/Natural_Language_Processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Natural Language Processing\n", 8 | "\n", 9 | "## Generating Quiz from a given text" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": { 16 | "scrolled": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from textblob import TextBlob" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": { 27 | "collapsed": true 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "# f = open('filename')\n", 32 | "# ww2 = f.read()\n", 33 | "# or\n", 34 | "\n", 35 | "ww2 = '''\n", 36 | "World War II (often abbreviated to WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier. It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the Axis. It was the most widespread war in history, and directly involved more than 100 million people from over 30 countries. In a state of total war, the major participants threw their entire economic, industrial, and scientific capabilities behind the war effort, erasing the distinction between civilian and military resources.\n", 37 | "\n", 38 | "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the Soviet Union and China. It included massacres, the deliberate genocide of the Holocaust, strategic bombing, starvation, disease and the first use of nuclear weapons in history.[1][2][3][4]\n", 39 | "\n", 40 | "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of China in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom. Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, Germany conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan. Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, Finland, Romania and the Baltic states. The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with campaigns including the North Africa and East Africa campaigns, the aerial Battle of Britain, the Blitz bombing campaign, and the Balkan Campaign, as well as the long-running Battle of the Atlantic. On 22 June 1941, the European Axis powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the Axis military forces into a war of attrition. In December 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n", 41 | "\n", 42 | "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and Germany and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union. In 1943, with a series of German defeats on the Eastern Front, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all fronts. In 1944, the Western Allies invaded German-occupied France, while the Soviet Union regained all of its territorial losses and invaded Germany and its allies. During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western Pacific islands.\n", 43 | "\n", 44 | "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 May 1945. Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of Hiroshima and Nagasaki on 6 August and 9 August respectively. With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of Manchuria, Japan formally surrendered on 2 September 1945. Thus ended the war in Asia, cementing the total victory of the Allies.\n", 45 | "\n", 46 | "World War II changed the political alignment and social structure of the world. The United Nations (UN) was established to foster international co-operation and prevent future conflicts. The victorious great powers—China, France, the Soviet Union, the United Kingdom, and the United States—became the permanent members of the United Nations Security Council.[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the Cold War, which lasted for the next 46 years. Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and Asia began. Most countries whose industries had been damaged moved towards economic recovery. Political integration, especially in Europe, emerged as an effort to end pre-war enmities and to create a common identity.[8]\n", 47 | "'''" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "\n", 60 | "____ ____ II (often abbreviated to ____ or ____), also known as the ____ ____ ____, was a global ____ that lasted from 1939 to 1945, although related conflicts began earlier.\n", 61 | "\n", 62 | "==================\n", 63 | "\n", 64 | "It involved the vast ____ of the ____'s countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the ____.\n", 65 | "\n", 66 | "==================\n", 67 | "\n", 68 | "It was the most widespread ____ in ____, and directly involved more than 100 million people from over 30 countries.\n", 69 | "\n", 70 | "==================\n", 71 | "\n", 72 | "In a state of total ____, the major participants threw their entire economic, industrial, and scientific capabilities behind the ____ ____, erasing the ____ between civilian and military resources.\n", 73 | "\n", 74 | "==================\n", 75 | "\n", 76 | "World War II was the deadliest ____ in human ____, marked by 50 million to 85 million fatalities, most of which were civilians in the ____ ____ and ____.\n", 77 | "\n", 78 | "==================\n", 79 | "\n", 80 | "It included massacres, the ____ ____ of the ____, strategic ____, ____, ____ and the first ____ of nuclear weapons in ____.\n", 81 | "\n", 82 | "==================\n", 83 | "\n", 84 | "____1________2________3________4____\n", 85 | "\n", 86 | "The ____ of ____ aimed to dominate ____ and the ____ and was already at ____ with the ____ of ____ in 1937,____5____ but the ____ ____ is generally said to have begun on 1 ____ 1939____6____ with the ____ of ____ by ____ ____ and subsequent declarations of ____ on ____ by ____ and the ____ ____.\n", 87 | "\n", 88 | "==================\n", 89 | "\n", 90 | "Supplied by the Soviet ____, from late 1939 to early 1941, in a ____ of campaigns and treaties, ____ conquered or controlled much of continental ____, and formed the ____ ____ with ____ and ____.\n", 91 | "\n", 92 | "==================\n", 93 | "\n", 94 | "Under the Molotov–Ribbentrop Pact of ____ 1939, ____ and the ____ ____ partitioned and annexed territories of their European neighbours, ____, ____, ____ and the ____ states.\n", 95 | "\n", 96 | "==================\n", 97 | "\n", 98 | "The war continued primarily between the ____ ____ powers and the ____ of the ____ ____ and the British ____, with ____s including the ____ ____ and ____ ____ ____s, the aerial ____ of ____, the ____ ____ ____, and the ____ ____, as well as the long-running ____ of the ____.\n", 99 | "\n", 100 | "==================\n", 101 | "\n", 102 | "On 22 June 1941, the ____ ____ powers launched an ____ of the ____ ____, opening the largest ____ ____ of ____ in ____, which trapped the major ____ of the ____ military forces into a ____ of ____.\n", 103 | "\n", 104 | "==================\n", 105 | "\n", 106 | "In December 1941, Japan attacked the ____ States and European colonies in the ____ ____, and quickly conquered much of the ____ ____.\n", 107 | "\n", 108 | "==================\n", 109 | "\n", 110 | "The Axis advance halted in 1942 when ____ lost the critical ____ of ____, and ____ and ____ were defeated in ____ ____ and then, decisively, at ____ in the ____ ____.\n", 111 | "\n", 112 | "==================\n", 113 | "\n", 114 | "In 1943, with a ____ of German defeats on the ____ ____, the ____ ____ of ____ and the ____ ____ of ____ which brought about Italian ____, and ____ victories in the ____, the ____ lost the ____ and undertook strategic ____ on all fronts.\n", 115 | "\n", 116 | "==================\n", 117 | "\n", 118 | "In 1944, the Western Allies invaded German-occupied ____, while the ____ ____ regained all of its territorial losses and invaded ____ and its allies.\n", 119 | "\n", 120 | "==================\n", 121 | "\n", 122 | "During 1944 and 1945 the Japanese suffered major reverses in ____ ____ in ____ ____ ____ and ____, while the Allies crippled the Japanese ____ and captured key ____ ____ islands.\n", 123 | "\n", 124 | "==================\n", 125 | "\n", 126 | "The war in Europe concluded with an ____ of ____ by the Western Allies and the ____ ____, culminating in the ____ of ____ by ____ troops, the ____ of ____ ____ and the subsequent German unconditional ____ on 8 ____ 1945.\n", 127 | "\n", 128 | "==================\n", 129 | "\n", 130 | "Following the Potsdam Declaration by the Allies on 26 ____ 1945 and the ____ of ____ to surrender under its terms, the ____ States dropped atomic bombs on the ____ese cities of ____ and ____ on 6 ____ and 9 ____ respectively.\n", 131 | "\n", 132 | "==================\n", 133 | "\n", 134 | "With an ____ of the ____ese archipelago ____, the ____ of additional atomic bombings and the Soviet ____ of ____, ____ formally surrendered on 2 ____ 1945.\n", 135 | "\n", 136 | "==================\n", 137 | "\n", 138 | "Thus ended the war in ____, cementing the total ____ of the Allies.\n", 139 | "\n", 140 | "==================\n", 141 | "\n", 142 | "World War II changed the political ____ and social ____ of the ____.\n", 143 | "\n", 144 | "==================\n", 145 | "\n", 146 | "The United Nations (UN) was established to foster international ____ and ____ ____ conflicts.\n", 147 | "\n", 148 | "==================\n", 149 | "\n", 150 | "The victorious great powers—China, ____, the ____ ____, the ____ ____, and the ____ ____ the permanent members of the ____ ____ ____ ____.\n", 151 | "\n", 152 | "==================\n", 153 | "\n", 154 | "[7] The ____ ____ and the ____ States emerged as rival superpowers, setting the ____ for the ____ ____, which lasted for the next 46 years.\n", 155 | "\n", 156 | "==================\n", 157 | "\n", 158 | "Meanwhile, the influence of European great powers waned, while the ____ of ____ and ____ began.\n", 159 | "\n", 160 | "==================\n", 161 | "\n", 162 | "Most countries whose industries had been damaged moved towards economic ____.\n", 163 | "\n", 164 | "==================\n", 165 | "\n", 166 | "Political integration, especially in ____, emerged as an ____ to end pre-war enmities and to create a common ____.\n", 167 | "\n", 168 | "==================\n", 169 | "\n", 170 | "[8]\n", 171 | "\n", 172 | "==================\n", 173 | "\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "ww2b = TextBlob(ww2)\n", 179 | "for sentence in ww2b.sentences:\n", 180 | " new_sentence = sentence\n", 181 | " for index, tag in enumerate(sentence.tags):\n", 182 | " if tag[1] in ('NN', 'NNP') and index > 3:\n", 183 | " new_sentence = new_sentence.replace(tag[0], \"____\") \n", 184 | " print(new_sentence)\n", 185 | " print(\"\\n==================\\n\")" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Finding Related Posts\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "We are given the task of finding the most related posts from a bunch of posts. \n", 200 | "\n", 201 | "How we will find similarity between posts?\n", 202 | "\n", 203 | "The tricky thing that we have to tackle first is how to turn text into something on which we can calculate similarity." 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### How to do it\n", 211 | "\n", 212 | "+ **Bag of Word**\n", 213 | "\n", 214 | "It totally ignores the order of words and simply uses word counts as their basis.\n", 215 | "\n", 216 | "##### Vectorization\n", 217 | "For each word in the post, its occurrence is counted and noted in a vector. This step is also called vectorization. The vector is typically huge as it contains as many elements as words occur in the whole dataset." 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "|Word | Occurence in post 1 | Occurence in post 2|\n", 225 | "|------|---------------------|--------------------|\n", 226 | "|disk | 1 | 1 |\n", 227 | "|format| 1 | 1 |\n", 228 | "|how |1 | 0 |\n", 229 | "|hard| 1| 1|\n", 230 | "|my |1 |0|\n", 231 | "|problems| 0| 1|\n", 232 | "|to| 1| 0|" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "We can simply calculate the **Euclidean distance** between the vectors of all posts and take the nearest one, it will be too slow." 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Vectorization - Converting raw text into a bag of words \n", 247 | "\n", 248 | "\n" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 5, 254 | "metadata": { 255 | "collapsed": true 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "from sklearn.feature_extraction.text import CountVectorizer" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 6, 265 | "metadata": { 266 | "collapsed": true 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "vectorizer = CountVectorizer(min_df=1)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "The **min_df parameter** determines how CountVectorizer treats seldom words\n", 278 | "(minimum document frequency).\n", 279 | "+ If it is set to an integer, all words occurring less than that value will be dropped\n", 280 | "+ If it is a fraction, all words that occur in less than that fraction of the overall dataset will be dropped. \n", 281 | "\n", 282 | "The max_df parameter works in a similar manner." 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 7, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "content = [\"How to format my hard disk\", \" Hard disk format problems \"]" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 8, 299 | "metadata": { 300 | "collapsed": true 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "X = vectorizer.fit_transform(content)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 9, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "['disk', 'format', 'hard', 'how', 'my', 'problems', 'to']" 316 | ] 317 | }, 318 | "execution_count": 9, 319 | "metadata": {}, 320 | "output_type": "execute_result" 321 | } 322 | ], 323 | "source": [ 324 | "vectorizer.get_feature_names()" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 10, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "name": "stdout", 334 | "output_type": "stream", 335 | "text": [ 336 | "[[1 1]\n", 337 | " [1 1]\n", 338 | " [1 1]\n", 339 | " [1 0]\n", 340 | " [1 0]\n", 341 | " [0 1]\n", 342 | " [1 0]]\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "print(X.toarray().transpose())" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "This means that the first sentence contains all the words except \"problems\", while\n", 355 | "the second contains all but \"how\", \"my\", and \"to\". In fact, these are exactly the same\n", 356 | "columns as we have seen in the preceding table. From X, we can extract a feature\n", 357 | "vector that we will use to compare two documents with each other." 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "Lets consider toy posts" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 11, 370 | "metadata": { 371 | "collapsed": true 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "post1 = \"This is a toy post about machine learning. Actually, it contains not much interesting stuff.\"\n", 376 | "post2 = \"Imaging databases can get huge.\"\n", 377 | "post3 = \"Most imaging databases save images permanently.\"\n", 378 | "post4 = \"Imaging databases store images.\"\n", 379 | "post5 = \"Imaging databases store images. Imaging databases store images. Imaging databases store images.\"" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "Now we will train our vectorizer" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 12, 392 | "metadata": { 393 | "collapsed": true 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "from sklearn.feature_extraction.text import CountVectorizer\n", 398 | "vectorizer = CountVectorizer(min_df=1)\n", 399 | "\n", 400 | "posts = [post1, post2, post3, post4, post5]\n", 401 | "\n", 402 | "X_train = vectorizer.fit_transform(posts)" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 13, 408 | "metadata": { 409 | "collapsed": true 410 | }, 411 | "outputs": [], 412 | "source": [ 413 | "num_samples, num_features = X_train.shape" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 14, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "#samples: 5, #features: 24\n" 426 | ] 427 | } 428 | ], 429 | "source": [ 430 | "print(\"#samples: %d, #features: %d\" % (num_samples,\n", 431 | "num_features))" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 15, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "24\n" 444 | ] 445 | } 446 | ], 447 | "source": [ 448 | "print(len(vectorizer.get_feature_names()))" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 16, 454 | "metadata": { 455 | "collapsed": true 456 | }, 457 | "outputs": [], 458 | "source": [ 459 | "new_post = \"imaging databases\"" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 17, 465 | "metadata": {}, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "array([[0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 471 | " 0, 0]])" 472 | ] 473 | }, 474 | "execution_count": 17, 475 | "metadata": {}, 476 | "output_type": "execute_result" 477 | } 478 | ], 479 | "source": [ 480 | "new_post_vec = vectorizer.transform([new_post])\n", 481 | "new_post_vec.toarray()" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "Now let us define a function for finding the distance between two vectors. It\n", 489 | "will firt normalize the vectors and then find the distance between them." 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 18, 495 | "metadata": { 496 | "collapsed": true 497 | }, 498 | "outputs": [], 499 | "source": [ 500 | "def dist_norm(v1, v2):\n", 501 | " v1_normalized = v1/sp.linalg.norm(v1.toarray())\n", 502 | " v2_normalized = v2/sp.linalg.norm(v2.toarray())\n", 503 | " delta = v1_normalized - v2_normalized\n", 504 | " return sp.linalg.norm(delta.toarray())" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 19, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "name": "stdout", 514 | "output_type": "stream", 515 | "text": [ 516 | "=== Post 0 with dist=1.41: This is a toy post about machine learning. Actually, it contains not much interesting stuff.\n", 517 | "=== Post 1 with dist=0.86: Imaging databases can get huge.\n", 518 | "=== Post 2 with dist=0.92: Most imaging databases save images permanently.\n", 519 | "=== Post 3 with dist=0.77: Imaging databases store images.\n", 520 | "=== Post 4 with dist=0.77: Imaging databases store images. Imaging databases store images. Imaging databases store images.\n", 521 | "Best post is 3 with dist=0.77\n" 522 | ] 523 | } 524 | ], 525 | "source": [ 526 | "import sys\n", 527 | "import scipy as sp\n", 528 | "best_doc = None\n", 529 | "best_dist = sys.maxsize\n", 530 | "best_i = None\n", 531 | "for i, post in enumerate(posts):\n", 532 | " post_vec = X_train.getrow(i)\n", 533 | " d = dist_norm(post_vec, new_post_vec)\n", 534 | " print(\"=== Post %i with dist=%.2f: %s\"%(i, d, post))\n", 535 | " if d>> housing.info() 19 | 20 | RangeIndex: 20640 entries, 0 to 20639 21 | Data columns (total 10 columns): 22 | longitude 20640 non-null float64 23 | latitude 20640 non-null float64 24 | housing_median_age 20640 non-null float64 25 | total_rooms 20640 non-null float64 26 | total_bedrooms 20433 non-null float64 27 | population 20640 non-null float64 28 | households 20640 non-null float64 29 | median_income 20640 non-null float64 30 | median_house_value 20640 non-null float64 31 | ocean_proximity 20640 non-null object 32 | dtypes: float64(9), object(1) 33 | memory usage: 1.6+ MB 34 | 35 | >>> housing["ocean_proximity"].value_counts() 36 | <1H OCEAN 9136 37 | INLAND 6551 38 | NEAR OCEAN 2658 39 | NEAR BAY 2290 40 | ISLAND 5 41 | Name: ocean_proximity, dtype: int64 42 | 43 | >>> housing.describe() 44 | longitude latitude housing_median_age total_rooms \ 45 | count 16513.000000 16513.000000 16513.000000 16513.000000 46 | mean -119.575972 35.639693 28.652335 2622.347605 47 | std 2.002048 2.138279 12.576306 2138.559393 48 | min -124.350000 32.540000 1.000000 6.000000 49 | 25% -121.800000 33.940000 18.000000 1442.000000 50 | 50% -118.510000 34.260000 29.000000 2119.000000 51 | 75% -118.010000 37.720000 37.000000 3141.000000 52 | max -114.310000 41.950000 52.000000 39320.000000 53 | 54 | total_bedrooms population households median_income 55 | count 16355.000000 16513.000000 16513.000000 16513.000000 56 | mean 534.885112 1419.525465 496.975050 3.875651 57 | std 412.716467 1115.715084 375.737945 1.905088 58 | min 2.000000 3.000000 2.000000 0.499900 59 | 25% 295.000000 784.000000 278.000000 2.566800 60 | 50% 433.000000 1164.000000 408.000000 3.541400 61 | 75% 644.000000 1718.000000 602.000000 4.745000 62 | max 6210.000000 35682.000000 5358.000000 15.000100 63 | -------------------------------------------------------------------------------- /machine_learning/datasets/housing/housing.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/datasets/housing/housing.tgz -------------------------------------------------------------------------------- /machine_learning/images/autoencoders/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/classification/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/cnn/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/cnn/test_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/cnn/test_image.png -------------------------------------------------------------------------------- /machine_learning/images/decision_trees/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/decision_trees/iris_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/decision_trees/iris_tree.png -------------------------------------------------------------------------------- /machine_learning/images/deep/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/dim_reduction/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/distributed/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/end_to_end_project/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/end_to_end_project/california.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/end_to_end_project/california.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/ensembles/boosting_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/boosting_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/decision_tree_without_and_with_bagging_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/decision_tree_without_and_with_bagging_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/early_stopping_gbrt_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/early_stopping_gbrt_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/gbrt_learning_rate_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/gbrt_learning_rate_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/gradient_boosting_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/gradient_boosting_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/hard_voting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/hard_voting.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/law_of_large_numbers_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/law_of_large_numbers_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/mnist_feature_importance_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/mnist_feature_importance_plot.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/pasting_bagging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/pasting_bagging.png -------------------------------------------------------------------------------- /machine_learning/images/ensembles/training_diverse_classifiers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/ensembles/training_diverse_classifiers.png -------------------------------------------------------------------------------- /machine_learning/images/fundamentals/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/rl/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/rnn/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/svm/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/tensorflow/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/training_linear_models/README: -------------------------------------------------------------------------------- 1 | Images generated by the notebooks 2 | -------------------------------------------------------------------------------- /machine_learning/images/training_linear_models/gradient_descent_paths_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/gradient_descent_paths_plot.png -------------------------------------------------------------------------------- /machine_learning/images/training_linear_models/gradient_descent_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/gradient_descent_plot.png -------------------------------------------------------------------------------- /machine_learning/images/training_linear_models/sgd_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/machine_learning/images/training_linear_models/sgd_plot.png -------------------------------------------------------------------------------- /machine_learning/naive_bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## A simple example of Naive Bayes" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "[3 4]\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "#Import Library of Gaussian Naive Bayes model\n", 25 | "from sklearn.naive_bayes import GaussianNB\n", 26 | "import numpy as np\n", 27 | "\n", 28 | "#assigning predictor and target variables\n", 29 | "X= np.array([[-3,7],[1,5], [1,2], [-2,0], [2,3], [-4,0], [-1,1], [1,1], [-2,2], [2,7], [-4,1], [-2,7]])\n", 30 | "y = np.array([3, 3, 3, 3, 4, 3, 3, 4, 3, 4, 4, 4])\n", 31 | "#Create a Gaussian Classifier\n", 32 | "model = GaussianNB()\n", 33 | "\n", 34 | "# Train the model using the training sets \n", 35 | "model.fit(X, y)\n", 36 | "\n", 37 | "#Predict Output \n", 38 | "predicted= model.predict([[1,2],[3,4]])\n", 39 | "print(predicted)\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Over Iris Datase - Naive Bayes" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Number of mislabeled points out of a total 150 points : 6\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "from sklearn import datasets\n", 64 | "iris = datasets.load_iris()\n", 65 | "from sklearn.naive_bayes import GaussianNB\n", 66 | "gnb = GaussianNB()\n", 67 | "y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)\n", 68 | "print(\"Number of mislabeled points out of a total %d points : %d\" % (iris.data.shape[0],(iris.target != y_pred).sum()))" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "0.95999999999999996" 80 | ] 81 | }, 82 | "execution_count": 3, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "from sklearn.metrics import accuracy_score\n", 89 | "accuracy_score(y_pred, iris.target)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Iris dataset using random shuffling and split" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 14, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "0.95999999999999996" 108 | ] 109 | }, 110 | "execution_count": 14, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "import numpy as np\n", 117 | "seq = np.random.permutation(150)\n", 118 | "X = iris.data[seq]\n", 119 | "y = iris.target[seq]\n", 120 | "X_train, X_test, y_train, y_test = X[:100], X[100:], y[:100], y[100:]\n", 121 | "gnb = GaussianNB()\n", 122 | "y_pred = gnb.fit(X_train, y_train).predict(X_test)\n", 123 | "\n", 124 | "from sklearn.metrics import accuracy_score\n", 125 | "accuracy_score(y_pred, y_test)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Another Example with Categorical Data" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 24, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "array([[0, 0, 1],\n", 144 | " [1, 0, 0],\n", 145 | " [1, 0, 0],\n", 146 | " [0, 1, 0],\n", 147 | " [0, 0, 1]])" 148 | ] 149 | }, 150 | "execution_count": 24, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "X = np.array([[\"Sunny\"],[\"Overcast\"], [\"Overcast\"], [\"Rainy\"], [\"Sunny\"]])\n", 157 | "y = np.array([1, 0, 1, 0, 0])\n", 158 | "\n", 159 | "from sklearn.preprocessing import LabelBinarizer\n", 160 | "enc = LabelBinarizer()\n", 161 | "# encoder = OneHotEncoder()\n", 162 | "# housing_cat_1hot = encoder.fit_transform(housing_cat_encoded.reshape(-1,1))\n", 163 | "\n", 164 | "X1 = enc.fit_transform(X.reshape(-1,1))\n", 165 | "gnb = GaussianNB()\n", 166 | "gnb.fit(X1, y)\n", 167 | "\n", 168 | "X1" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 29, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "array([0])" 180 | ] 181 | }, 182 | "execution_count": 29, 183 | "metadata": {}, 184 | "output_type": "execute_result" 185 | } 186 | ], 187 | "source": [ 188 | "# Rainy\n", 189 | "gnb.predict([[0, 1, 0]])" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 30, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "array([1])" 201 | ] 202 | }, 203 | "execution_count": 30, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "# Sunny\n", 210 | "gnb.predict([[0, 0, 1]])" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 31, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "array([1])" 222 | ] 223 | }, 224 | "execution_count": 31, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "# Overcast\n", 231 | "gnb.predict([[1, 0, 0]])" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 32, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "data": { 241 | "text/plain": [ 242 | "array(['Overcast', 'Rainy', 'Sunny'],\n", 243 | " dtype=' soc.religion.christian\n", 269 | "'OpenGL on the GPU is fast' => comp.graphics\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "docs_new = ['God is love', 'OpenGL on the GPU is fast']\n", 275 | "X_new_counts = count_vect.transform(docs_new)\n", 276 | "X_new_tfidf = tfidf_transformer.transform(X_new_counts)\n", 277 | "\n", 278 | "predicted = clf.predict(X_new_tfidf)\n", 279 | "\n", 280 | "for doc, category in zip(docs_new, predicted):\n", 281 | " print('%r => %s' % (doc, twenty_train.target_names[category]))" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "Building a pipeline" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 18, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "from sklearn.pipeline import Pipeline\n", 298 | "text_clf = Pipeline([('vect', CountVectorizer()),\n", 299 | " ('tfidf', TfidfTransformer()),\n", 300 | " ('clf', MultinomialNB()),\n", 301 | "])" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 19, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "Pipeline(memory=None,\n", 313 | " steps=[('vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n", 314 | " dtype=, encoding='utf-8', input='content',\n", 315 | " lowercase=True, max_df=1.0, max_features=None, min_df=1,\n", 316 | " ngram_range=(1, 1), preprocessor=None, stop_words=None,\n", 317 | " strip...inear_tf=False, use_idf=True)), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])" 318 | ] 319 | }, 320 | "execution_count": 19, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "text_clf.fit(twenty_train.data, twenty_train.target) " 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Evaluation of the performance on the test set" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 20, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "0.8348868175765646" 345 | ] 346 | }, 347 | "execution_count": 20, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "import numpy as np\n", 354 | "twenty_test = fetch_20newsgroups(subset='test',\n", 355 | " categories=categories, shuffle=True, random_state=42)\n", 356 | "docs_test = twenty_test.data\n", 357 | "predicted = text_clf.predict(docs_test)\n", 358 | "np.mean(predicted == twenty_test.target) " 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "Now let us use Linear support vector machine (SVM)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 21, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "0.9127829560585885" 377 | ] 378 | }, 379 | "execution_count": 21, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "from sklearn.linear_model import SGDClassifier\n", 386 | "text_clf = Pipeline([('vect', CountVectorizer()),\n", 387 | " ('tfidf', TfidfTransformer()),\n", 388 | " ('clf', SGDClassifier(loss='hinge', penalty='l2',\n", 389 | " alpha=1e-3, random_state=42,\n", 390 | " max_iter=5, tol=None)),\n", 391 | "])\n", 392 | "text_clf.fit(twenty_train.data, twenty_train.target) \n", 393 | "\n", 394 | "predicted = text_clf.predict(docs_test)\n", 395 | "np.mean(predicted == twenty_test.target) " 396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "metadata": {}, 401 | "source": [ 402 | "scikit-learn further provides utilities for more detailed performance analysis of the results:\n", 403 | "\n" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 22, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "name": "stdout", 413 | "output_type": "stream", 414 | "text": [ 415 | " precision recall f1-score support\n", 416 | "\n", 417 | " alt.atheism 0.95 0.81 0.87 319\n", 418 | " comp.graphics 0.88 0.97 0.92 389\n", 419 | " sci.med 0.94 0.90 0.92 396\n", 420 | "soc.religion.christian 0.90 0.95 0.93 398\n", 421 | "\n", 422 | " avg / total 0.92 0.91 0.91 1502\n", 423 | "\n" 424 | ] 425 | }, 426 | { 427 | "data": { 428 | "text/plain": [ 429 | "array([[258, 11, 15, 35],\n", 430 | " [ 4, 379, 3, 3],\n", 431 | " [ 5, 33, 355, 3],\n", 432 | " [ 5, 10, 4, 379]])" 433 | ] 434 | }, 435 | "execution_count": 22, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": [ 441 | "from sklearn import metrics\n", 442 | "print(metrics.classification_report(twenty_test.target, predicted,\n", 443 | " target_names=twenty_test.target_names))\n", 444 | "\n", 445 | "metrics.confusion_matrix(twenty_test.target, predicted)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "Parameter tuning using grid search" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 23, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "from sklearn.model_selection import GridSearchCV\n", 462 | "parameters = {'vect__ngram_range': [(1, 1), (1, 2)],\n", 463 | " 'tfidf__use_idf': (True, False),\n", 464 | " 'clf__alpha': (1e-2, 1e-3),\n", 465 | "}" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": 24, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)\n" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 25, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "gs_clf = gs_clf.fit(twenty_train.data[:400], twenty_train.target[:400])\n" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 26, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/plain": [ 494 | "'soc.religion.christian'" 495 | ] 496 | }, 497 | "execution_count": 26, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "twenty_train.target_names[gs_clf.predict(['God is love'])[0]]\n" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 27, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "data": { 513 | "text/plain": [ 514 | "0.9" 515 | ] 516 | }, 517 | "execution_count": 27, 518 | "metadata": {}, 519 | "output_type": "execute_result" 520 | } 521 | ], 522 | "source": [ 523 | "gs_clf.best_score_ " 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 28, 529 | "metadata": {}, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "clf__alpha: 0.001\n", 536 | "tfidf__use_idf: True\n", 537 | "vect__ngram_range: (1, 1)\n" 538 | ] 539 | } 540 | ], 541 | "source": [ 542 | "for param_name in sorted(parameters.keys()):\n", 543 | " print(\"%s: %r\" % (param_name, gs_clf.best_params_[param_name]))" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [] 552 | } 553 | ], 554 | "metadata": { 555 | "kernelspec": { 556 | "display_name": "Python 3", 557 | "language": "python", 558 | "name": "python3" 559 | }, 560 | "language_info": { 561 | "codemirror_mode": { 562 | "name": "ipython", 563 | "version": 3 564 | }, 565 | "file_extension": ".py", 566 | "mimetype": "text/x-python", 567 | "name": "python", 568 | "nbconvert_exporter": "python", 569 | "pygments_lexer": "ipython3", 570 | "version": "3.6.3" 571 | } 572 | }, 573 | "nbformat": 4, 574 | "nbformat_minor": 2 575 | } 576 | -------------------------------------------------------------------------------- /projects/autoquiz/README.MD: -------------------------------------------------------------------------------- 1 | Here we are using the NLP to automatically create fill-in-the-blanks type questions. 2 | -------------------------------------------------------------------------------- /projects/autoquiz/auto_create_quiz.py.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Requirement already up-to-date: nltk in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages\n", 15 | "Requirement already up-to-date: six in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from nltk)\n", 16 | "Requirement already up-to-date: textblob in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages\n", 17 | "Requirement already up-to-date: nltk>=3.1 in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from textblob)\n", 18 | "Requirement already up-to-date: six in /Users/shahrukh/Code/Work/Cxl_Notebooks/env/lib/python3.6/site-packages (from nltk>=3.1->textblob)\n", 19 | "[nltk_data] Downloading package brown to /usr/local/share/nltk_data...\n", 20 | "[nltk_data] Package brown is already up-to-date!\n", 21 | "[nltk_data] Downloading package punkt to /usr/local/share/nltk_data...\n", 22 | "[nltk_data] Package punkt is already up-to-date!\n", 23 | "[nltk_data] Downloading package wordnet to\n", 24 | "[nltk_data] /usr/local/share/nltk_data...\n", 25 | "[nltk_data] Package wordnet is already up-to-date!\n", 26 | "[nltk_data] Downloading package averaged_perceptron_tagger to\n", 27 | "[nltk_data] /usr/local/share/nltk_data...\n", 28 | "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", 29 | "[nltk_data] date!\n", 30 | "[nltk_data] Downloading package conll2000 to\n", 31 | "[nltk_data] /usr/local/share/nltk_data...\n", 32 | "[nltk_data] Package conll2000 is already up-to-date!\n", 33 | "[nltk_data] Downloading package movie_reviews to\n", 34 | "[nltk_data] /usr/local/share/nltk_data...\n", 35 | "[nltk_data] Package movie_reviews is already up-to-date!\n", 36 | "Finished.\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "# With ! we can run the unix commands from the jupyter notebook\n", 42 | "#nltk is a great natual language processing library in Python\n", 43 | "!pip install -U nltk\n", 44 | "\n", 45 | "# Lets install textblob\n", 46 | "# textblob is a simple wrapper over NLTK\n", 47 | "!pip install -U textblob\n", 48 | "!python -m textblob.download_corpora" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 14, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Import TextBlob module\n", 58 | "from textblob import TextBlob" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 15, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# This is the text that we are going to use. \n", 68 | "# This text is from wikipedia on World War 2 - https://en.wikipedia.org/wiki/World_War_II\n", 69 | "# Note: triple quotes are used for defining multi line string\n", 70 | "ww2 = '''\n", 71 | "World War II (often abbreviated to WWII or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier. It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the Axis. It was the most widespread war in history, and directly involved more than 100 million people from over 30 countries. In a state of total war, the major participants threw their entire economic, industrial, and scientific capabilities behind the war effort, erasing the distinction between civilian and military resources.\n", 72 | "\n", 73 | "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the Soviet Union and China. It included massacres, the deliberate genocide of the Holocaust, strategic bombing, starvation, disease and the first use of nuclear weapons in history.[1][2][3][4]\n", 74 | "\n", 75 | "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of China in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom. Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, Germany conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan. Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, Finland, Romania and the Baltic states. The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with campaigns including the North Africa and East Africa campaigns, the aerial Battle of Britain, the Blitz bombing campaign, and the Balkan Campaign, as well as the long-running Battle of the Atlantic. On 22 June 1941, the European Axis powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the Axis military forces into a war of attrition. In December 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n", 76 | "\n", 77 | "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and Germany and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union. In 1943, with a series of German defeats on the Eastern Front, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all fronts. In 1944, the Western Allies invaded German-occupied France, while the Soviet Union regained all of its territorial losses and invaded Germany and its allies. During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western Pacific islands.\n", 78 | "\n", 79 | "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 May 1945. Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of Hiroshima and Nagasaki on 6 August and 9 August respectively. With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of Manchuria, Japan formally surrendered on 2 September 1945. Thus ended the war in Asia, cementing the total victory of the Allies.\n", 80 | "\n", 81 | "World War II changed the political alignment and social structure of the world. The United Nations (UN) was established to foster international co-operation and prevent future conflicts. The victorious great powers—China, France, the Soviet Union, the United Kingdom, and the United States—became the permanent members of the United Nations Security Council.[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the Cold War, which lasted for the next 46 years. Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and Asia began. Most countries whose industries had been damaged moved towards economic recovery. Political integration, especially in Europe, emerged as an effort to end pre-war enmities and to create a common identity.[8]\n", 82 | "'''\n", 83 | "\n", 84 | "\n", 85 | "# Uncomment the code below and run it if you are using Python 3\n", 86 | "# ww2 = unicode(ww2, 'utf-8')" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 16, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "ww2b = TextBlob(ww2)\n", 96 | "sposs = {}\n", 97 | "for sentence in ww2b.sentences:\n", 98 | " \n", 99 | " # We are going to prepare the dictionary of parts-of-speech as the key and value is a list of words:\n", 100 | " # {part-of-speech: [word1, word2]}\n", 101 | " # We are basically grouping the words based on the parts-of-speech\n", 102 | " poss = {}\n", 103 | " sposs[sentence.string] = poss;\n", 104 | " for t in sentence.tags:\n", 105 | " tag = t[1]\n", 106 | " if tag not in poss:\n", 107 | " poss[tag] = []\n", 108 | " poss[tag].append(t[0])\n", 109 | "\n", 110 | "\n", 111 | "# Uncomment the code below and run it if you are using Python 3\n", 112 | "# ww2b = TextBlob(ww2)\n", 113 | "# sposs = {}\n", 114 | "# for sentence in ww2b.sentences:\n", 115 | " \n", 116 | "# # We are going to prepare the dictionary of parts-of-speech as the key and value is a list of words:\n", 117 | "# # {part-of-speech: [word1, word2]}\n", 118 | "# # We are basically grouping the words based on the parts-of-speech\n", 119 | " \n", 120 | "# poss = {}\n", 121 | "# sposs[sentence.string] = poss;\n", 122 | "# for t in sentence.tags:\n", 123 | "# tag = t[1].encode('utf-8')\n", 124 | "# if tag not in poss:\n", 125 | "# poss[tag] = []\n", 126 | "# poss[tag].append(t[0].encode('utf-8'))\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 17, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "import random\n", 136 | "import re\n", 137 | "\n", 138 | "# Create the blank in string\n", 139 | "def replaceIC(word, sentence):\n", 140 | " insensitive_hippo = re.compile(re.escape(word), re.IGNORECASE)\n", 141 | " return insensitive_hippo.sub('__________________', sentence)\n", 142 | "\n", 143 | "# For a sentence create a blank space.\n", 144 | "# It first tries to randomly selection proper-noun \n", 145 | "# and if the proper noun is not found, it selects a noun randomly.\n", 146 | "def removeWord(sentence, poss):\n", 147 | " words = None\n", 148 | " if 'NNP' in poss:\n", 149 | " words = poss['NNP']\n", 150 | " elif 'NN' in poss:\n", 151 | " words = poss['NN']\n", 152 | " else:\n", 153 | " print(\"NN and NNP not found\")\n", 154 | " return (None, sentence, None)\n", 155 | " if len(words) > 0:\n", 156 | " word = random.choice(words)\n", 157 | " replaced = replaceIC(word, sentence)\n", 158 | " return (word, sentence, replaced)\n", 159 | " else:\n", 160 | " print(\"words are empty\")\n", 161 | " return (None, sentence, None)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 18, 167 | "metadata": { 168 | "scrolled": true 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "\n", 176 | "World War __________________ (often abbreviated to WW__________________ or WW2), also known as the Second World War, was a global war that lasted from 1939 to 1945, although related conflicts began earlier.\n", 177 | "\n", 178 | "===============\n", 179 | "II\n", 180 | "===============\n", 181 | "\n", 182 | "\n", 183 | "It involved the vast majority of the world's countries—including all of the great powers—eventually forming two opposing military alliances: the Allies and the __________________.\n", 184 | "\n", 185 | "===============\n", 186 | "Axis\n", 187 | "===============\n", 188 | "\n", 189 | "\n", 190 | "It was the most widespread __________________ in history, and directly involved more than 100 million people from over 30 countries.\n", 191 | "\n", 192 | "===============\n", 193 | "war\n", 194 | "===============\n", 195 | "\n", 196 | "\n", 197 | "In a state of total __________________, the major participants threw their entire economic, industrial, and scientific capabilities behind the __________________ effort, erasing the distinction between civilian and military resources.\n", 198 | "\n", 199 | "===============\n", 200 | "war\n", 201 | "===============\n", 202 | "\n", 203 | "\n", 204 | "World War II was the deadliest conflict in human history, marked by 50 million to 85 million fatalities, most of which were civilians in the __________________ Union and China.\n", 205 | "\n", 206 | "===============\n", 207 | "Soviet\n", 208 | "===============\n", 209 | "\n", 210 | "\n", 211 | "It included massacres, the deliberate genocide of the __________________, strategic bombing, starvation, disease and the first use of nuclear weapons in history.\n", 212 | "\n", 213 | "===============\n", 214 | "Holocaust\n", 215 | "===============\n", 216 | "\n", 217 | "\n", 218 | "[1][2][3][4]\n", 219 | "\n", 220 | "The Empire of Japan aimed to dominate Asia and the Pacific and was already at war with the Republic of __________________ in 1937,[5] but the world war is generally said to have begun on 1 September 1939[6] with the invasion of Poland by Nazi Germany and subsequent declarations of war on Germany by France and the United Kingdom.\n", 221 | "\n", 222 | "===============\n", 223 | "China\n", 224 | "===============\n", 225 | "\n", 226 | "\n", 227 | "Supplied by the Soviet Union, from late 1939 to early 1941, in a series of campaigns and treaties, __________________ conquered or controlled much of continental Europe, and formed the Axis alliance with Italy and Japan.\n", 228 | "\n", 229 | "===============\n", 230 | "Germany\n", 231 | "===============\n", 232 | "\n", 233 | "\n", 234 | "Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union partitioned and annexed territories of their European neighbours, Poland, __________________, Romania and the Baltic states.\n", 235 | "\n", 236 | "===============\n", 237 | "Finland\n", 238 | "===============\n", 239 | "\n", 240 | "\n", 241 | "The war continued primarily between the European Axis powers and the coalition of the United Kingdom and the British Commonwealth, with __________________s including the North Africa and East Africa __________________s, the aerial Battle of Britain, the Blitz bombing __________________, and the Balkan __________________, as well as the long-running Battle of the Atlantic.\n", 242 | "\n", 243 | "===============\n", 244 | "Campaign\n", 245 | "===============\n", 246 | "\n", 247 | "\n", 248 | "On 22 June 1941, the European __________________ powers launched an invasion of the Soviet Union, opening the largest land theatre of war in history, which trapped the major part of the __________________ military forces into a war of attrition.\n", 249 | "\n", 250 | "===============\n", 251 | "Axis\n", 252 | "===============\n", 253 | "\n", 254 | "\n", 255 | "In __________________ 1941, Japan attacked the United States and European colonies in the Pacific Ocean, and quickly conquered much of the Western Pacific.\n", 256 | "\n", 257 | "===============\n", 258 | "December\n", 259 | "===============\n", 260 | "\n", 261 | "\n", 262 | "The Axis advance halted in 1942 when Japan lost the critical Battle of Midway, and __________________ and Italy were defeated in North Africa and then, decisively, at Stalingrad in the Soviet Union.\n", 263 | "\n", 264 | "===============\n", 265 | "Germany\n", 266 | "===============\n", 267 | "\n", 268 | "\n", 269 | "In 1943, with a series of German defeats on the Eastern __________________, the Allied invasion of Sicily and the Allied invasion of Italy which brought about Italian surrender, and Allied victories in the Pacific, the Axis lost the initiative and undertook strategic retreat on all __________________s.\n", 270 | "\n", 271 | "===============\n", 272 | "Front\n", 273 | "===============\n", 274 | "\n", 275 | "\n", 276 | "In 1944, the Western Allies invaded German-occupied France, while the __________________ Union regained all of its territorial losses and invaded Germany and its allies.\n", 277 | "\n", 278 | "===============\n", 279 | "Soviet\n", 280 | "===============\n", 281 | "\n", 282 | "\n", 283 | "During 1944 and 1945 the Japanese suffered major reverses in mainland Asia in South Central China and Burma, while the Allies crippled the Japanese Navy and captured key Western __________________ islands.\n", 284 | "\n", 285 | "===============\n", 286 | "Pacific\n", 287 | "===============\n", 288 | "\n", 289 | "\n", 290 | "The war in Europe concluded with an invasion of Germany by the Western Allies and the Soviet Union, culminating in the capture of Berlin by Soviet troops, the suicide of Adolf Hitler and the subsequent German unconditional surrender on 8 __________________ 1945.\n", 291 | "\n", 292 | "===============\n", 293 | "May\n", 294 | "===============\n", 295 | "\n", 296 | "\n", 297 | "Following the Potsdam Declaration by the Allies on 26 July 1945 and the refusal of Japan to surrender under its terms, the United States dropped atomic bombs on the Japanese cities of __________________ and Nagasaki on 6 August and 9 August respectively.\n", 298 | "\n", 299 | "===============\n", 300 | "Hiroshima\n", 301 | "===============\n", 302 | "\n", 303 | "\n", 304 | "With an invasion of the Japanese archipelago imminent, the possibility of additional atomic bombings and the Soviet invasion of __________________, Japan formally surrendered on 2 September 1945.\n", 305 | "\n", 306 | "===============\n", 307 | "Manchuria\n", 308 | "===============\n", 309 | "\n", 310 | "\n", 311 | "Thus ended the war in __________________, cementing the total victory of the Allies.\n", 312 | "\n", 313 | "===============\n", 314 | "Asia\n", 315 | "===============\n", 316 | "\n", 317 | "\n", 318 | "__________________ War II changed the political alignment and social structure of the __________________.\n", 319 | "\n", 320 | "===============\n", 321 | "World\n", 322 | "===============\n", 323 | "\n", 324 | "\n", 325 | "The United __________________ (UN) was established to foster international co-operation and prevent future conflicts.\n", 326 | "\n", 327 | "===============\n", 328 | "Nations\n", 329 | "===============\n", 330 | "\n", 331 | "\n", 332 | "The victorious great powers—China, France, the Soviet Union, the United __________________, and the United States—became the permanent members of the United Nations Security Council.\n", 333 | "\n", 334 | "===============\n", 335 | "Kingdom\n", 336 | "===============\n", 337 | "\n", 338 | "\n", 339 | "[7] The Soviet Union and the United States emerged as rival superpowers, setting the stage for the __________________ War, which lasted for the next 46 years.\n", 340 | "\n", 341 | "===============\n", 342 | "Cold\n", 343 | "===============\n", 344 | "\n", 345 | "\n", 346 | "Meanwhile, the influence of European great powers waned, while the decolonisation of Africa and __________________ began.\n", 347 | "\n", 348 | "===============\n", 349 | "Asia\n", 350 | "===============\n", 351 | "\n", 352 | "\n", 353 | "Most countries whose industries had been damaged moved towards economic __________________.\n", 354 | "\n", 355 | "===============\n", 356 | "recovery\n", 357 | "===============\n", 358 | "\n", 359 | "\n", 360 | "Political integration, especially in __________________, emerged as an effort to end pre-war enmities and to create a common identity.\n", 361 | "\n", 362 | "===============\n", 363 | "Europe\n", 364 | "===============\n", 365 | "\n", 366 | "\n", 367 | "NN and NNP not found\n", 368 | "Founded none for \n", 369 | "[8]\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "# Iterate over the sentenses \n", 375 | "for sentence in sposs.keys():\n", 376 | " poss = sposs[sentence]\n", 377 | " (word, osentence, replaced) = removeWord(sentence, poss)\n", 378 | " if replaced is None:\n", 379 | " print (\"Founded none for \")\n", 380 | " print(sentence)\n", 381 | " else:\n", 382 | " print(replaced)\n", 383 | " print (\"\\n===============\")\n", 384 | " print(word)\n", 385 | " print (\"===============\")\n", 386 | " print(\"\\n\")" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [] 395 | } 396 | ], 397 | "metadata": { 398 | "kernelspec": { 399 | "display_name": "Python 3", 400 | "language": "python", 401 | "name": "python3" 402 | }, 403 | "language_info": { 404 | "codemirror_mode": { 405 | "name": "ipython", 406 | "version": 3 407 | }, 408 | "file_extension": ".py", 409 | "mimetype": "text/x-python", 410 | "name": "python", 411 | "nbconvert_exporter": "python", 412 | "pygments_lexer": "ipython3", 413 | "version": "3.6.3" 414 | }, 415 | "toc": { 416 | "nav_menu": {}, 417 | "number_sections": true, 418 | "sideBar": true, 419 | "skip_h1_title": false, 420 | "toc_cell": false, 421 | "toc_position": {}, 422 | "toc_section_display": "block", 423 | "toc_window_display": false 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 2 428 | } 429 | -------------------------------------------------------------------------------- /projects/deploy_mnist/README.md: -------------------------------------------------------------------------------- 1 | The purpose of this project is to show how to move your machine learning models in production. In this project, we'll train the MNIST model, save the model to the file, load the model from the file in the flask app and predict the digit for the new images. Since input images in MNIST are 28x28 greyscale images, the images used for predictions have to be processed. They should be converted to greyscale and resized to 28x28 pixels. Because of this, you may not get the accuracy in predictions but you will learn how to move your model to production (and which is the sole objective of this project). 2 | 3 | We'll use Flask for exposing the model using the REST API for predictions. Flask is a micro web framework written in Python. It's lightweight and easy to learn. 4 | 5 | # Steps 6 | 7 | ## Clone to repository 8 | ``` 9 | git clone https://github.com/cloudxlab/ml.git 10 | ``` 11 | 12 | ## Set the Python path - On CloudxLab, the default installation is python2 13 | 14 | ``` 15 | export PATH=/usr/local/anaconda/bin/:$PATH 16 | ``` 17 | 18 | ## Create virtual environment 19 | 20 | ``` 21 | cd ml/projects/deploy_mnist/ 22 | virtualenv -p python3 venv 23 | ``` 24 | 25 | ## Activate virtual environment 26 | 27 | ``` 28 | source venv/bin/activate 29 | ``` 30 | 31 | ## Install the flask and other requirements 32 | ``` 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | ## Train the model 37 | 38 | The trained model will be saved in trained_models directory 39 | ``` 40 | mkdir -p trained_models 41 | python train_mnist_model.py 42 | ``` 43 | 44 | ## Start the flask server for predictions 45 | 46 | For the API code, see the file `predictions.py` under `flask_app` directory. Run the server on port 4041. If the port is already in use then use any of the port in the range of 4040 to 4060 as on CloudxLab only these ports are open for public access. 47 | 48 | ``` 49 | cd flask_app 50 | export LC_ALL=en_US.utf-8 51 | export LANG=en_US.utf-8 52 | export FLASK_APP=predictions.py 53 | flask run --host 0.0.0.0 --port 4041 54 | ``` 55 | 56 | ## Predict the digit for the new image 57 | 58 | We will use the test images for predictions. Login to another console and run below commands. 59 | ``` 60 | cd ml/projects/deploy_mnist/ 61 | curl -F 'file=@test-images/7.png' 127.0.0.1:4041/predict 62 | ``` 63 | 64 | The REST API will return something like below JSON object 65 | 66 | ```{"digit":7}``` 67 | 68 | ## Public API 69 | 70 | Your flask server is running on the CloudxLab web console. Let's say your web console is e.cloudxlab.com then the end Point URL will be http://e.cloudxlab.com:4041/predict 71 | 72 | You can call/use this REST API by using the above mentioned End Point URL. 73 | 74 | Replace 4041 with the port number on which your server is running. 75 | 76 | ## Next Steps 77 | 78 | The above flask server runs in the development mode. For production usage, you would like to run the server using Nginx and uWSGI. For details please follow this documentation http://flask.pocoo.org/docs/1.0/deploying/ 79 | -------------------------------------------------------------------------------- /projects/deploy_mnist/flask_app/predictions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.externals import joblib 3 | from PIL import Image 4 | from flask import Flask, jsonify, request 5 | 6 | # Create flask app 7 | app = Flask(__name__) 8 | 9 | # Load the previously trained model from the file 10 | model = joblib.load("../trained_models/mnist_model.pkl") 11 | 12 | # /predict is the end point 13 | @app.route('/predict', methods=["POST"]) 14 | def predict_image(): 15 | 16 | # Read the image uploaded by the curl command 17 | requested_img = request.files['file'] 18 | 19 | ''' 20 | Convert the uploaded image to greyscale. 21 | Since in MNIST the training images are greyscaled hence we will have to convert the uploaded image to greyscale 22 | ''' 23 | greyscale_img = Image.open(requested_img).convert('L') 24 | 25 | ''' 26 | Resize the uploaded image to 28x28 pixels. 27 | Since in MNIST the training images are of 28x28 pixels hence we will have to resize the uploaded image to 28x28 pixels. 28 | ''' 29 | resized_image = greyscale_img.resize((28,28)) 30 | 31 | # Convert the image to an array 32 | img = np.asarray(resized_image) 33 | 34 | # Reshape the image to (784, 1) 35 | img = img.reshape(784,) 36 | 37 | # Predict the digit using the trained model 38 | pred = model.predict(img.reshape(1, -1)) 39 | 40 | # Get the digit 41 | result = int(pred.tolist()[0]) 42 | 43 | # Return the JSON response 44 | return jsonify({"digit": result}) 45 | -------------------------------------------------------------------------------- /projects/deploy_mnist/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.0.2 2 | numpy==1.16.2 3 | #Pillow==2.2.1 4 | scikit-learn==0.20.3 5 | pillow==6.0.0 6 | -------------------------------------------------------------------------------- /projects/deploy_mnist/test-images/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/2.png -------------------------------------------------------------------------------- /projects/deploy_mnist/test-images/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/5.png -------------------------------------------------------------------------------- /projects/deploy_mnist/test-images/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/projects/deploy_mnist/test-images/7.png -------------------------------------------------------------------------------- /projects/deploy_mnist/train_mnist_model.py: -------------------------------------------------------------------------------- 1 | #from sklearn.datasets import fetch_mldata 2 | from sklearn.datasets import fetch_openml 3 | import numpy as np 4 | from sklearn.linear_model import SGDClassifier 5 | from sklearn.metrics import accuracy_score 6 | from sklearn.externals import joblib 7 | 8 | np.random.seed(42) 9 | #mnist = fetch_mldata("MNIST original") 10 | mnist = fetch_openml('mnist_784', version=1, cache=True) 11 | mnist.target = mnist.target.astype(np.int8) 12 | X, y = mnist["data"], mnist["target"] 13 | 14 | X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:] 15 | 16 | shuffle_index = np.random.permutation(60000) 17 | X_train, y_train = X_train[shuffle_index], y_train[shuffle_index] 18 | 19 | # Train SGDClassifier 20 | sgd_clf = SGDClassifier(random_state=42, max_iter=10) 21 | sgd_clf.fit(X_train, y_train) 22 | 23 | # Print the accuracy of SGDClassifier 24 | y_train_predict = sgd_clf.predict(X_train) 25 | sgd_accuracy = accuracy_score(y_train, y_train_predict) 26 | print("Accuracy is %s " % sgd_accuracy) 27 | 28 | # Dump the model to the file 29 | joblib.dump(sgd_clf, "trained_models/mnist_model.pkl") 30 | -------------------------------------------------------------------------------- /python/.ipynb_checkpoints/Python - Numpy-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "a = np.array([1, 2, 3])" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "type(a)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "b = np.array((3, 4, 5))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "type(b)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "x = np.zeros((3,4)) \n", 61 | "type(x.dtype)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "x.shape" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "y = np.ones( (3,4), dtype=np.int16 ) \n", 80 | "y" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "print(x.dtype)\n", 90 | "print(y.dtype)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "np.full( (3,4), 0.11 ) " 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "#array([[ 3.14159265 , 3.14159265 , 3.14159265 , 3.14159265 ],\n", 118 | " [ 3.14159265 , 3.14159265 , 3.14159265 , 3.14159265 ],\n", 119 | " [ 3.14159265 , 3.14159265 , 3.14159265 , 3.14159265 ]])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "np.arange( 10, 30, 5 )" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "%timeit x" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "np.arange( 0, 2, 0.3 ) " 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "x = [\n", 165 | " [\n", 166 | " [1],\n", 167 | " [1], \n", 168 | " [1],\n", 169 | " [1]\n", 170 | " ],\n", 171 | " [\n", 172 | " [1],\n", 173 | " [1], \n", 174 | " [1],\n", 175 | " [1]\n", 176 | " ]\n", 177 | "]" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "np.array(x).shape" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "np.linspace(0, 5/3, 6)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Make a 2x3 matrix having random floats between 0 and 1:\n", 205 | "np.random.rand(2,3)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "x = np.empty((2,3))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "x" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "x.ndim" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "x.shape" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "x.size\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "x[0]" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "collapsed": true 278 | }, 279 | "outputs": [], 280 | "source": [ 281 | "c = np.arange(1, 5)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "c" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "c.dtype" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "c.itemsize" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "a = np.arange(6)\n", 318 | "print(a)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "b = a.reshape(2, 3)\n", 328 | "print(b)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "a = np.array([1, 5, 3, 19, 13, 7, 3])\n", 338 | "a[3]" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "a[2:5]" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "print(a[2::2])\n", 357 | "print(a[2::3])\n", 358 | "print(a[::-1])" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "a = np.array([1, 5, 3, 19, 13, 7, 3])\n", 368 | "a[1:3] = -1\n", 369 | "print(a)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "b = [1, 2, 5, 7, 8]" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "b[1:3] = -1\n" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "a = np.array([1, 2, 5, 7, 8])\n", 399 | "a_slice = a[1:5]\n", 400 | "a_slice[1] = 1000\n", 401 | "print(a)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "another_slice = a[2:6].copy()\n", 411 | "another_slice[1] = 23333\n", 412 | "print(another_slice)\n", 413 | "print(a)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": { 420 | "collapsed": true 421 | }, 422 | "outputs": [], 423 | "source": [ 424 | "x = np.random.rand(5,8)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "x" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "x[1]" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "x[1, 1]" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "metadata": {}, 458 | "outputs": [], 459 | "source": [ 460 | "x[0:2, 0:2]" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "collapsed": true 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "x = np.random.rand(5,5, 5)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "x" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": {}, 487 | "outputs": [], 488 | "source": [ 489 | "x[0:2, 0:2, 0:2]" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": { 496 | "collapsed": true 497 | }, 498 | "outputs": [], 499 | "source": [ 500 | "a = np.arange(12).reshape(3, 4)" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "a" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": { 516 | "collapsed": true 517 | }, 518 | "outputs": [], 519 | "source": [ 520 | "rows_on = np.array([ True, False, True])" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "rows_on" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "a" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": null, 544 | "metadata": {}, 545 | "outputs": [], 546 | "source": [ 547 | "a[rows_on]" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "a = np.array( [20, 30, 40, 50] )\n", 557 | "b = np.arange( 4 )\n", 558 | "c = a + b\n", 559 | "c" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "c = a - b\n", 569 | "c" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": {}, 576 | "outputs": [], 577 | "source": [ 578 | "A = np.array( [[1,1],\n", 579 | " [0,1]] )\n", 580 | "\n", 581 | "B = np.array( [[2,0],\n", 582 | " [3,4]] )\n", 583 | "print(A*B) # element wise product\n", 584 | "\n", 585 | "print([[1*2, 1*0],\n", 586 | " [0*3, 1*4]])" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": null, 592 | "metadata": {}, 593 | "outputs": [], 594 | "source": [ 595 | "print(np.dot(A, B))\n", 596 | "print([[1*2+ 1*3, 1*0+ 1*4],\n", 597 | " [0*2+1*3, 0*0+1*4]])" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": [ 606 | "a = np.array( [20, 30, 40, 50] )\n", 607 | "b = np.arange(1, 5)\n", 608 | "c = a / b\n", 609 | "print(c)" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "metadata": {}, 616 | "outputs": [], 617 | "source": [ 618 | "c" 619 | ] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "execution_count": null, 624 | "metadata": {}, 625 | "outputs": [], 626 | "source": [ 627 | "a = np.array( [20, 30, 40, 50] )\n", 628 | "b = np.arange(1, 5)\n", 629 | "c = a // b\n", 630 | "c\n" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "#Modulus operator can be applied on NumPy arrays as shown below. They apply element wise.\n", 640 | "a = np.array( [20, 30, 40, 50] )\n", 641 | "b = np.arange(1, 5)\n", 642 | "c = a % b\n", 643 | "c\n" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": null, 649 | "metadata": {}, 650 | "outputs": [], 651 | "source": [ 652 | "a = np.array( [20, 30, 40, 50] )\n", 653 | "b = np.arange(1, 5)\n", 654 | "c = a ** b\n", 655 | "c\n", 656 | "\n" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": {}, 663 | "outputs": [], 664 | "source": [ 665 | "m = np.array([20, -5, 30, 40])\n", 666 | "m < [15, 16, 35, 36]" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": null, 672 | "metadata": {}, 673 | "outputs": [], 674 | "source": [ 675 | "m < 25 " 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [ 684 | "m[m < 25]" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": { 691 | "collapsed": true 692 | }, 693 | "outputs": [], 694 | "source": [] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": {}, 700 | "outputs": [], 701 | "source": [ 702 | "h = np.arange(5).reshape(1, 1, 5)\n", 703 | "print(h)\n", 704 | "print(h.shape)" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": null, 710 | "metadata": {}, 711 | "outputs": [], 712 | "source": [ 713 | "b = np.array([10, 20, 30, 40, 50])\n", 714 | "b.reshape(1,1,5)" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": null, 720 | "metadata": {}, 721 | "outputs": [], 722 | "source": [ 723 | "x = h + [10, 20, 30, 40, 50]\n", 724 | "print(x)\n", 725 | "print(x.shape)" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [ 734 | "h + [[[10, 20, 30, 40, 50]]]" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": null, 740 | "metadata": {}, 741 | "outputs": [], 742 | "source": [ 743 | "h + b.reshape(1,1,5)" 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": null, 749 | "metadata": { 750 | "collapsed": true 751 | }, 752 | "outputs": [], 753 | "source": [ 754 | "k = np.arange(6).reshape(2, 3)" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [ 763 | "k\n" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": null, 769 | "metadata": {}, 770 | "outputs": [], 771 | "source": [ 772 | "k + [100, 200, 300] " 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": null, 778 | "metadata": {}, 779 | "outputs": [], 780 | "source": [ 781 | " k + [33, 44]" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": null, 787 | "metadata": {}, 788 | "outputs": [], 789 | "source": [ 790 | "a = np.array([[-2.5, 3.1, 7], [10, 11, 12]])\n", 791 | "print(\"mean =\", a.mean())" 792 | ] 793 | }, 794 | { 795 | "cell_type": "code", 796 | "execution_count": null, 797 | "metadata": {}, 798 | "outputs": [], 799 | "source": [ 800 | "np.prod(a)" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": null, 806 | "metadata": {}, 807 | "outputs": [], 808 | "source": [ 809 | "print(-2.5* 3.1* 7*10*11*12)" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": null, 815 | "metadata": {}, 816 | "outputs": [], 817 | "source": [ 818 | "import math\n", 819 | "a = np.array([1,3,4])\n", 820 | "m = np.mean(a)\n", 821 | "print(m)\n", 822 | "print(a -m)\n", 823 | "x = (a-m)**2\n", 824 | "print(x)\n", 825 | "s = np.sum(x)\n", 826 | "print(s)\n", 827 | "v = np.var(a)\n", 828 | "print(v)\n", 829 | "print(v*len(a))\n", 830 | "print(np.std(a))\n", 831 | "print(math.sqrt(s/len(a)))" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": null, 837 | "metadata": {}, 838 | "outputs": [], 839 | "source": [ 840 | "c=np.arange(24).reshape(2,3,4)\n", 841 | "c\n" 842 | ] 843 | }, 844 | { 845 | "cell_type": "code", 846 | "execution_count": null, 847 | "metadata": {}, 848 | "outputs": [], 849 | "source": [ 850 | "c.sum(axis=0) # sum across matrices" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": null, 856 | "metadata": { 857 | "collapsed": true 858 | }, 859 | "outputs": [], 860 | "source": [] 861 | } 862 | ], 863 | "metadata": { 864 | "kernelspec": { 865 | "display_name": "Python 3", 866 | "language": "python", 867 | "name": "python3" 868 | }, 869 | "language_info": { 870 | "codemirror_mode": { 871 | "name": "ipython", 872 | "version": 3 873 | }, 874 | "file_extension": ".py", 875 | "mimetype": "text/x-python", 876 | "name": "python", 877 | "nbconvert_exporter": "python", 878 | "pygments_lexer": "ipython3", 879 | "version": "3.6.3" 880 | } 881 | }, 882 | "nbformat": 4, 883 | "nbformat_minor": 2 884 | } 885 | -------------------------------------------------------------------------------- /python/Lambda Operator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Lambda operator" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | " f = lambda x, y : x + y" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "2" 30 | ] 31 | }, 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "f(1,1)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "6" 50 | ] 51 | }, 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "f(2,4)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "foo = [2, 18, 9, 22, 17, 24, 8, 12, 27]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "[18, 9, 24, 12, 27]\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "# filter() function\n", 87 | "\n", 88 | "divisible_by_3 = filter(lambda x: x % 3 == 0, foo)\n", 89 | "print(list(divisible_by_3))" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "[14, 46, 28, 54, 44, 58, 26, 34, 64]\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "# map() function\n", 107 | "\n", 108 | "lambda_map = map(lambda x: x * 2 + 10, foo)\n", 109 | "print(list(lambda_map))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "139\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "# reduce() function\n", 127 | "\n", 128 | "import functools\n", 129 | "lambda_reduce = functools.reduce(lambda x, y: x + y, foo)\n", 130 | "print(lambda_reduce)" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.3" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /python/README: -------------------------------------------------------------------------------- 1 | For python class 2 | -------------------------------------------------------------------------------- /python/__pycache__/mylib.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudxlab/ml/d420dde884634b819ca544ae8660047b0a3f0d17/python/__pycache__/mylib.cpython-36.pyc -------------------------------------------------------------------------------- /python/hello.py: -------------------------------------------------------------------------------- 1 | print("hello") 2 | print(3+4) 3 | -------------------------------------------------------------------------------- /python/simpleexp.py: -------------------------------------------------------------------------------- 1 | x = 10 2 | x = x + 2 3 | print(x) 4 | -------------------------------------------------------------------------------- /python/solutions/Python_Project_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "## Python Project 1\n", 10 | "import os, pathlib, re\n", 11 | "\n", 12 | "# Defining the function\n", 13 | "def getEmailCounts(path1='../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/', field='To', pattern='([<].*@[^>]*)'):\n", 14 | " emails_dict=dict()\n", 15 | " # Looping through all the files present under the path directory\n", 16 | " for path, subdirs, files in os.walk(path1):\n", 17 | " for name in files:\n", 18 | " # Skipping the sql lite file, all other files are email files\n", 19 | " if(name.endswith('sqlite')):\n", 20 | " continue\n", 21 | " # Creating a handle on the file\n", 22 | " hand=open(pathlib.PurePath(path, name))\n", 23 | " index=0\n", 24 | " for line in hand:\n", 25 | " index+=1\n", 26 | " email=''\n", 27 | " # Applying the condition to find the details as requested in the project\n", 28 | " if line.startswith(field+\":\"):\n", 29 | " email=re.findall(pattern,line)[0][1:]\n", 30 | " print(email)\n", 31 | " #emails_dict[email]=emails_dict.get(email,0)+1\n", 32 | " #emails_dict=dict(sorted(emails_dict.items(), key=lambda x:x[1],reverse=True))\n", 33 | "\n", 34 | "# Calling the defined function\n", 35 | "getEmailCounts('../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/', 'To', '([ <].*@[^>\\n ]*)')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Series([], Name: fromEmails, dtype: int64)\n", 48 | "Series([], Name: toEmails, dtype: int64)\n", 49 | "Series([], Name: fromEmails, dtype: int64)\n", 50 | "Series([], Name: fromEmails, dtype: int64)\n" 51 | ] 52 | }, 53 | { 54 | "ename": "IndexError", 55 | "evalue": "index 0 is out of bounds for axis 0 with size 0", 56 | "output_type": "error", 57 | "traceback": [ 58 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 59 | "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)", 60 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[1;31m# Calling the function to execute project 2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 76\u001b[1;33m \u001b[0mgetStats\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'pratik@cloudxlab.com'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 61 | "\u001b[1;32m\u001b[0m in \u001b[0;36mgetStats\u001b[1;34m(path1, emailidPersonal)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;31m#print('--------------')\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[1;31m# Printing the results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Top hour slot at which emails were sent: '\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtopTimes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 53\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtopFrom\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m>\u001b[0m\u001b[1;36m1\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mtopFrom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0memailidPersonal\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 54\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Top sender of emails: '\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mtopFrom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 62 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 1687\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1688\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1689\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1690\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1691\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 63 | "\u001b[1;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "## Python Project 2\n", 69 | "import os, pathlib, re\n", 70 | "import pandas as pd\n", 71 | "# Creating the function to get the maximum time slot, the maximum recepient, the maximum sender and best friend\n", 72 | "def getStats(path1='../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/',emailidPersonal='pratik@cloudxlab.com'):\n", 73 | " data = pd.DataFrame(columns=['fromEmails', 'toEmails', 'dates','hrs','emails'])\n", 74 | " # Looping through all the files present under the path directory\n", 75 | " for path, subdirs, files in os.walk(path1):\n", 76 | " index=0\n", 77 | " for name in files:\n", 78 | " # Skipping the sql lite file, all other files are email files\n", 79 | " if(name.endswith('sqlite')):\n", 80 | " continue\n", 81 | " hand=open(pathlib.PurePath(path, name))\n", 82 | " FromEmail=''\n", 83 | " ToEmail=''\n", 84 | " dateExt=''\n", 85 | " pattern1 = '([<].*@[^>\\n ]*)' # pattern to regex the email when name also present\n", 86 | " pattern2 = '([ ].*@[^>\\n ]*)' # pattern to regex the email when name not present\n", 87 | " # Looping through the lines and extracting the email ids, the time and the hour slot\n", 88 | " for line in hand:\n", 89 | " if line.startswith(\"To:\"):\n", 90 | " if (len(re.findall('[<]',line)))>0:\n", 91 | " ToEmail=re.findall(pattern1,line)[0][1:]\n", 92 | " else:\n", 93 | " ToEmail=re.findall(pattern2,line)[0][1:]\n", 94 | " elif line.startswith(\"From:\"):\n", 95 | " if (len(re.findall('[<]',line))):\n", 96 | " FromEmail=re.findall(pattern1,line)[0][1:]\n", 97 | " else:\n", 98 | " FromEmail=re.findall(pattern2,line)[0][1:]\n", 99 | " elif line.startswith(\"Date:\"):\n", 100 | " dateExt=re.findall(\"[ ].*[ \\n]\",line)[0].strip()\n", 101 | " hourSlot=dateExt.split(\" \")[4].split(\":\")[0]\n", 102 | " # Appending to the data frame\n", 103 | " index+=1\n", 104 | " data.loc[index] = [FromEmail, ToEmail, dateExt, hourSlot, FromEmail]\n", 105 | " index+=1\n", 106 | " data.loc[index] = [FromEmail, ToEmail, dateExt, hourSlot, ToEmail]\n", 107 | " #print(data)\n", 108 | " # Sorting to get the maximums on the top\n", 109 | " topTimes=data[(data[\"fromEmails\"] != emailidPersonal)].groupby(['hrs']).agg('count')['fromEmails'].sort_values(ascending =False)\n", 110 | " topFrom=data.groupby(['fromEmails']).agg('count')['toEmails'].sort_values(ascending =False)\n", 111 | " topTo=data.groupby(['toEmails']).agg('count')['fromEmails'].sort_values(ascending =False)\n", 112 | " topConvo=data.groupby(['emails']).agg('count')['fromEmails'].sort_values(ascending =False)\n", 113 | " print(topTimes[0:5])\n", 114 | " print(topFrom[0:5])\n", 115 | " print(topTo[0:5])\n", 116 | " print(topConvo[0:5])\n", 117 | " #print('--------------')\n", 118 | " # Printing the results\n", 119 | " print('Top hour slot at which emails were sent: '+topTimes.index[0])\n", 120 | " if len(topFrom)>1 and topFrom.index[0] == (emailidPersonal) :\n", 121 | " print('Top sender of emails: '+topFrom.index[1])\n", 122 | " elif len(topFrom)>0:\n", 123 | " print('Top sender of emails: '+topFrom.index[0])\n", 124 | " else:\n", 125 | " print('No emails were received by the user')\n", 126 | " \n", 127 | " if len(topTo)>1 and topTo.index[0] == (emailidPersonal) :\n", 128 | " print('Top recepient of emails: '+topTo.index[1])\n", 129 | " elif len(topTo)>0:\n", 130 | " print('Top recepient of emails: '+topTo.index[0])\n", 131 | " else:\n", 132 | " print('No emails sent')\n", 133 | " \n", 134 | " \n", 135 | " if len(topConvo)>1 and topConvo.index[0] == (emailidPersonal):\n", 136 | " print('Top friend: '+topConvo.index[1])\n", 137 | " elif len(topConvo)>0:\n", 138 | " print('Top friend: '+topConvo.index[0])\n", 139 | " else:\n", 140 | " print('No top friend')\n", 141 | " \n", 142 | "# Calling the function to execute project 2\n", 143 | "getStats('../got-your-back/GYB-GMail-Backup-pratik@cloudxlab.com/','pratik@cloudxlab.com')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "# Preparation of the email backup\n", 155 | "!git clone https://github.com/jay0lee/got-your-back.git\n", 156 | "!cd got-your-back\n", 157 | "!touch nobrowser.txt\n", 158 | "!python3 gyb.py --email pratik@cloudxlab.com --action backup\n" 159 | ] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 3", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.6.3" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 2 183 | } 184 | --------------------------------------------------------------------------------