├── .gitignore
├── Introduction
│   ├── 1.Workshop_Introduction.ipynb
│   ├── 2.Setup.ipynb
│   ├── 3.Introduction_to_Tensors.ipynb
│   ├── 4.Dynamic_Graphs.ipynb
│   └── 5.Production_PyTorch_and_Updates.ipynb
├── Named_Entity_Recognition
│   ├── .gitignore
│   ├── NamedEntityRecognition.ipynb
│   ├── doccano_export.json
│   ├── readme.md
│   └── sample_data.txt
├── README.md
├── Sarcasm_Detection
│   ├── algorithm
│   │   ├── .vscode
│   │   │   └── settings.json
│   │   ├── Max Entropy.ipynb
│   │   ├── dogsvsfriedchicken.png
│   │   ├── maxentequation.png
│   │   ├── maxentropy.py
│   │   ├── parse_dataset.py
│   │   ├── test_set.json
│   │   ├── test_set_v2.json
│   │   ├── train_set.json
│   │   └── train_set_v2.json
│   └── fleiss_kappa
│       ├── Fleiss Kappa.ipynb
│       ├── dataset.json
│       ├── fleiss_kappa.csv
│       ├── fleiss_kappa_demo.ipynb
│       ├── parse_dataset.py
│       └── rated.csv
├── Sequence Models
│   ├── Sequence Models (Presentation).ipynb
│   └── Sequence Models.ipynb
├── Solutions
│   └── Readme.md
├── Word Embeddings
│   └── Word Embeddings.ipynb
├── archive
│   └── fall_2018
│       ├── Introduction
│       │   ├── 1.Workshop_Introduction.ipynb
│       │   ├── 2.Setup.ipynb
│       │   ├── 3.What_Are_Dynamic_Graphs.ipynb
│       │   └── 4.Future_of_PyTorch.ipynb
│       ├── Named_Entity_Recognition
│       │   ├── NamedEntityRecognition.ipynb
│       │   ├── readme.md
│       │   └── sample_data.txt
│       ├── README.md
│       ├── Sequence Models
│       │   └── Sequence Models.ipynb
│       ├── Word Embeddings
│       │   └── Word Embeddings.ipynb
│       ├── basics
│       │   ├── README.md
│       │   ├── RNN-next-char-pred.ipynb
│       │   ├── The_Adding_Problem_PyTorch.ipynb
│       │   └── The_Remembering_Problem_PyTorch.ipynb
│       ├── images
│       ├── nlp_lifecycle
│       │   ├── Lifecycle of an NLP Research Project.ipynb
│       │   ├── NLP with CNNS.ipynb
│       │   ├── Precisionrecall.svg
│       │   ├── accuracies.jpeg
│       │   ├── convolution.png
│       │   ├── deepNetVis.png
│       │   └── textUnderstand.png
│       ├── requirements.txt
│       └── transfer_learning
│           ├── ULMFiT and Transfer Learning.ipynb
│           └── ULMFiT.sketch
├── images
│   ├── LSTM3-focus-C.png
│   ├── LSTM3-focus-f.png
│   ├── LSTM3-focus-i.png
│   ├── LSTM3-focus-o.png
│   ├── Simple-graph-example.png
│   ├── bilstm_flow.png
│   ├── blstm_crf_details.png
│   ├── count_vec.png
│   ├── crf_transition_matrix.png
│   ├── diff_types_rnns.png
│   ├── doccano_label.png
│   ├── linear_crf_example.png
│   ├── logo.png
│   ├── lstm_flow.png
│   ├── lstm_inner_workings.png
│   ├── network_next_word.png
│   ├── nll_loss.png
│   ├── rnn_inner_workings.png
│   ├── viterbi.png
│   └── why_sequences.png
├── requirements.txt
└── transfer_learning
    ├── ULMFiT and Transfer Learning.ipynb
    └── ULMFiT.sketch

/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .DS_Store 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .DS_Store 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Modeling 54 | models/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | -------------------------------------------------------------------------------- /Introduction/1.Workshop_Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n", 12 | "\n", 13 | "\n", 14 | "# Introduction to Natural Language Processing with PyTorch Workshop\n", 15 | "\n", 16 | "Thank you to our supporters:\n", 17 | "\n", 18 | "* Seattle PyLadies Group\n", 19 | "* Women in Machine Learning and Data Science Group\n", 20 | "* Seattle Artificial Intelligence & Deep Learning Group\n", 21 | "* Silicon Valley Python Workshops Group\n", 22 | "\n", 23 | "\n", 24 | "\n", 25 | "\n", 26 | "" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "slideshow": { 33 | "slide_type": "slide" 34 | } 35 | }, 36 | "source": [ 37 | "## Who we are\n", 38 | "\n", 39 | "Speakers:\n", 40 | "* Priya Ananthasankar (Microsoft)\n", 41 | "* David Clark (Data Science Consultant)\n", 42 | "* Kendall Chuang (Carta)\n", 43 | "* Micheleen Harris (Microsoft)\n", 44 | "\n", 45 | "Volunteers:\n", 46 | "* Rob Callaway (QuickLearn)\n", 47 | "* Seth Mottaghinejad (Microsoft)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "subslide" 55 | } 56 | }, 57 | "source": [ 58 | "
\n", 59 | "\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "slideshow": { 71 | "slide_type": "slide" 72 | } 73 | }, 74 | "source": [ 75 | "## Questions for you\n", 76 | "\n", 77 | "* Machine Learning experience?\n", 78 | "* Deep Learning experience?\n", 79 | "* PyTorch experience?\n", 80 | "* NLP experience?" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "slide" 88 | } 89 | }, 90 | "source": [ 91 | "\n", 92 | "## Sequential data and NLP\n", 93 | "\n", 94 | "It's important to realize that **natural language** algorithms are useful for all types of sequence data.\n", 95 | "\n", 96 | "**Examples**\n", 97 | "* A text corpus, such as user-reported product issues\n", 98 | "* Financial time series data" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "slide" 106 | } 107 | }, 108 | "source": [ 109 | "## More examples\n", 110 | "\n", 111 | "\"why" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "slideshow": { 118 | "slide_type": "slide" 119 | } 120 | }, 121 | "source": [ 122 | "## Approaching with the generic NLP algorithm - a Recurrent Neural Network (RNN)\n", 123 | "\n", 124 | "\n", 125 | "\"inside\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "subslide" 133 | } 134 | }, 135 | "source": [ 136 | "### Notation for RNN above\n", 137 | "* x = input embedding for a word (vector)\n", 138 | "* h = hidden (or activation) state (vector)\n", 139 | "* tanh = hyperbolic tangent activation function/layer\n", 140 | "* y = output tag (not shown because we can have different schemes)\n", 141 | "\n", 142 | "**A Long Short-Term Memory (LSTM) network is a subclass of RNNs**" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "slideshow": { 149 | "slide_type": "slide" 150 | } 151 | }, 152 | "source": [ 153 | "## What types of RNNs are there?\n", 154 | "\n", 155 | "* Many-to-many - e.g. find names with named entity recognition (NER)\n", 156 | "* Many-to-one - e.g. sentiment analysis\n", 157 | "* One-to-many - e.g. music generation\n", 158 | "* Another many-to-many - e.g. machine translation\n", 159 | "* One-to-one" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": { 165 | "slideshow": { 166 | "slide_type": "slide" 167 | } 168 | }, 169 | "source": [ 170 | "\"why\n", 171 | "**Question**: can you think of examples of each?" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "slide" 179 | } 180 | }, 181 | "source": [ 182 | "## References\n", 183 | "1. [RNN video \"RNN1. Why sequence models?\"](https://www.youtube.com/watch?v=5Vl-bK7tfD8&list=PLBAGcD3siRDittPwQDGIIAWkjz-RucAc7&index=1) by Andrew Ng\n", 184 | "2. [Getting Started with PyTorch Part 1: Understanding how Automatic Differentiation works](https://towardsdatascience.com/getting-started-with-pytorch-part-1-understanding-how-automatic-differentiation-works-5008282073ec)\n", 185 | "3. [Introduction to PyTorch from pytorch.org](https://pytorch.org/tutorials/beginner/nlp/pytorch_tutorial.html#sphx-glr-beginner-nlp-pytorch-tutorial-py)\n", 186 | "4. 
[Stanford course on NLP taught by Christopher Manning](https://web.stanford.edu/class/cs224n/)\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "celltoolbar": "Slideshow", 199 | "kernelspec": { 200 | "display_name": "Python 3.6 - PyTorch 1.1", 201 | "language": "python", 202 | "name": "pytorch_preview" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.6.8" 215 | }, 216 | "nav_menu": {}, 217 | "toc": { 218 | "navigate_menu": true, 219 | "number_sections": true, 220 | "sideBar": true, 221 | "threshold": 6, 222 | "toc_cell": false, 223 | "toc_section_display": "block", 224 | "toc_window_display": false 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } 230 | -------------------------------------------------------------------------------- /Introduction/2.Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Setup for Workshop\n", 12 | "\n", 13 | "* We are using an Azure Data Science Virtual Machine for the workshop.\n", 14 | "* You should have gotten credentials for a machine\n", 15 | "* Be sure you are using `https`. \n", 16 | "* You may get a \"Your Connection is Not Private\" or other form of security certificate error\n", 17 | "* This is a known issue and we are working on it. Please click through this to get to the Jupyterhub login page." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "slideshow": { 24 | "slide_type": "skip" 25 | } 26 | }, 27 | "source": [ 28 | "For this workshop, the Azure Data Science Virtual Machine will be used. This is a VM one can fire up on Azure to perform data science tasks and is pre-installed with dozens of frameworks and packages for use. It's a nice way to get started quickly without much overhead/setup which is one of the reasons we chose this for the workshop. [Find out more here](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/overview).\n", 29 | "\n", 30 | "You will get credentials for a machine and log in at the URL provided. This will be the Jupyter notebook environment/playground provided to you for the day. At the **end of the day you should zip up your notebooks and download** them as these VMs will disappear shortly thereafter into the sunset. \n", 31 | "\n", 32 | "Be sure you are using `https`. You may get a \"Your Connection is Not Private\" or other form of security certificate error. This is a known issue and we are working on it. Please click through this by hitting the link on the page to continue." 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "slideshow": { 39 | "slide_type": "slide" 40 | } 41 | }, 42 | "source": [ 43 | "**Use a sticky note on your laptop to tell us if you are having an issue.**" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "slideshow": { 50 | "slide_type": "slide" 51 | } 52 | }, 53 | "source": [ 54 | "## Your First Task\n", 55 | "\n", 56 | "1. Log in with the credentials provided\n", 57 | "2. 
Open up a new, blank notebook (\"Python 3.6 - PyTorch 1.1\" kernel)\n", 58 | "3. In a blank cell run (don't forget exclamation point): `! cd Workshop/intro-to-nlp-with-pytorch && git pull origin master`\n", 59 | "4. Navigate to the `Workshop/intro-to-nlp-with-pytorch` folder.\n", 60 | "5. Open this notebook (2.Setup) from the `Introduction` folder and if asked, choose the \"Python 3.6 - PyTorch 1.1\" kernel.\n", 61 | "6. Run the following code cells." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 1, 67 | "metadata": { 68 | "slideshow": { 69 | "slide_type": "slide" 70 | } 71 | }, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "{'/anaconda/envs/pytorch1/bin/python'}" 77 | ] 78 | }, 79 | "execution_count": 1, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "# Which Python\n", 86 | "import sys\n", 87 | "{sys.executable}" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": { 94 | "slideshow": { 95 | "slide_type": "slide" 96 | } 97 | }, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "'1.1.0'" 103 | ] 104 | }, 105 | "execution_count": 2, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "# Which PyTorch (this should be 1.1.0)\n", 112 | "import torch\n", 113 | "torch.__version__" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 3, 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "slide" 122 | } 123 | }, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": [ 128 | "device(type='cuda', index=0)" 129 | ] 130 | }, 131 | "execution_count": 3, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "# GPU-accelerated with CUDA libs\n", 138 | "torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": { 144 | "slideshow": { 145 | "slide_type": "slide" 146 | } 147 | }, 148 | "source": [ 149 | "## Save Your Work at End of the Workshop!\n", 150 | "\n", 151 | "If using the Workshop-provided Data Science Virtual Machine, do the following to save your work:\n", 152 | "\n", 153 | "1. Run the following `zip` command in this notebook. \n", 154 | "2. Go to the Jupyter file browser or click on \"Jupyter\" symbol at upper left corner to get there again\n", 155 | "3. Place a check in the box by the `nlp_workshop.zip` file and click \"Download\"" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 4, 161 | "metadata": { 162 | "slideshow": { 163 | "slide_type": "subslide" 164 | } 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "# ! 
zip -r nlp_workshop.zip ../../intro-to-nlp-with-pytorch" 169 | ] 170 | } 171 | ], 172 | "metadata": { 173 | "celltoolbar": "Slideshow", 174 | "kernelspec": { 175 | "display_name": "Python 3.6 - PyTorch 1.1", 176 | "language": "python", 177 | "name": "pytorch_preview" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.6.8" 190 | }, 191 | "nav_menu": {}, 192 | "toc": { 193 | "navigate_menu": true, 194 | "number_sections": true, 195 | "sideBar": true, 196 | "threshold": 6, 197 | "toc_cell": false, 198 | "toc_section_display": "block", 199 | "toc_window_display": false 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /Introduction/3.Introduction_to_Tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction to PyTorch's Tensor Library\n", 8 | "\n", 9 | "Original Author: Robert Guthrie (Link)\n", 10 | "\n", 11 | "All of deep learning consists of computations on tensors, which are\n", 12 | "generalizations of a matrix that can be indexed in more than 2\n", 13 | "dimensions. We will see exactly what this means in-depth later. First,\n", 14 | "let's look at what we can do with tensors.\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import torch\n", 24 | "import torch.autograd as autograd\n", 25 | "import torch.nn as nn\n", 26 | "import torch.nn.functional as F\n", 27 | "\n", 28 | "torch.manual_seed(1)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Creating Tensors\n", 36 | "\n", 37 | "Tensors can be created from Python lists with the torch.tensor()\n", 38 | "function." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# torch.tensor(data) creates a torch.Tensor object with the given data.\n", 48 | "V_data = [1., 2., 3.]\n", 49 | "V = torch.tensor(V_data)\n", 50 | "print(V)\n", 51 | "\n", 52 | "# Creates a matrix\n", 53 | "M_data = [[1., 2., 3.], [4., 5., 6]]\n", 54 | "M = torch.tensor(M_data)\n", 55 | "print(M)\n", 56 | "\n", 57 | "# Create a 3D tensor of size 2x2x2.\n", 58 | "T_data = [[[1., 2.], [3., 4.]],\n", 59 | "          [[5., 6.], [7., 8.]]]\n", 60 | "T = torch.tensor(T_data)\n", 61 | "print(T)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Working with the 3D Tensor\n", 69 | "\n", 70 | "What is a 3D tensor anyway? Think about it like this. If you have a\n", 71 | "vector, indexing into the vector gives you a scalar. If you have a\n", 72 | "matrix, indexing into the matrix gives you a vector. If you have a 3D\n", 73 | "tensor, then indexing into the tensor gives you a matrix!\n", 74 | "\n", 75 | "A note on terminology:\n", 76 | "when I say \"tensor\" in this tutorial, it refers\n", 77 | "to any torch.Tensor object. Matrices and vectors are special cases of\n", 78 | "torch.Tensors, where their dimension is 2 and 1 respectively. 
When I am\n", 79 | "talking about 3D tensors, I will explicitly use the term \"3D tensor\".\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# Index into V and get a scalar (0 dimensional tensor)\n", 89 | "print(V[0])\n", 90 | "# Get a Python number from it\n", 91 | "print(V[0].item())\n", 92 | "\n", 93 | "# Index into M and get a vector\n", 94 | "print(M[0])\n", 95 | "\n", 96 | "# Index into T and get a matrix\n", 97 | "print(T[0])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "You can also create tensors of other datatypes. The default, as you can see, is `Float`. To create a tensor of integer types, try `torch.LongTensor()`. Check the documentation for more data types, but `Float` and `Long` will be the most common." 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Tensor with random data\n", 112 | "\n", 113 | "You can create a tensor with random data and the supplied dimensionality\n", 114 | "with `torch.randn()`\n", 115 | "\n" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "x = torch.randn((3, 4, 5))\n", 125 | "print(x)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Operations with tensors" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "\n", 142 | "# You can operate on tensors in the ways you would expect.\n", 143 | "\n", 144 | "x = torch.tensor([1., 2., 3.])\n", 145 | "y = torch.tensor([4., 5., 6.])\n", 146 | "z = x + y\n", 147 | "print(z)\n", 148 | "\n", 149 | "######################################################################\n", 150 | "# See `the documentation `__ for a\n", 151 | "# complete list of the massive number of operations available to you. They\n", 152 | "# expand beyond just mathematical operations.\n", 153 | "#" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "## Reshaping tensors\n", 161 | "\n", 162 | "\n", 163 | "Use the `.view()` method to reshape a tensor. This method receives heavy\n", 164 | "use, because many neural network components expect their inputs to have\n", 165 | "a certain shape. Often you will need to reshape before passing your data\n", 166 | "to the component." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "x = torch.randn(2, 3, 4)\n", 176 | "print(x)\n", 177 | "print(x.view(2, 12)) # Reshape to 2 rows, 12 columns\n", 178 | "# Same as above. 
If one of the dimensions is -1, its size can be inferred\n", 179 | "print(x.view(2, -1))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "## Concatenation" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# One helpful operation that we will make use of later is concatenation.\n", 196 | "\n", 197 | "# By default, it concatenates along the first axis (concatenates rows)\n", 198 | "x_1 = torch.randn(2, 5)\n", 199 | "y_1 = torch.randn(3, 5)\n", 200 | "z_1 = torch.cat([x_1, y_1])\n", 201 | "print(z_1)\n", 202 | "\n", 203 | "# Concatenate columns:\n", 204 | "x_2 = torch.randn(2, 3)\n", 205 | "y_2 = torch.randn(2, 5)\n", 206 | "# second arg specifies which axis to concat along\n", 207 | "z_2 = torch.cat([x_2, y_2], 1)\n", 208 | "print(z_2)\n", 209 | "\n", 210 | "# If your tensors are not compatible, PyTorch will complain. Uncomment to see the error\n", 211 | "# torch.cat([x_1, x_2])" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "**EXERCISE**: Concatenate the following 3D tensors along the second axis or in the second dimension.\n", 219 | "\n", 220 | "```python\n", 221 | "t1 = torch.randn(3, 2, 4)\n", 222 | "t2 = torch.randn(3, 4, 4)\n", 223 | "```\n", 224 | "\n", 225 | "**BONUS**: Can you reshape one of the tensors to be able to concatenate in the 3rd dimension?" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "## Computational Graphs and Automatic Differentiation\n", 240 | "\n", 241 | "The concept of a computational graph is essential to efficient deep\n", 242 | "learning programming, because it allows you to not have to write the\n", 243 | "back propagation gradients yourself. A computational graph is simply a\n", 244 | "specification of how your data is combined to give you the output. Since\n", 245 | "the graph totally specifies what parameters were involved with which\n", 246 | "operations, it contains enough information to compute derivatives. This\n", 247 | "probably sounds vague, so let's see what is going on using the\n", 248 | "fundamental flag `requires_grad`.\n", 249 | "\n", 250 | "First, think from a programmer's perspective. What is stored in the\n", 251 | "torch.Tensor objects we were creating above? Obviously the data and the\n", 252 | "shape, and maybe a few other things. But when we added two tensors\n", 253 | "together, we got an output tensor. All this output tensor knows is its\n", 254 | "data and shape. It has no idea that it was the sum of two other tensors\n", 255 | "(it could have been read in from a file, it could be the result of some\n", 256 | "other operation, etc.)\n", 257 | "\n", 258 | "If `requires_grad=True`, the Tensor object keeps track of how it was\n", 259 | "created. Let's see it in action."
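, "\n", "As a minimal sketch of the difference (the tensor values here are arbitrary):\n", "\n", "```python\n", "import torch\n", "\n", "# A plain, user-created tensor records no history\n", "a = torch.tensor([2., 3.])\n", "print((a * 2).grad_fn)   # None -- nothing was tracked\n", "\n", "# With requires_grad=True, the result remembers the operation that made it\n", "b = torch.tensor([2., 3.], requires_grad=True)\n", "print((b * 2).grad_fn)   # e.g. <MulBackward0 ...>\n", "```"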
260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# Tensor factory methods have a ``requires_grad`` flag\n", 269 | "x = torch.tensor([1., 2., 3], requires_grad=True)\n", 270 | "\n", 271 | "# With requires_grad=True, you can still do all the operations you previously\n", 272 | "# could\n", 273 | "y = torch.tensor([4., 5., 6], requires_grad=True)\n", 274 | "z = x + y\n", 275 | "print(z)\n", 276 | "\n", 277 | "# BUT z knows something extra.\n", 278 | "print(z.grad_fn)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "So Tensors know what created them. z knows that it wasn't read in from\n", 286 | "a file, it wasn't the result of a multiplication or exponential or\n", 287 | "whatever. And if you keep following z.grad_fn, you will find yourself at\n", 288 | "x and y.\n", 289 | "\n", 290 | "But how does that help us compute a gradient?\n" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "# Let's sum up all the entries in z\n", 300 | "s = z.sum()\n", 301 | "print(s)\n", 302 | "print(s.grad_fn)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "---\n", 310 | "\n", 311 | "**Intuitive definition of a gradient**: For a function of one output and many inputs such as `sum()`, the gradient is a vector that shows the direction (with respect to its inputs) to move toward in order to increase its output. If we take a small step in the direction of the gradient and recalculate the sum, we get a bigger sum. We can do so repeatedly until we maximize the sum. In deep learning, we compute the gradient of the loss (output, computed by comparing predictions with training labels) with respect to the weights and biases (inputs) and repeatedly take small steps in the direction opposite the gradient until we minimize the loss. This optimization method is called gradient descent.\n", 312 | "\n", 313 | "\n", 314 | "Note: if you run this block multiple times, the gradient will increment.\n", 315 | "That is because **PyTorch *accumulates* the gradient** each time `backward()` is called (IMPORTANT) and stores it in the `.grad` property.\n", 316 | "\n", 317 | "**Key aspect of the computational graph in PyTorch**: The computational graph is built on the fly as the operations of the forward pass execute, and `backward()` then walks it to compute gradients (more on dynamic graphs in the next notebook).\n", 318 | "\n", 319 | "Let's have PyTorch compute the gradients for the graph and check the result." 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "# calling .backward() on any variable will run backprop, starting from it \n", 329 | "# (using the chain rule from your calculus days to calculate gradients).\n", 330 | "# The call to \"backward\" is the point at which the gradients are computed.\n", 331 | "s.backward()\n", 332 | "print(x.grad)\n", 333 | "\n", 334 | "# Question: why are we checking the gradient value of \"x\"?" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "---\n", 342 | "**The math behind backpropagation (only for reference and optional reading)**:\n", 343 | "\n", 344 | "So now, what is the derivative of this sum with respect to the first\n", 345 | "component of x? 
In math, we want\n", 346 | "\n", 347 | "$$\\frac{\\partial s}{\\partial x_0}$$\n", 348 | "\n", 349 | "\n", 350 | "Well, s knows that it was created as a sum of the tensor z. z knows\n", 351 | "that it was the sum x + y. So\n", 352 | "\n", 353 | "$$s = \\overbrace{x_0 + y_0}^\\text{$z_0$} + \\overbrace{x_1 + y_1}^\\text{$z_1$} + \\overbrace{x_2 + y_2}^\\text{$z_2$}$$\n", 354 | "\n", 355 | "And so s contains enough information to determine that the derivative\n", 356 | "we want is 1!\n", 357 | "\n", 358 | "Of course this glosses over the challenge of how to actually compute\n", 359 | "that derivative. The point here is that s is carrying along enough\n", 360 | "information that it is possible to compute it. \n", 361 | "\n", 362 | "In reality, the\n", 363 | "developers of PyTorch program the `sum()` and `+` operations to know how to\n", 364 | "compute their gradients, and run the back propagation algorithm. An\n", 365 | "in-depth discussion of that algorithm is beyond the scope of this\n", 366 | "tutorial." 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "## Manipulating gradients\n", 374 | "\n", 375 | "Understanding what is going on in the block below is crucial for being a\n", 376 | "successful programmer in deep learning.\n" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "x = torch.randn(2, 2)\n", 386 | "y = torch.randn(2, 2)\n", 387 | "\n", 388 | "# By default, user created Tensors have ``requires_grad=False``\n", 389 | "print(x.requires_grad, y.requires_grad)\n", 390 | "z = x + y\n", 391 | "\n", 392 | "# So you can't backprop through z\n", 393 | "print(z.grad_fn)" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "``.requires_grad_( ... )`` changes an existing Tensor's ``requires_grad``\n", 401 | "flag in-place. The input flag defaults to ``True`` if not given." 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "x.requires_grad_()\n", 411 | "y.requires_grad_()\n", 412 | "# z contains enough information to compute gradients, as we saw above\n", 413 | "z = x + y\n", 414 | "print(z.grad_fn)\n", 415 | "# If any input to an operation has ``requires_grad=True``, so will the output\n", 416 | "print(z.requires_grad)" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Now z has the computation history that relates itself to x and y\n", 424 | "Can we just take its values, and **detach** it from its history?" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "new_z = z.detach()\n", 434 | "\n", 435 | "# ... does new_z have information to backprop to x and y?\n", 436 | "# NO!\n", 437 | "print(new_z.grad_fn)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": {}, 443 | "source": [ 444 | "And how could it? ``z.detach()`` returns a tensor that shares the same storage\n", 445 | "as ``z``, but with the computation history forgotten. 
It doesn't know anything\n", 446 | "about how it was computed.\n", 447 | "In essence, we have broken the Tensor away from its past history" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": {}, 453 | "source": [ 454 | "You can also stop autograd from tracking history on Tensors\n", 455 | "with ``.requires_grad``=True by wrapping the code block in\n", 456 | "``with torch.no_grad():``" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "print(x.requires_grad)\n", 466 | "print((x ** 2).requires_grad)\n", 467 | "\n", 468 | "with torch.no_grad():\n", 469 | "\tprint((x ** 2).requires_grad)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "**EXERCISE**: Build the following computational graph using 2D vectors.\n", 477 | "\n", 478 | "\n", 479 | "\n", 480 | "Then, run backpropagation on the network.\n", 481 | "\n", 482 | "```python\n", 483 | "# Must have a scalar output to use autograd/backprop\n", 484 | "z = a.sum()\n", 485 | "z.backward()\n", 486 | "```\n", 487 | "\n", 488 | "Check the gradient values of `b` and `c`, now." 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": null, 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [] 497 | }, 498 | { 499 | "cell_type": "markdown", 500 | "metadata": {}, 501 | "source": [ 502 | "## References\n", 503 | "\n", 504 | "1. [Further mathematical derivations of backpropagation](http://neuralnetworksanddeeplearning.com/chap2.html)" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": {}, 511 | "outputs": [], 512 | "source": [] 513 | } 514 | ], 515 | "metadata": { 516 | "kernelspec": { 517 | "display_name": "Python 3.6 - PyTorch 1.1", 518 | "language": "python", 519 | "name": "pytorch_preview" 520 | }, 521 | "language_info": { 522 | "codemirror_mode": { 523 | "name": "ipython", 524 | "version": 3 525 | }, 526 | "file_extension": ".py", 527 | "mimetype": "text/x-python", 528 | "name": "python", 529 | "nbconvert_exporter": "python", 530 | "pygments_lexer": "ipython3", 531 | "version": "3.6.8" 532 | }, 533 | "nav_menu": {}, 534 | "toc": { 535 | "navigate_menu": true, 536 | "number_sections": true, 537 | "sideBar": true, 538 | "threshold": 6, 539 | "toc_cell": false, 540 | "toc_section_display": "block", 541 | "toc_window_display": false 542 | } 543 | }, 544 | "nbformat": 4, 545 | "nbformat_minor": 2 546 | } 547 | -------------------------------------------------------------------------------- /Introduction/4.Dynamic_Graphs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# The dynamic computational graph" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "## Forward/Backwards\n", 23 | "* In training a NN there are a couple of steps: the forward pass and the backwards pass (back propagation of gradients).\n", 24 | " * In PyTorch `forward` and `backward` are in the same class `torch.autograd.Function`\n", 25 | " \n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "slideshow": { 32 | "slide_type": "slide" 33 | } 34 | }, 35 | "source": [ 36 | "## Let's see an example network (and train it)" 
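, "\n", "Concretely, the cells below wire up $d = w_3(w_1 a) + w_4(w_2 a)$ with loss $L = 10 - d$. With the values used ($a = 4$ and $w_1, w_2, w_3, w_4 = 2, 5, 9, 7$), working the chain rule by hand gives $\\partial L/\\partial w_1 = -w_3 a = -36$, $\\partial L/\\partial w_2 = -w_4 a = -28$, $\\partial L/\\partial w_3 = -b = -8$, and $\\partial L/\\partial w_4 = -c = -20$, so you can check PyTorch's answer below."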
37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "subslide" 45 | } 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "# Do some imports\n", 50 | "import torch\n", 51 | "\n", 52 | "# Define the leaf nodes\n", 53 | "a = torch.tensor([4.])\n", 54 | "\n", 55 | "# This is just a vector of tensors\n", 56 | "weights = [torch.tensor([i], requires_grad=True) for i in (2., 5., 9., 7.)]\n", 57 | "\n", 58 | "# unpack the weights for nicer assignment\n", 59 | "w1, w2, w3, w4 = weights" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "Exercise: Print the type of a" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "slideshow": { 78 | "slide_type": "slide" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "type(a)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "slideshow": { 90 | "slide_type": "slide" 91 | } 92 | }, 93 | "source": [ 94 | "## Create the network\n", 95 | "\n", 96 | "Here we'll see the graph created on-the-fly and the forward pass\n", 97 | "\n", 98 | "**Note: static graph frameworks predefine the graph (that then cannot change later) and then run inputs through it**" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "slideshow": { 106 | "slide_type": "fragment" 107 | } 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# IMPORTANT: When we create b, the graph creation begins!!!\n", 112 | "\n", 113 | "# The next three lines of code (b, c, d creation) are our\n", 114 | "# forward pass - when the inputs are processed into output\n", 115 | "\n", 116 | "# BEGIN COMPUTATIONAL GRAPH DEFINITION (some operations)\n", 117 | "b = w1 * a\n", 118 | "c = w2 * a\n", 119 | "d = w3 * b + w4 * c\n", 120 | "# END GRAPH DEFINITION\n", 121 | "\n", 122 | "# This is the loss\n", 123 | "L = (10 - d)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "slideshow": { 130 | "slide_type": "slide" 131 | } 132 | }, 133 | "source": [ 134 | "## Run backprop and check the gradient data" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "slideshow": { 142 | "slide_type": "fragment" 143 | } 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "L.backward()\n", 148 | "\n", 149 | "for index, weight in enumerate(weights, start=1):\n", 150 | "    gradient, *_ = weight.grad\n", 151 | "    print(\"Gradient of L is {} w.r.t. w{}\".format(gradient, index))" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "slideshow": { 158 | "slide_type": "slide" 159 | } 160 | }, 161 | "source": [ 162 | "Exercise: run the above cell one more time and see what happens\n", 163 | "\n", 164 | "**Remember: the computational graph is built as the forward pass executes and is freed once `backward()` is called. Two things must be done to run it over and over**\n", 165 | " * Clear the gradients\n", 166 | " * Build (and possibly redefine) the network again" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "slideshow": { 173 | "slide_type": "slide" 174 | } 175 | }, 176 | "source": [ 177 | "Exercise: re-run the \"Create the network\" section and then \"Run backprop...\" section. Why do the gradients change? How do you reset the gradients?"
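, "\n", "Hint: gradients *accumulate* in each weight's `.grad` attribute across `backward()` calls, so every re-run adds to the stored values. One way to reset them in place (a sketch using the `weights` list defined above):\n", "\n", "```python\n", "for w in weights:\n", "    if w.grad is not None:\n", "        w.grad.zero_()\n", "```"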
178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "slideshow": { 184 | "slide_type": "slide" 185 | } 186 | }, 187 | "source": [ 188 | "## As you'll see later...but to round this out\n", 189 | "\n", 190 | "Let's update the weights and zero them (we'd do this before running the network again as would happen in training)\n", 191 | "\n", 192 | "Your update and reset will look like:\n", 193 | "```python\n", 194 | "# For fun let's say we had a learning rate of 1e-4\n", 195 | "learning_rate = 1e-4\n", 196 | "\n", 197 | "with torch.no_grad():\n", 198 | " w1 -= learning_rate * w1.grad\n", 199 | " w2 -= learning_rate * w2.grad\n", 200 | " w3 -= learning_rate * w3.grad\n", 201 | " w4 -= learning_rate * w4.grad\n", 202 | "\n", 203 | " # Manually zero the gradients after running the backward pass\n", 204 | " w1.grad.zero_()\n", 205 | " w2.grad.zero_()\n", 206 | " w3.grad.zero_()\n", 207 | " w4.grad.zero_()\n", 208 | "```" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": { 214 | "slideshow": { 215 | "slide_type": "slide" 216 | } 217 | }, 218 | "source": [ 219 | "## Let's put it all together to create, run, backwards prop, update weights, clear gradients" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "slideshow": { 227 | "slide_type": "-" 228 | } 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "# Define the leaf node\n", 233 | "a = torch.tensor([4.])\n", 234 | "\n", 235 | "# This is just a vector of tensors\n", 236 | "weights = [torch.tensor([i], requires_grad=True) for i in (2., 5., 9., 7.)]\n", 237 | "\n", 238 | "# unpack the weights for nicer assignment\n", 239 | "w1, w2, w3, w4 = weights\n", 240 | "\n", 241 | "\n", 242 | "# IMPORTANT: When we create b, the graph creation begins!!!\n", 243 | "\n", 244 | "# The next three lines of code (b, c, d creation) are our\n", 245 | "# forward pass - when the inputs are processed into output\n", 246 | "\n", 247 | "# BEGIN COMPUTATIONAL GRAPH DEFINITION (some operations)\n", 248 | "b = w1 * a\n", 249 | "c = w2 * a\n", 250 | "d = w3 * b + w4 * c\n", 251 | "# END GRAPH DEFINITION\n", 252 | "\n", 253 | "# This is the loss\n", 254 | "L = (10 - d)\n", 255 | "\n", 256 | "# Run the backwards propagation of gradients \n", 257 | "# (remember your chain rule for differentiation? Well PyTorch\n", 258 | "# takes care of this for you!)\n", 259 | "L.backward()\n", 260 | "\n", 261 | "for index, weight in enumerate(weights, start=1):\n", 262 | " gradient, *_ = weight.grad\n", 263 | " print(\"Gradient of L is {} w.r.t. 
w{}\".format(gradient, index))\n", 264 | "\n", 265 | "# For fun let's say we had a learning rate of 1e-4\n", 266 | "learning_rate = 1e-4\n", 267 | "\n", 268 | "with torch.no_grad():\n", 269 | "    w1 -= learning_rate * w1.grad\n", 270 | "    w2 -= learning_rate * w2.grad\n", 271 | "    w3 -= learning_rate * w3.grad\n", 272 | "    w4 -= learning_rate * w4.grad\n", 273 | "\n", 274 | "    # Manually zero the gradients after running the backward pass\n", 275 | "    w1.grad.zero_()\n", 276 | "    w2.grad.zero_()\n", 277 | "    w3.grad.zero_()\n", 278 | "    w4.grad.zero_()" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "**Now we've done one epoch!**" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "slideshow": { 292 | "slide_type": "slide" 293 | } 294 | }, 295 | "source": [ 296 | "## Advantages\n", 297 | "\n", 298 | "* Easier to debug than a static graph (we can modify our graph and easily check variables and gradients)\n", 299 | "* Since the network is created when run it can be modified **on-the-fly** (very good for NLP where input lengths and output lengths may differ like in machine translation)\n", 300 | "* Very closely resembles regular Python and utilizes object-oriented programming" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": { 306 | "slideshow": { 307 | "slide_type": "slide" 308 | } 309 | }, 310 | "source": [ 311 | "## References\n", 312 | "1. [Getting Started with PyTorch Part 1: Understanding how Automatic Differentiation works](https://towardsdatascience.com/getting-started-with-pytorch-part-1-understanding-how-automatic-differentiation-works-5008282073ec) by Ayoosh Kathuria\n", 313 | "2. [PyTorch: Autograd example](https://github.com/jcjohnson/pytorch-examples#pytorch-autograd) by Justin Johnson" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [] 322 | } 323 | ], 324 | "metadata": { 325 | "kernelspec": { 326 | "display_name": "Python 3.6 - PyTorch 1.1", 327 | "language": "python", 328 | "name": "pytorch_preview" 329 | }, 330 | "language_info": { 331 | "codemirror_mode": { 332 | "name": "ipython", 333 | "version": 3 334 | }, 335 | "file_extension": ".py", 336 | "mimetype": "text/x-python", 337 | "name": "python", 338 | "nbconvert_exporter": "python", 339 | "pygments_lexer": "ipython3", 340 | "version": "3.6.8" 341 | }, 342 | "nav_menu": {}, 343 | "toc": { 344 | "navigate_menu": true, 345 | "number_sections": true, 346 | "sideBar": true, 347 | "threshold": 6, 348 | "toc_cell": false, 349 | "toc_section_display": "block", 350 | "toc_window_display": false 351 | } 352 | }, 353 | "nbformat": 4, 354 | "nbformat_minor": 2 355 | } 356 | -------------------------------------------------------------------------------- /Introduction/5.Production_PyTorch_and_Updates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "


\n", 12 | "# Production PyTorch and Updates\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "slideshow": { 19 | "slide_type": "slide" 20 | } 21 | }, 22 | "source": [ 23 | "\n", 24 | "\n", 25 | "## Key Features with 1.x\n", 26 | "\n", 27 | "* TensorBoard (currently experimental) - First-class and native support for visualization and model debugging with TensorBoard, a web application suite for inspecting and understanding training runs, tensors, and graphs.\n", 28 | "* JIT - a set of compiler tools for bridging the gap between research in PyTorch and production C++ apps without a dependency on the Python interpreter and capable of aggressive optimizations.\n", 29 | "* C++ Frontend (currently experimental) - a pure C++ interface to the PyTorch backend that follows the API and architecture of the established Python frontend.\n", 30 | "* DistributedDataParallel - This container parallelizes the application of the given module by splitting the input across the specified devices by chunking in the batch dimension.\n", 31 | "\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": { 37 | "slideshow": { 38 | "slide_type": "slide" 39 | } 40 | }, 41 | "source": [ 42 | "## Software integrations\n", 43 | "\n", 44 | "1. Unification of the PyTorch 0.4 and Caffe2 codebases makes up PyTorch 1.x\n", 45 | "2. ONNX natively integrated into PyTorch 1.0 as a model export format \n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "source": [ 56 | "## Platform integrations\n", 57 | "\n", 58 | "1. Microsoft supports PyTorch in their Azure cloud and developer offerings, including Azure Machine Learning Service and the Data Science Virtual Machine\n", 59 | "2. Amazon Web Services plans support" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "## References\n", 71 | "\n", 72 | "1. [Announcing PyTorch 1.0 for both research and production](https://developers.facebook.com/blog/post/2018/05/02/announcing-pytorch-1.0-for-research-production/)\n", 73 | "2. [The road to 1.0: production ready PyTorch](https://pytorch.org/2018/05/02/road-to-1.0.html)\n", 74 | "3. 
[logo](https://github.com/yunjey/pytorch-tutorial/blob/master/logo/pytorch_logo.png)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "celltoolbar": "Slideshow", 87 | "kernelspec": { 88 | "display_name": "Python 3.6 (sys)", 89 | "language": "python", 90 | "name": "py36sys" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.6.6" 103 | }, 104 | "nav_menu": {}, 105 | "toc": { 106 | "navigate_menu": true, 107 | "number_sections": true, 108 | "sideBar": true, 109 | "threshold": 6, 110 | "toc_cell": false, 111 | "toc_section_display": "block", 112 | "toc_window_display": false 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /Named_Entity_Recognition/.gitignore: -------------------------------------------------------------------------------- 1 | sample_code* 2 | sample_data.txt 3 | -------------------------------------------------------------------------------- /Named_Entity_Recognition/doccano_export.json: -------------------------------------------------------------------------------- 1 | {"id": 34, "text": "This makes it harder to understand the behavior of the function tf.scatter_add in case indices is a matrix. Specifically, what is the difference between tf.scatter_add and tf.scatter_nd when indices is a matrix. This will raise an error that only sequential or functional models can be saved model.save('custom_model.h5')", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 4, "user": 1}, {"label": 4, "start_offset": 5, "end_offset": 10, "user": 1}, {"label": 4, "start_offset": 11, "end_offset": 13, "user": 1}, {"label": 4, "start_offset": 14, "end_offset": 20, "user": 1}, {"label": 4, "start_offset": 21, "end_offset": 23, "user": 1}, {"label": 4, "start_offset": 24, "end_offset": 34, "user": 1}, {"label": 4, "start_offset": 35, "end_offset": 38, "user": 1}, {"label": 4, "start_offset": 39, "end_offset": 47, "user": 1}, {"label": 4, "start_offset": 48, "end_offset": 50, "user": 1}, {"label": 4, "start_offset": 51, "end_offset": 54, "user": 1}, {"label": 4, "start_offset": 55, "end_offset": 63, "user": 1}, {"label": 2, "start_offset": 64, "end_offset": 78, "user": 1}, {"label": 4, "start_offset": 79, "end_offset": 81, "user": 1}, {"label": 4, "start_offset": 82, "end_offset": 86, "user": 1}, {"label": 4, "start_offset": 87, "end_offset": 94, "user": 1}, {"label": 4, "start_offset": 95, "end_offset": 97, "user": 1}, {"label": 4, "start_offset": 98, "end_offset": 99, "user": 1}, {"label": 4, "start_offset": 100, "end_offset": 106, "user": 1}, {"label": 4, "start_offset": 108, "end_offset": 120, "user": 1}, {"label": 4, "start_offset": 122, "end_offset": 126, "user": 1}, {"label": 4, "start_offset": 127, "end_offset": 129, "user": 1}, {"label": 4, "start_offset": 130, "end_offset": 133, "user": 1}, {"label": 4, "start_offset": 134, "end_offset": 144, "user": 1}, {"label": 4, "start_offset": 145, "end_offset": 152, "user": 1}, {"label": 2, "start_offset": 153, "end_offset": 167, "user": 1}, {"label": 4, "start_offset": 168, "end_offset": 171, "user": 1}, {"label": 2, "start_offset": 172, "end_offset": 186, "user": 1}, 
{"label": 4, "start_offset": 186, "end_offset": 190, "user": 1}, {"label": 4, "start_offset": 191, "end_offset": 198, "user": 1}, {"label": 4, "start_offset": 199, "end_offset": 201, "user": 1}, {"label": 4, "start_offset": 202, "end_offset": 203, "user": 1}, {"label": 4, "start_offset": 204, "end_offset": 210, "user": 1}, {"label": 4, "start_offset": 213, "end_offset": 217, "user": 1}, {"label": 4, "start_offset": 218, "end_offset": 222, "user": 1}, {"label": 4, "start_offset": 223, "end_offset": 228, "user": 1}, {"label": 4, "start_offset": 229, "end_offset": 231, "user": 1}, {"label": 4, "start_offset": 232, "end_offset": 237, "user": 1}, {"label": 4, "start_offset": 238, "end_offset": 242, "user": 1}, {"label": 4, "start_offset": 243, "end_offset": 247, "user": 1}, {"label": 4, "start_offset": 248, "end_offset": 258, "user": 1}, {"label": 4, "start_offset": 259, "end_offset": 261, "user": 1}, {"label": 4, "start_offset": 262, "end_offset": 272, "user": 1}, {"label": 4, "start_offset": 273, "end_offset": 279, "user": 1}, {"label": 4, "start_offset": 280, "end_offset": 283, "user": 1}, {"label": 4, "start_offset": 284, "end_offset": 286, "user": 1}, {"label": 4, "start_offset": 287, "end_offset": 292, "user": 1}, {"label": 2, "start_offset": 293, "end_offset": 322, "user": 1}], "meta": {}} 2 | {"id": 35, "text": "Now, I'm ready to move this to a serving environment (via Sagemaker, but that just implements tensorflow.serving).", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 3, "user": 1}, {"label": 4, "start_offset": 5, "end_offset": 8, "user": 1}, {"label": 4, "start_offset": 9, "end_offset": 14, "user": 1}, {"label": 4, "start_offset": 15, "end_offset": 17, "user": 1}, {"label": 4, "start_offset": 18, "end_offset": 22, "user": 1}, {"label": 4, "start_offset": 23, "end_offset": 27, "user": 1}, {"label": 4, "start_offset": 28, "end_offset": 30, "user": 1}, {"label": 4, "start_offset": 31, "end_offset": 32, "user": 1}, {"label": 4, "start_offset": 33, "end_offset": 40, "user": 1}, {"label": 4, "start_offset": 41, "end_offset": 52, "user": 1}, {"label": 4, "start_offset": 54, "end_offset": 57, "user": 1}, {"label": 4, "start_offset": 58, "end_offset": 67, "user": 1}, {"label": 4, "start_offset": 69, "end_offset": 72, "user": 1}, {"label": 4, "start_offset": 73, "end_offset": 77, "user": 1}, {"label": 4, "start_offset": 78, "end_offset": 82, "user": 1}, {"label": 4, "start_offset": 83, "end_offset": 93, "user": 1}, {"label": 2, "start_offset": 94, "end_offset": 112, "user": 1}], "meta": {}} 3 | {"id": 36, "text": "The issue seems to come from a call to Trackable._gather_saveables_for_checkpoint which is not overridden by tf.Module or AutoTrackable.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 3, "user": 1}, {"label": 4, "start_offset": 4, "end_offset": 9, "user": 1}, {"label": 4, "start_offset": 10, "end_offset": 15, "user": 1}, {"label": 4, "start_offset": 16, "end_offset": 18, "user": 1}, {"label": 4, "start_offset": 19, "end_offset": 23, "user": 1}, {"label": 4, "start_offset": 24, "end_offset": 28, "user": 1}, {"label": 4, "start_offset": 29, "end_offset": 30, "user": 1}, {"label": 4, "start_offset": 31, "end_offset": 35, "user": 1}, {"label": 4, "start_offset": 36, "end_offset": 38, "user": 1}, {"label": 2, "start_offset": 39, "end_offset": 81, "user": 1}, {"label": 4, "start_offset": 82, "end_offset": 87, "user": 1}, {"label": 4, "start_offset": 88, "end_offset": 90, "user": 1}, {"label": 4, "start_offset": 91, "end_offset": 94, "user": 1}, {"label": 
4, "start_offset": 95, "end_offset": 105, "user": 1}, {"label": 4, "start_offset": 106, "end_offset": 108, "user": 1}, {"label": 2, "start_offset": 109, "end_offset": 118, "user": 1}, {"label": 2, "start_offset": 122, "end_offset": 135, "user": 1}, {"label": 4, "start_offset": 119, "end_offset": 121, "user": 1}], "meta": {}} 4 | {"id": 37, "text": "The docs for tf.train.Saver call for a list/dict of SaveableObject, which AutoTrackable is not, but it seems odd that it isn't. It is possible that I am misunderstanding the SaveableObject/Saver API, but I do feel like AutoTrackable should be compatible tf.train.Saver.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 3, "user": 1}, {"label": 4, "start_offset": 4, "end_offset": 8, "user": 1}, {"label": 4, "start_offset": 9, "end_offset": 12, "user": 1}, {"label": 2, "start_offset": 13, "end_offset": 27, "user": 1}, {"label": 4, "start_offset": 28, "end_offset": 32, "user": 1}, {"label": 4, "start_offset": 33, "end_offset": 36, "user": 1}, {"label": 4, "start_offset": 37, "end_offset": 38, "user": 1}, {"label": 4, "start_offset": 39, "end_offset": 48, "user": 1}, {"label": 4, "start_offset": 49, "end_offset": 51, "user": 1}, {"label": 2, "start_offset": 52, "end_offset": 66, "user": 1}, {"label": 4, "start_offset": 68, "end_offset": 73, "user": 1}, {"label": 2, "start_offset": 74, "end_offset": 87, "user": 1}, {"label": 4, "start_offset": 88, "end_offset": 90, "user": 1}, {"label": 4, "start_offset": 91, "end_offset": 94, "user": 1}, {"label": 4, "start_offset": 96, "end_offset": 99, "user": 1}, {"label": 4, "start_offset": 100, "end_offset": 102, "user": 1}, {"label": 4, "start_offset": 103, "end_offset": 108, "user": 1}, {"label": 4, "start_offset": 109, "end_offset": 112, "user": 1}, {"label": 4, "start_offset": 113, "end_offset": 117, "user": 1}, {"label": 4, "start_offset": 118, "end_offset": 120, "user": 1}, {"label": 4, "start_offset": 121, "end_offset": 126, "user": 1}, {"label": 4, "start_offset": 128, "end_offset": 130, "user": 1}, {"label": 4, "start_offset": 131, "end_offset": 133, "user": 1}, {"label": 4, "start_offset": 134, "end_offset": 142, "user": 1}, {"label": 4, "start_offset": 143, "end_offset": 147, "user": 1}, {"label": 4, "start_offset": 148, "end_offset": 149, "user": 1}, {"label": 4, "start_offset": 150, "end_offset": 152, "user": 1}, {"label": 4, "start_offset": 153, "end_offset": 169, "user": 1}, {"label": 4, "start_offset": 170, "end_offset": 173, "user": 1}, {"label": 4, "start_offset": 174, "end_offset": 194, "user": 1}, {"label": 4, "start_offset": 195, "end_offset": 198, "user": 1}, {"label": 4, "start_offset": 200, "end_offset": 203, "user": 1}, {"label": 4, "start_offset": 204, "end_offset": 205, "user": 1}, {"label": 4, "start_offset": 206, "end_offset": 208, "user": 1}, {"label": 4, "start_offset": 209, "end_offset": 213, "user": 1}, {"label": 4, "start_offset": 214, "end_offset": 218, "user": 1}, {"label": 2, "start_offset": 219, "end_offset": 232, "user": 1}, {"label": 4, "start_offset": 233, "end_offset": 239, "user": 1}, {"label": 4, "start_offset": 240, "end_offset": 242, "user": 1}, {"label": 4, "start_offset": 243, "end_offset": 253, "user": 1}, {"label": 2, "start_offset": 254, "end_offset": 268, "user": 1}], "meta": {}} 5 | {"id": 38, "text": "Please use tf.train.Checkpoint rather than tf.train.Saver to save objects: https://www.tensorflow.org/beta/guide/checkpoints (the guide is for 2.x, but the APIs are in 1.x as well).", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 6, 
"user": 1}, {"label": 4, "start_offset": 7, "end_offset": 10, "user": 1}, {"label": 2, "start_offset": 11, "end_offset": 30, "user": 1}, {"label": 4, "start_offset": 31, "end_offset": 37, "user": 1}, {"label": 4, "start_offset": 38, "end_offset": 42, "user": 1}, {"label": 2, "start_offset": 43, "end_offset": 57, "user": 1}, {"label": 4, "start_offset": 58, "end_offset": 60, "user": 1}, {"label": 4, "start_offset": 61, "end_offset": 65, "user": 1}, {"label": 4, "start_offset": 66, "end_offset": 73, "user": 1}, {"label": 4, "start_offset": 75, "end_offset": 124, "user": 1}, {"label": 4, "start_offset": 126, "end_offset": 129, "user": 1}, {"label": 4, "start_offset": 130, "end_offset": 135, "user": 1}, {"label": 4, "start_offset": 136, "end_offset": 138, "user": 1}, {"label": 4, "start_offset": 139, "end_offset": 142, "user": 1}, {"label": 4, "start_offset": 143, "end_offset": 146, "user": 1}, {"label": 4, "start_offset": 148, "end_offset": 151, "user": 1}, {"label": 4, "start_offset": 152, "end_offset": 155, "user": 1}, {"label": 4, "start_offset": 156, "end_offset": 160, "user": 1}, {"label": 4, "start_offset": 161, "end_offset": 164, "user": 1}, {"label": 4, "start_offset": 165, "end_offset": 167, "user": 1}, {"label": 4, "start_offset": 168, "end_offset": 171, "user": 1}, {"label": 4, "start_offset": 172, "end_offset": 174, "user": 1}, {"label": 4, "start_offset": 175, "end_offset": 179, "user": 1}], "meta": {}} 6 | {"id": 39, "text": "There is a mismatch in the name of libtensorflow_framework and the tf.sysconfig.get_link_flag name. As an example TF-Addons uses this to link with tensorflow core.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 5, "user": 1}, {"label": 4, "start_offset": 6, "end_offset": 8, "user": 1}, {"label": 4, "start_offset": 9, "end_offset": 10, "user": 1}, {"label": 4, "start_offset": 11, "end_offset": 19, "user": 1}, {"label": 4, "start_offset": 20, "end_offset": 22, "user": 1}, {"label": 4, "start_offset": 23, "end_offset": 26, "user": 1}, {"label": 4, "start_offset": 27, "end_offset": 31, "user": 1}, {"label": 4, "start_offset": 32, "end_offset": 34, "user": 1}, {"label": 2, "start_offset": 35, "end_offset": 58, "user": 1}, {"label": 4, "start_offset": 59, "end_offset": 62, "user": 1}, {"label": 4, "start_offset": 63, "end_offset": 66, "user": 1}, {"label": 2, "start_offset": 67, "end_offset": 93, "user": 1}, {"label": 4, "start_offset": 94, "end_offset": 98, "user": 1}, {"label": 4, "start_offset": 100, "end_offset": 102, "user": 1}, {"label": 4, "start_offset": 103, "end_offset": 105, "user": 1}, {"label": 4, "start_offset": 106, "end_offset": 113, "user": 1}, {"label": 4, "start_offset": 114, "end_offset": 123, "user": 1}, {"label": 4, "start_offset": 124, "end_offset": 128, "user": 1}, {"label": 4, "start_offset": 129, "end_offset": 133, "user": 1}, {"label": 4, "start_offset": 134, "end_offset": 136, "user": 1}, {"label": 4, "start_offset": 137, "end_offset": 141, "user": 1}, {"label": 4, "start_offset": 142, "end_offset": 146, "user": 1}, {"label": 4, "start_offset": 147, "end_offset": 157, "user": 1}, {"label": 4, "start_offset": 158, "end_offset": 162, "user": 1}], "meta": {}} 7 | {"id": 40, "text": "I am encountering serialization issues when trying to dump the config from a tf.keras.Model object without complex things like custom layers (or even Lambdas...). 
The code worked well with tf 1.13.1 however in tf 1.1.4, json/yaml serialization fails, and to_yaml and model_from_yaml fails as well.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 1, "user": 1}, {"label": 4, "start_offset": 2, "end_offset": 4, "user": 1}, {"label": 4, "start_offset": 5, "end_offset": 17, "user": 1}, {"label": 4, "start_offset": 18, "end_offset": 31, "user": 1}, {"label": 4, "start_offset": 32, "end_offset": 38, "user": 1}, {"label": 4, "start_offset": 39, "end_offset": 43, "user": 1}, {"label": 4, "start_offset": 44, "end_offset": 50, "user": 1}, {"label": 4, "start_offset": 51, "end_offset": 53, "user": 1}, {"label": 4, "start_offset": 54, "end_offset": 58, "user": 1}, {"label": 4, "start_offset": 59, "end_offset": 62, "user": 1}, {"label": 2, "start_offset": 63, "end_offset": 69, "user": 1}, {"label": 4, "start_offset": 70, "end_offset": 74, "user": 1}, {"label": 4, "start_offset": 75, "end_offset": 76, "user": 1}, {"label": 2, "start_offset": 77, "end_offset": 91, "user": 1}, {"label": 4, "start_offset": 92, "end_offset": 98, "user": 1}, {"label": 4, "start_offset": 99, "end_offset": 106, "user": 1}, {"label": 4, "start_offset": 107, "end_offset": 114, "user": 1}, {"label": 4, "start_offset": 115, "end_offset": 121, "user": 1}, {"label": 4, "start_offset": 122, "end_offset": 126, "user": 1}, {"label": 4, "start_offset": 127, "end_offset": 133, "user": 1}, {"label": 4, "start_offset": 134, "end_offset": 140, "user": 1}, {"label": 4, "start_offset": 142, "end_offset": 144, "user": 1}, {"label": 4, "start_offset": 145, "end_offset": 149, "user": 1}, {"label": 4, "start_offset": 150, "end_offset": 160, "user": 1}, {"label": 4, "start_offset": 164, "end_offset": 167, "user": 1}, {"label": 4, "start_offset": 168, "end_offset": 172, "user": 1}, {"label": 4, "start_offset": 173, "end_offset": 179, "user": 1}, {"label": 4, "start_offset": 180, "end_offset": 184, "user": 1}, {"label": 4, "start_offset": 185, "end_offset": 189, "user": 1}, {"label": 4, "start_offset": 190, "end_offset": 192, "user": 1}, {"label": 4, "start_offset": 193, "end_offset": 199, "user": 1}, {"label": 4, "start_offset": 200, "end_offset": 207, "user": 1}, {"label": 4, "start_offset": 208, "end_offset": 210, "user": 1}, {"label": 4, "start_offset": 211, "end_offset": 213, "user": 1}, {"label": 4, "start_offset": 214, "end_offset": 219, "user": 1}, {"label": 4, "start_offset": 221, "end_offset": 230, "user": 1}, {"label": 4, "start_offset": 231, "end_offset": 244, "user": 1}, {"label": 4, "start_offset": 245, "end_offset": 250, "user": 1}, {"label": 4, "start_offset": 252, "end_offset": 255, "user": 1}, {"label": 2, "start_offset": 256, "end_offset": 263, "user": 1}, {"label": 4, "start_offset": 264, "end_offset": 267, "user": 1}, {"label": 2, "start_offset": 268, "end_offset": 283, "user": 1}, {"label": 4, "start_offset": 284, "end_offset": 289, "user": 1}, {"label": 4, "start_offset": 290, "end_offset": 292, "user": 1}, {"label": 4, "start_offset": 293, "end_offset": 297, "user": 1}], "meta": {}} 8 | {"id": 49, "text": "After #19228 , setting batch_size=None (and batch_sampler=None) disables autobatching (auto collation) and advanced use cases can handle batching themselves.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 5, "user": 1}, {"label": 4, "start_offset": 6, "end_offset": 13, "user": 1}, {"label": 4, "start_offset": 15, "end_offset": 22, "user": 1}, {"label": 2, "start_offset": 23, "end_offset": 38, "user": 1}, {"label": 4, "start_offset": 40, "end_offset": 43, 
"user": 1}, {"label": 2, "start_offset": 44, "end_offset": 62, "user": 1}, {"label": 4, "start_offset": 64, "end_offset": 72, "user": 1}, {"label": 4, "start_offset": 73, "end_offset": 85, "user": 1}, {"label": 4, "start_offset": 87, "end_offset": 91, "user": 1}, {"label": 4, "start_offset": 92, "end_offset": 101, "user": 1}, {"label": 4, "start_offset": 103, "end_offset": 106, "user": 1}, {"label": 4, "start_offset": 107, "end_offset": 115, "user": 1}, {"label": 4, "start_offset": 116, "end_offset": 119, "user": 1}, {"label": 4, "start_offset": 120, "end_offset": 125, "user": 1}, {"label": 4, "start_offset": 126, "end_offset": 129, "user": 1}, {"label": 4, "start_offset": 130, "end_offset": 136, "user": 1}, {"label": 4, "start_offset": 137, "end_offset": 145, "user": 1}, {"label": 4, "start_offset": 146, "end_offset": 156, "user": 1}], "meta": {}} 9 | {"id": 83, "text": "I have a tf.data.Dataset that I want to write to tfrecord files. to do this, I currently use tf.python_io.TFRecordWriter to do this, but would like to use tf.data.experimental.TFRecordWriter, as it would be more efficient to also do the writing as part of the dataset graph execution.", "annotations": [{"label": 4, "start_offset": 0, "end_offset": 1, "user": 1}, {"label": 4, "start_offset": 2, "end_offset": 6, "user": 1}, {"label": 4, "start_offset": 7, "end_offset": 8, "user": 1}, {"label": 2, "start_offset": 9, "end_offset": 24, "user": 1}, {"label": 4, "start_offset": 25, "end_offset": 29, "user": 1}, {"label": 4, "start_offset": 30, "end_offset": 31, "user": 1}, {"label": 4, "start_offset": 32, "end_offset": 36, "user": 1}, {"label": 4, "start_offset": 37, "end_offset": 39, "user": 1}, {"label": 4, "start_offset": 40, "end_offset": 45, "user": 1}, {"label": 4, "start_offset": 46, "end_offset": 48, "user": 1}, {"label": 4, "start_offset": 49, "end_offset": 57, "user": 1}, {"label": 4, "start_offset": 58, "end_offset": 63, "user": 1}, {"label": 4, "start_offset": 65, "end_offset": 67, "user": 1}, {"label": 4, "start_offset": 68, "end_offset": 70, "user": 1}, {"label": 4, "start_offset": 71, "end_offset": 75, "user": 1}, {"label": 4, "start_offset": 77, "end_offset": 78, "user": 1}, {"label": 4, "start_offset": 79, "end_offset": 88, "user": 1}, {"label": 4, "start_offset": 89, "end_offset": 92, "user": 1}, {"label": 2, "start_offset": 93, "end_offset": 120, "user": 1}, {"label": 4, "start_offset": 121, "end_offset": 123, "user": 1}, {"label": 4, "start_offset": 124, "end_offset": 126, "user": 1}, {"label": 4, "start_offset": 127, "end_offset": 131, "user": 1}, {"label": 4, "start_offset": 133, "end_offset": 136, "user": 1}, {"label": 4, "start_offset": 137, "end_offset": 142, "user": 1}, {"label": 4, "start_offset": 143, "end_offset": 147, "user": 1}, {"label": 4, "start_offset": 148, "end_offset": 150, "user": 1}, {"label": 4, "start_offset": 151, "end_offset": 154, "user": 1}, {"label": 2, "start_offset": 155, "end_offset": 190, "user": 1}, {"label": 4, "start_offset": 192, "end_offset": 194, "user": 1}, {"label": 4, "start_offset": 195, "end_offset": 197, "user": 1}, {"label": 4, "start_offset": 198, "end_offset": 203, "user": 1}, {"label": 4, "start_offset": 204, "end_offset": 206, "user": 1}, {"label": 4, "start_offset": 207, "end_offset": 211, "user": 1}, {"label": 4, "start_offset": 212, "end_offset": 221, "user": 1}, {"label": 4, "start_offset": 222, "end_offset": 224, "user": 1}, {"label": 4, "start_offset": 225, "end_offset": 229, "user": 1}, {"label": 4, "start_offset": 230, "end_offset": 232, "user": 1}, 
{"label": 4, "start_offset": 233, "end_offset": 236, "user": 1}, {"label": 4, "start_offset": 237, "end_offset": 244, "user": 1}, {"label": 4, "start_offset": 245, "end_offset": 247, "user": 1}, {"label": 4, "start_offset": 248, "end_offset": 252, "user": 1}, {"label": 4, "start_offset": 253, "end_offset": 255, "user": 1}, {"label": 4, "start_offset": 256, "end_offset": 259, "user": 1}, {"label": 4, "start_offset": 260, "end_offset": 267, "user": 1}, {"label": 4, "start_offset": 268, "end_offset": 273, "user": 1}, {"label": 4, "start_offset": 274, "end_offset": 283, "user": 1}], "meta": {}} 10 | {"id": 81, "text": "torch.lu_solve gives an unintuitive error message when the inputs are not batched", "annotations": [{"label": 2, "start_offset": 0, "end_offset": 14, "user": 1}, {"label": 4, "start_offset": 15, "end_offset": 20, "user": 1}, {"label": 4, "start_offset": 21, "end_offset": 23, "user": 1}, {"label": 4, "start_offset": 24, "end_offset": 35, "user": 1}, {"label": 4, "start_offset": 36, "end_offset": 41, "user": 1}, {"label": 4, "start_offset": 42, "end_offset": 49, "user": 1}, {"label": 4, "start_offset": 50, "end_offset": 54, "user": 1}, {"label": 4, "start_offset": 55, "end_offset": 58, "user": 1}, {"label": 4, "start_offset": 59, "end_offset": 65, "user": 1}, {"label": 4, "start_offset": 66, "end_offset": 69, "user": 1}, {"label": 4, "start_offset": 70, "end_offset": 73, "user": 1}, {"label": 4, "start_offset": 74, "end_offset": 81, "user": 1}], "meta": {}} -------------------------------------------------------------------------------- /Named_Entity_Recognition/readme.md: -------------------------------------------------------------------------------- 1 | # Named Entity Recognition 2 | ## Using Bi-LSTMs, Conditional Random Fields and the Viterbi Algorithm__ 3 | 4 | 5 | Bi-LSTM Diagram 6 | --- 7 | 8 | Looks complicated, but really just a lot of vector concatenations and a couple LSTMs (one running in reverse). Hope this is clear, but see references in tutorial for more context (lol, get it? context?). 9 | 10 | ![bi-lstm](../images/blstm_crf_details.png) 11 | 12 | Viterbi Algorithm at a Glance 13 | --- 14 | 15 | Viterbi algorithm example. 16 | 17 | ![viterbi algorithm](../images/viterbi.png) 18 | 19 | Conditional Random Field (CRF) at a Glance 20 | --- 21 | 22 | Transition matrix example. 23 | 24 | ![crf](../images/crf_transition_matrix.png) 25 | -------------------------------------------------------------------------------- /Named_Entity_Recognition/sample_data.txt: -------------------------------------------------------------------------------- 1 | place the chicken, celery, carrots, onions, parsnip (if using), parsley, peppercorns, bay leaves and salt in a large soup pot and cover with cold water by 1 inch. bring to a boil over high heat, then immediately reduce the heat to very low. adjust the heat until the soup is smiling: barely moving on the surface, with an occasional bubble breaking through. cook uncovered, until the chicken is very tender and falling off the bone, 1 to 1 1/2 hours. when cool enough to handle, use tongs to transfer chicken from the pot to a container. taste the broth and continue to simmer it until it is concentrated and tasty. strain broth through a fine sieve (or a colander lined with cheesecloth) into a separate container. discard all the solids from the strainer (or reserve the vegetables, chill and serve with vinaigrette, if you wish). 
refrigerate chicken pieces and broth separately for at least 8 hours (or up to 3 days), until a thick layer of yellow fat has risen to the top of the broth. when ready to finish the soup, use your fingers to separate chicken breast meat from bones and skin. discard bones and skin. use two forks to pull the breast meat apart into soft chunks, or use a knife and cut into bite-size pieces. (reserve dark meat for another use.) skim chicken fat from top of broth and set aside. place 3 tablespoons of the fat in a soup pot with a lid. add leeks, stir to coat, and heat over medium heat until leeks begin to fry. then reduce the heat to a gentle sizzle and cook, stirring often, until slightly softened, about 3 minutes. add carrots, sprinkle with salt, stir, and cover the pot. cook until vegetables are just tender, about 5 minutes more. (keep in mind that vegetables will continue to cook in the soup.) do not brown. pour broth into pot with vegetables and heat to a simmer. add noodles and simmer until heated through, soft and plumped with chicken broth. add the breast meat, then taste broth and add salt and pepper to taste. for best flavor, soup should have some golden droplets of fat on top; if needed, add more chicken fat one teaspoon at a time. serve immediately, in a tureen or from the pot, sprinkling each serving with herbs. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Introduction to NLP with PyTorch Workshop 3 | 4 | 5 | 6 | [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/ai-workshops/pytorch-nlp-spring-2019) 7 | 8 | 9 | Agenda 10 | ---- 11 | * 9-9:30AM Registration, bagel breakfast 12 | * 9:30-10:50AM [Setup, Introduction to PyTorch and NLP](Introduction) 13 | * 11AM-11:45AM [Word Embeddings](Word%20Embeddings) 14 | * 11:45-12 Azure/Google Cloud/Transformers Demo 15 | * 12-1PM Pizza lunch 16 | * 1-1:50PM [NLP and Sarcasm Detection](Sarcasm_Detection) 17 | * 2-2:50PM [LSTMs and Sequence Models](Sequence%20Models) 18 | * 3-4PM [Bi-LSTMs and Named Entity Recognition](Named_Entity_Recognition) 19 | 20 | ## VM Instructions 21 | 22 | * Log in with the provided username and password 23 | * Open up a new Python 3.6 - PyTorch 1.1 notebook 24 | * To pull down the latest notebooks for the Workshop, in a new cell write: 25 | 26 | ```bash 27 | ! cd Workshop/intro-to-nlp-with-pytorch && git checkout -- * && git pull origin master 28 | ``` 29 | * Run the cell by hitting Shift+Enter 30 | * All of the Workshop notebooks should then be in the `Workshop/intro-to-nlp-with-pytorch` folder 31 | 32 | ## Local Installation 33 | 34 | * Make sure you are running Python 3.6+ 35 | * Clone this repository 36 | * Install the requirements: `pip install -r requirements.txt` 37 | * Run the notebooks: `jupyter notebook` 38 | * Inside Jupyter in your web browser, navigate to the tutorials and open the notebooks to run them.
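* To verify the setup, a quick sanity check in a notebook or Python shell (a minimal sketch; version numbers will vary with your install, and the workshop VMs use PyTorch 1.1):

```python
import torch

print(torch.__version__)  # the installed PyTorch version
print(torch.rand(2, 3))   # a random 2x3 tensor; confirms tensor creation works
```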
39 | 40 | 41 | ### Troubleshooting Mac 42 | * If you get an error message on macOS about libomp, you may need to run the following (assuming you have Homebrew installed): 43 | ``` 44 | xcode-select --install 45 | brew install libomp 46 | ``` 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/anaconda3/bin/python" 3 | } -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/Max Entropy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 17, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "/data/home/arclight/notebooks/workshop/intro-to-nlp-with-pytorch/Sarcasm_Detection/algorithm\r\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "!pwd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Max Entropy Algorithm\n", 25 | "\n", 26 | "The Max Entropy classifier is a probabilistic classifier which belongs to the class of exponential models. \n", 27 | "\n", 28 | "The Max Entropy classifier does not assume that the features are conditionally independent of each other. \n", 29 | "\n", 30 | "MaxEnt is based on the Principle of Maximum Entropy: from all the models that fit our training data, it selects the one with the largest entropy. The Max Entropy classifier can be used to solve a large variety of text classification problems such as language detection, topic classification, sentiment analysis and more.\n", 31 | "\n", 32 | "![img](./dogsvsfriedchicken.png)\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Let a feature function, f_i(x), take in an input, x, and return either 0 or 1, depending on whether the feature is present in x:\n", 40 | "\n", 41 | "f(x) = \begin{cases} 1, & \quad \mbox{if the feature is present in } x\\ 0, & \quad \mbox{otherwise}\\ \end{cases} \n", 42 | "\n", 43 | "Furthermore, for N features, associate each feature function f_i(x) with a weight w_i(d), which is a number that denotes how “important” f_i(x) is compared to other features for a decision, d (in this case, sarcastic or not sarcastic).\n", 44 | "\n", 45 | "We can “model” (in my opinion, this word could be understood as “estimate”) the score of a decision d on input x using the following procedure:\n", 46 | "\n", 47 | "- For each f_i(x) in a set of N features, determine if f_i(x) should be 1 or 0\n", 48 | "- Multiply each f_i(x) with the associated weight w_i(d), which depends on the decision d being evaluated.\n", 49 | "- Add up all of the weight*feature pairs: sum_d = \sum_{i=1}^{N} w_i(d)*f_i(x)\n", 50 | "- Exponentiate the sum: numerator_d = \exp(sum_d) \n", 51 | "- Divide the sum by a number that will range the score between 0 and 1, and such that the sum of scores across all decisions is 1.
It turns out that this is the sum of the numerators for every possible decision d: denominator = \sum_{d} \exp(\sum_{i=1}^{N} w_i(d)*f_i(x))\n", 52 | "- The procedure above is pretty much the equation below (a small standalone sketch of this computation appears right after the classifier code further down):\n", 53 | "\n", 54 | "![img](./maxentequation.png)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "# Prerequisites\n", 62 | "\n", 63 | "## Install the MegaM library\n", 64 | "\n", 65 | "- Make sure the Punkt Sentence Tokenizer is installed\n", 66 | " - nltk.download('punkt')\n", 67 | "- Install the MegaM library used by NLTK for the Max Entropy algorithm\n", 68 | " - wget http://caml.inria.fr/pub/distrib/ocaml-4.02/ocaml-4.02.1.tar.gz\n", 69 | " - tar -zxvf ocaml-4.02.1.tar.gz && cd ocaml-4.02.1\n", 70 | " - ./configure\n", 71 | " - make world.opt\n", 72 | " - sudo make install\n", 73 | " - wget http://hal3.name/megam/megam_src.tgz\n", 74 | " - tar -zxvf megam_src.tgz\n", 75 | " - cd megam_0.92\n", 76 | " - Run `ocamlc -where` and note down the path\n", 77 | " - Edit line 74 of the Makefile\n", 78 | " - #WITHCLIBS =-I /usr/lib/ocaml/caml\n", 79 | " - WITHCLIBS =-I /usr/local/lib/ocaml/caml\n", 80 | " - Edit the Makefile again, changing -lstr on line 62 to -lcamlstr\n", 81 | " - #WITHSTR =str.cma -cclib -lstr\n", 82 | " - WITHSTR =str.cma -cclib -lcamlstr\n", 83 | " - Run `make`" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 14, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "import sys\n", 93 | "import nltk\n", 94 | "import nltk.data\n", 95 | "from nltk.metrics.scores import (accuracy, precision, recall, f_measure,\n", 96 | " log_likelihood, approxrand)\n", 97 | "from nltk import precision\n", 98 | "import random\n", 99 | "from nltk import classify\n", 100 | "from nltk.classify import MaxentClassifier\n", 101 | "from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights\n", 102 | "from nltk.corpus import names\n", 103 | "import collections,re\n", 104 | "import csv\n", 105 | "import json,os" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 15, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "[nltk_data] Downloading package averaged_perceptron_tagger to\n", 118 | "[nltk_data] /home/arclight/nltk_data...\n", 119 | "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", 120 | "[nltk_data] date!\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "train_data = \"train_set_v2.json\"\n", 126 | "test_data = \"test_set_v2.json\"\n", 127 | "\n", 128 | "nltk.data.load('nltk:tokenizers/punkt/english.pickle')\n", 129 | "nltk.download('averaged_perceptron_tagger')\n", 130 | "#os.environ[\"MEGAM\"] = '/usr/local/Cellar/megam/0.9.2/bin/megam'\n", 131 | "\n", 132 | "all_features = [\"words\",\"length\",\"pos\",\"interjection\",\"question\"]\n", 133 | "metrics = {}\n", 134 | "def feature_set_generator(text,length,label, include_list):\n", 135 | " features = {}\n", 136 | " words = text.split()\n", 137 | "\n", 138 | " if not include_list:\n", 139 | " include_list = all_features\n", 140 | "\n", 141 | " # Bag of words\n", 142 | " if(\"words\" in include_list):\n", 143 | " features[\"words\"] = tuple((word,True) for word in words)\n", 144 | "\n", 145 | " # Length\n", 146 | " if(\"length\" in include_list):\n", 147 | " features[\"length\"] = length\n", 148 | "\n", 149 | " # Part of speech tagging\n", 150 | " pos = nltk.word_tokenize(text)\n", 151 | " if(\"pos\" in include_list):\n", 152 | " set_of_pos_tags = 
nltk.pos_tag(pos)\n", 153 | " features[\"pos\"] = tuple(t for t in set_of_pos_tags)\n", 154 | "\n", 155 | "\n", 156 | " # Interjections - SUBSTANTIAL INCREASE IN ACCURACY\n", 157 | " if(\"interjection\" in include_list):\n", 158 | " set_of_pos_tags = nltk.pos_tag(pos)\n", 159 | " interjection_tags = 0\n", 160 | " for tag in set_of_pos_tags:\n", 161 | " if tag == \"UH\":\n", 162 | " interjection_tags += 1\n", 163 | " features[\"interjection\"] = interjection_tags\n", 164 | "\n", 165 | " if(\"question\" in include_list):\n", 166 | " question_count = 0\n", 167 | " for text in words:\n", 168 | " if \"?\" in text:\n", 169 | " question_count += 1\n", 170 | " features[\"question\"] = question_count\n", 171 | "\n", 172 | " return features\n", 173 | "\n", 174 | "def me_classifier(exclude_list):\n", 175 | " me_classifier = 0\n", 176 | "\n", 177 | " with open(train_data, 'r',encoding='utf-8', errors='ignore') as csvfile:\n", 178 | " reader = csv.reader(csvfile)\n", 179 | " feature_set = [(feature_set_generator(text,length,label,exclude_list),label) for text,length,label in reader]\n", 180 | " #print(feature_set)\n", 181 | " me_classifier = MaxentClassifier.train(feature_set,\"megam\")\n", 182 | "\n", 183 | " accuracy = 0.0\n", 184 | " with open(test_data,'r',encoding='utf-8', errors='ignore') as testcsvfile:\n", 185 | " test_reader = csv.reader(testcsvfile)\n", 186 | " test_feature_set = [(feature_set_generator(text,length,label,exclude_list),label) for text,length,label in test_reader]\n", 187 | " accuracy = classify.accuracy(me_classifier, test_feature_set)\n", 188 | "\n", 189 | " classified = collections.defaultdict(set)\n", 190 | " observed = collections.defaultdict(set)\n", 191 | " i=1\n", 192 | " with open(test_data,'r',encoding='utf-8', errors='ignore') as testcsvfile:\n", 193 | " test_reader = csv.reader(testcsvfile)\n", 194 | " for text,length,label in test_reader:\n", 195 | " observed[label].add(i)\n", 196 | " classified[me_classifier.classify(feature_set_generator(text,length,label,exclude_list))].add(i)\n", 197 | " i+=1\n", 198 | "\n", 199 | " return accuracy,precision(observed[\"1\"], classified[\"1\"]),recall(observed['1'], classified['1']),\\\n", 200 | " f_measure(observed['1'], classified['1']),precision(observed['0'], classified['0']),recall(observed['1'], classified['0']),f_measure(observed['1'], classified['0'])\n", 201 | "\n", 202 | "\n", 203 | "def print_stats(a,ps,rs,fs,pns,rns,fns):\n", 204 | " print()\n", 205 | " print(\"****************** MAX ENTROPY STATISTICS******************************\")\n", 206 | " print('Accuracy:', a)\n", 207 | " print('Sarcasm precision:', ps)\n", 208 | " print('Sarcasm recall:', rs)\n", 209 | " print('Sarcasm F-measure:', fs)\n", 210 | " print('Not Sarcasm precision:',pns)\n", 211 | " print('Not Sarcasm recall:', rns)\n", 212 | " print('Not Sarcasm F-measure:', fns)\n", 213 | " print(\"***********************************************************************\")\n", 214 | "\n", 215 | "\n", 216 | "def prepare_dict(dict,a,ps,rs,fs,pns,rns,fns):\n", 217 | " dict = {}\n", 218 | " dict[\"title\"] = \"Maximum Entropy with all features\"\n", 219 | " dict[\"accuracy\"] = a\n", 220 | " dict[\"sarcasm_precision\"] = ps\n", 221 | " dict[\"sarcasm_recall\"] = rs\n", 222 | " dict[\"sarcasm_f_measure\"] = fs\n", 223 | " dict[\"not_sarcasm_precision\"] = pns\n", 224 | " dict[\"not_sarcasm_recall\"] = rns\n", 225 | " dict[\"not_sarcasm_f_measure\"] = fns\n", 226 | " return dict\n", 227 | "\n" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | 
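Before running the classifier, here is the small standalone sketch promised above of the Max Entropy scoring procedure. The feature values and weights are invented for illustration, and this is plain Python rather than the NLTK/MegaM pipeline this notebook actually uses:

```python
import math

def maxent_prob(features, weights, decisions):
    """Score each decision d as exp(sum_i w_i(d) * f_i(x)), then normalize
    so the scores over all decisions sum to 1 (the denominator above)."""
    scores = {d: math.exp(sum(w * f for w, f in zip(weights[d], features)))
              for d in decisions}
    z = sum(scores.values())  # sum of the numerators over every decision d
    return {d: s / z for d, s in scores.items()}

# Two binary features f_1(x), f_2(x) and made-up weights per decision:
probs = maxent_prob([1, 0],
                    {"sarcastic": [2.0, -1.0], "not_sarcastic": [0.5, 0.3]},
                    ["sarcastic", "not_sarcastic"])
print(probs)  # e.g. {'sarcastic': 0.82, 'not_sarcastic': 0.18}
```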
"execution_count": 16, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "\n", 240 | "****************** MAX ENTROPY STATISTICS******************************\n", 241 | "Accuracy: 0.6024705221785513\n", 242 | "Sarcasm precision: 0.6297335203366059\n", 243 | "Sarcasm recall: 0.19445647466435687\n", 244 | "Sarcasm F-measure: 0.2971542025148908\n", 245 | "Not Sarcasm precision: 0.5982721382289417\n", 246 | "Not Sarcasm recall: 0.8055435253356431\n", 247 | "Not Sarcasm F-measure: 0.5361003026372676\n", 248 | "***********************************************************************\n" 249 | ] 250 | } 251 | ], 252 | "source": [ 253 | "a,ps,rs,fs,pns,rns,fns = me_classifier([])\n", 254 | "max_ent_with_all_features = {}\n", 255 | "metrics[\"max_ent_with_all_features\"]=prepare_dict(max_ent_with_all_features,a,ps,rs,fs,pns,rns,fns)\n", 256 | "print_stats(a,ps,rs,fs,pns,rns,fns)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "# Exercise 1 \n", 264 | "\n", 265 | "## Try MaxEnt Classifier with just Parts of Speech words and inspect the metrics\n", 266 | "\n" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "# Exercise 2 \n", 274 | "\n", 275 | "## Try MaxEnt Classifier with only interjection and inspect the metrics" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "# Exercise 3 \n", 283 | "\n", 284 | "## Inspect data and note down what could improve accuracy. Do sarcastic sentences have a \"?\" character often or are they phrased as a question? Rhetorical questions often resemble sarcastic sentences" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python 3.5", 303 | "language": "python", 304 | "name": "python3" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.5.5" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 2 321 | } 322 | -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/dogsvsfriedchicken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/Sarcasm_Detection/algorithm/dogsvsfriedchicken.png -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/maxentequation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/Sarcasm_Detection/algorithm/maxentequation.png -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/maxentropy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import nltk 3 | import nltk.data 4 | from nltk.metrics.scores 
import (accuracy, precision, recall, f_measure, 5 | log_likelihood, approxrand) 6 | from nltk import precision 7 | import random 8 | from nltk import classify 9 | from nltk.classify import MaxentClassifier 10 | from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights 11 | from nltk.corpus import names 12 | import collections,re 13 | import csv 14 | import json,os 15 | 16 | train_data = "train_set_v2.json" 17 | test_data = "test_set_v2.json" 18 | 19 | nltk.data.load('nltk:tokenizers/punkt/english.pickle') 20 | nltk.download('averaged_perceptron_tagger') 21 | os.environ["MEGAM"] = '/usr/local/Cellar/megam/0.9.2/bin/megam' 22 | 23 | all_features = ["words","length","pos","interjection","question"] 24 | metrics = {} 25 | def feature_set_generator(text,length,label, include_list): 26 | features = {} 27 | words = text.split() 28 | 29 | if not include_list: 30 | include_list = all_features 31 | 32 | # Bag of words 33 | if("words" in include_list): 34 | features["words"] = tuple((word,True) for word in words) 35 | 36 | # Length 37 | if("length" in include_list): 38 | features["length"] = length 39 | 40 | # Part of speech tagging 41 | pos = nltk.word_tokenize(text) 42 | if("pos" in include_list): 43 | set_of_pos_tags = nltk.pos_tag(pos) 44 | features["pos"] = tuple(t for t in set_of_pos_tags) 45 | 46 | 47 | # Interjections - SUBSTANTIAL INCREASE IN ACCURACY 48 | if("interjection" in include_list): 49 | set_of_pos_tags = nltk.pos_tag(pos) 50 | interjection_tags = 0 51 | for tag in set_of_pos_tags: 52 | if tag == "UH": 53 | interjection_tags += 1 54 | features["interjection"] = interjection_tags 55 | 56 | if("question" in include_list): 57 | question_count = 0 58 | for text in words: 59 | if "?" in text: 60 | question_count += 1 61 | features["question"] = question_count 62 | 63 | return features 64 | 65 | def me_classifier(exclude_list): 66 | me_classifier = 0 67 | 68 | with open(train_data, 'r',encoding='utf-8', errors='ignore') as csvfile: 69 | reader = csv.reader(csvfile) 70 | feature_set = [(feature_set_generator(text,length,label,exclude_list),label) for text,length,label in reader] 71 | #print(feature_set) 72 | me_classifier = MaxentClassifier.train(feature_set,"megam") 73 | 74 | accuracy = 0.0 75 | with open(test_data,'r',encoding='utf-8', errors='ignore') as testcsvfile: 76 | test_reader = csv.reader(testcsvfile) 77 | test_feature_set = [(feature_set_generator(text,length,label,exclude_list),label) for text,length,label in test_reader] 78 | accuracy = classify.accuracy(me_classifier, test_feature_set) 79 | 80 | classified = collections.defaultdict(set) 81 | observed = collections.defaultdict(set) 82 | i=1 83 | with open(test_data,'r',encoding='utf-8', errors='ignore') as testcsvfile: 84 | test_reader = csv.reader(testcsvfile) 85 | for text,length,label in test_reader: 86 | observed[label].add(i) 87 | classified[me_classifier.classify(feature_set_generator(text,length,label,exclude_list))].add(i) 88 | i+=1 89 | 90 | return accuracy,precision(observed["1"], classified["1"]),recall(observed['1'], classified['1']),\ 91 | f_measure(observed['1'], classified['1']),precision(observed['0'], classified['0']),recall(observed['1'], classified['0']),f_measure(observed['1'], classified['0']) 92 | 93 | 94 | def print_stats(a,ps,rs,fs,pns,rns,fns): 95 | print() 96 | print("****************** MAX ENTROPY STATISTICS******************************") 97 | print('Accuracy:', a) 98 | print('Sarcasm precision:', ps) 99 | print('Sarcasm recall:', rs) 100 | print('Sarcasm F-measure:', fs) 
101 | print('Not Sarcasm precision:',pns) 102 | print('Not Sarcasm recall:', rns) 103 | print('Not Sarcasm F-measure:', fns) 104 | print("***********************************************************************") 105 | 106 | 107 | def prepare_dict(dict,a,ps,rs,fs,pns,rns,fns): 108 | dict = {} 109 | dict["title"] = "Maximum Entropy with all features" 110 | dict["accuracy"] = a 111 | dict["sarcasm_precision"] = ps 112 | dict["sarcasm_recall"] = rs 113 | dict["sarcasm_f_measure"] = fs 114 | dict["not_sarcasm_precision"] = pns 115 | dict["not_sarcasm_recall"] = rns 116 | dict["not_sarcasm_f_measure"] = fns 117 | return dict 118 | 119 | a,ps,rs,fs,pns,rns,fns = me_classifier([]) 120 | max_ent_with_all_features = {} 121 | metrics["max_ent_with_all_features"]=prepare_dict(max_ent_with_all_features,a,ps,rs,fs,pns,rns,fns) 122 | print_stats(a,ps,rs,fs,pns,rns,fns) 123 | 124 | # #a,ps,rs,fs,pns,rns,fns = me_classifier(["pos"]) 125 | # max_ent_with_only_pos = {} 126 | # metrics["max_ent_with_only_pos"]=prepare_dict(max_ent_with_only_pos,a,ps,rs,fs,pns,rns,fns) 127 | # print_stats(a,ps,rs,fs,pns,rns,fns) 128 | 129 | # a,ps,rs,fs,pns,rns,fns = me_classifier(["polarity"]) 130 | # max_ent_with_only_polarity = {} 131 | # metrics["max_ent_with_only_polarity"]=prepare_dict(max_ent_with_only_polarity,a,ps,rs,fs,pns,rns,fns) 132 | # print_stats(a,ps,rs,fs,pns,rns,fns) 133 | 134 | # a,ps,rs,fs,pns,rns,fns = me_classifier(["interjection"]) 135 | # max_ent_with_only_interjection = {} 136 | # metrics["max_ent_with_only_interjection"]=prepare_dict(max_ent_with_only_interjection,a,ps,rs,fs,pns,rns,fns) 137 | # print_stats(a,ps,rs,fs,pns,rns,fns) 138 | 139 | # a,ps,rs,fs,pns,rns,fns = me_classifier(["words","length","hashtag","pos","interjection","polarity"]) 140 | # max_ent_without_onamatopoeia_and_question = {} 141 | # metrics["max_ent_without_onamatopoeia_and_question"]=prepare_dict(max_ent_without_onamatopoeia_and_question,a,ps,rs,fs,pns,rns,fns) 142 | # print_stats(a,ps,rs,fs,pns,rns,fns) 143 | 144 | # a,ps,rs,fs,pns,rns,fns = me_classifier(["question","length","interjection"]) 145 | # max_ent_with_question_length_interjection = {} 146 | # metrics["max_ent_with_question_length_interjection"]=prepare_dict(max_ent_with_question_length_interjection,a,ps,rs,fs,pns,rns,fns) 147 | # print_stats(a,ps,rs,fs,pns,rns,fns) 148 | 149 | # json_data = json.dumps(metrics) 150 | # output_json = open('metrics.json','w') 151 | # output_json.write(json_data) 152 | # output_json.close() 153 | -------------------------------------------------------------------------------- /Sarcasm_Detection/algorithm/parse_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import csv 3 | with open("train_set.json","r") as f: 4 | train_lines = f.readlines() 5 | 6 | with open("test_set.json","r") as f: 7 | test_lines = f.readlines() 8 | 9 | def write_dataset(filename,lines): 10 | with open(filename,"w") as w: 11 | writer = csv.writer(w) 12 | for line in lines: 13 | row = [] 14 | line_json = json.loads(line) 15 | row.append([line_json["headline"],len(line_json["headline"]),line_json["is_sarcastic"]]) 16 | writer.writerows(row) 17 | 18 | write_dataset("train_set_v2.json",train_lines) 19 | write_dataset("test_set_v2.json",test_lines) -------------------------------------------------------------------------------- /Sarcasm_Detection/fleiss_kappa/Fleiss Kappa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fleiss Kappa\n", 8 | "\n", 9 | "Interpretation\n", 10 | "> It can be interpreted as expressing the extent to which the observed amount of agreement among raters exceeds what would be expected if all raters made their ratings completely randomly.\n", 11 | "\n", 12 | "The raters can rate different items whereas for Cohen's they need to rate the exact same items\n", 13 | "> Fleiss' kappa specifically allows that although there are a fixed number of raters (e.g., three), different items may be rated by different individuals\n", 14 | "\n", 15 | "\\begin{equation*}\n", 16 | "\\kappa = \\frac{\\bar p - \\bar p_e}{1-\\bar p_e}\n", 17 | "\\end{equation*}\n", 18 | "\n", 19 | "# Worked Example\n", 20 | "\n", 21 | "In the following example, 3 raters (n) assign 5 \"subject\" (N) to a total of 2 categories (k). The categories are presented in the columns, while the subjects are presented in the rows. Each cell lists the number of raters who assigned the indicated (row) subject to the indicated (column) category.\n", 22 | "\n", 23 | "| nij | yes | no | Pi |\n", 24 | "|-------|---------|------|---------\n", 25 | "| 1 | 3 | 0 | 0.2 | \n", 26 | "| 2 | 1 | 2 | 0 | \n", 27 | "| 3 | 2 | 1 | 0 | \n", 28 | "| 4 | 0 | 3 | 0.2 | \n", 29 | "| 5 | 2 | 1 | 0 | \n", 30 | "| Total | 8 | 7 | 0.4 |\n", 31 | "| pj | 0.53 | 0.46 | 0.15 |\n", 32 | "\n", 33 | "\n", 34 | "N = 5, n = 3, k = 2 (yes/no)\n", 35 | "\n", 36 | "For example the first row (P_1):\n", 37 | "```\n", 38 | "P_1 = (3 ** 2 + 0 ** 2 - 5) / (5 * 4) = 4/20 = 1/5\n", 39 | "```\n", 40 | "\n", 41 | "```\n", 42 | "P_2 = (1 ** 2 + 2 ** 2 - 5) / (5 * 4) = 0\n", 43 | "```\n", 44 | "\n", 45 | "And the first columns (p_1):\n", 46 | "```\n", 47 | "p_1 = 8 / (5 * 2) = 8/15 = 4/5 = 0.53\n", 48 | "```\n", 49 | "\n", 50 | "Go through the worked example [here](https://www.wikiwand.com/en/Fleiss'_kappa#/Worked_example) if this is not clear.\n", 51 | "\n", 52 | "Now you can calculate Kappa:\n", 53 | "```\n", 54 | "P_bar = (1 / 5) * (0.2+0+0+0.2+0) = 0.08\n", 55 | "P_bar_e = 0.53 ** 2 + 0.46 ** 2 = 0.28 + 0.21 = 0.49\n", 56 | "```\n", 57 | "\n", 58 | "At this point we have everything we need and `kappa` is calculated:\n", 59 | "```\n", 60 | "kappa = (0.08 - 0.49) / (1 - 0.49) = -0.41/0.51 = -0.8\n" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# Interpretation\n", 68 | "\n", 69 | " - < 0\tPoor agreement\n", 70 | " - 0.01 – 0.20\tSlight agreement\n", 71 | " - 0.21 – 0.40\tFair agreement\n", 72 | " - 0.41 – 0.60\tModerate agreement\n", 73 | " - 0.61 – 0.80\tSubstantial agreement\n", 74 | " - 0.81 – 1.00\tAlmost perfect agreement" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 1, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "N = 26709" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 2, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "import csv\n", 93 | "import sys\n", 94 | "# number of raters n\n", 95 | "n = 3\n", 96 | "\n", 97 | "# category assignment (yes/no)\n", 98 | "k = 2\n", 99 | "\n", 100 | "# total number of tweets\n", 101 | "N = 26709\n", 102 | "\n", 103 | "# Proportion of all assignments to Yes\n", 104 | "p_yes = 0\n", 105 | "sum_of_yes_per_tweet = 0\n", 106 | "\n", 107 | "# Proportion of all assignments to No\n", 108 | "p_no = 0\n", 109 | "sum_of_no_per_tweet = 0\n", 110 | "\n", 111 | "total_extent = 0\n", 112 | "sarcasm_corpus = 0\n", 113 | "non_sarcasm_corpus = 0" 114 | ] 115 | }, 116 
| { 117 | "cell_type": "code", 118 | "execution_count": 3, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "## Construct a table\n", 123 | "\n", 124 | "f = open(\"fleiss_kappa.csv\", 'wt')\n", 125 | "fleiss_kappa_writer = csv.writer(f)\n", 126 | "i = 0\n", 127 | "list_of_tuples = []\n", 128 | "with open(\"rated.csv\", 'r',encoding='utf-8', errors='ignore') as csvfile:\n", 129 | " reader = csv.reader(csvfile)\n", 130 | " for tweet,r1,r2,r3 in reader:\n", 131 | " if tweet and r1 and r2 and r3:\n", 132 | " i += 1\n", 133 | " yes_per_tweet = 0\n", 134 | " no_per_tweet = 0\n", 135 | " if(r1.lower() == \"yes\"):\n", 136 | " yes_per_tweet +=1\n", 137 | " if(r2.lower() == \"yes\"):\n", 138 | " yes_per_tweet += 1\n", 139 | " if(r3.lower() == \"yes\"):\n", 140 | " yes_per_tweet += 1\n", 141 | " if((r1.lower() == \"no\") or (r1.lower() == \"not sure\")):\n", 142 | " no_per_tweet += 1\n", 143 | " if((r2.lower() == \"no\") or (r2.lower() == \"not sure\")):\n", 144 | " no_per_tweet += 1\n", 145 | " if((r3.lower() == \"no\") or (r3.lower() == \"not sure\")):\n", 146 | " no_per_tweet += 1\n", 147 | "\n", 148 | " tuple = (i,yes_per_tweet,no_per_tweet)\n", 149 | " list_of_tuples.append(tuple)\n", 150 | " fleiss_kappa_writer.writerow([i,yes_per_tweet,no_per_tweet])" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "Proportion of all assignments to the YES category (p_yes): 0.501079536236225\n", 163 | "Proportion of all assignments to the NO category (p_no): 0.498920463763775\n", 164 | "Overall extent of agreement(p_mean): 0.5013665805534948\n", 165 | "Mean proportion of agreement(p_expected): 0.5000023307969708\n", 166 | "KAPPA: 0.0027285122322641907\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "# Calculate Fleiss Kappa to find out how good is the agreement among raters\n", 172 | "\n", 173 | "sum_of_all_yes = 0\n", 174 | "sum_of_all_no = 0\n", 175 | "\n", 176 | "list_of_P_i = []\n", 177 | "for tweet,n_yes,n_no in list_of_tuples:\n", 178 | " sum_of_all_yes += n_yes\n", 179 | " sum_of_all_no += n_no\n", 180 | " list_of_P_i.append( (1/(float(n)*(n-1))) * (((n_yes**2) + (n_no**2)) - n) )\n", 181 | "\n", 182 | "p_yes = (1/(float(N) * n)) * sum_of_all_yes\n", 183 | "p_no = (1/(float(N) * n)) * sum_of_all_no\n", 184 | "\n", 185 | "\n", 186 | "print(\"Proportion of all assignments to the YES category (p_yes): \",p_yes)\n", 187 | "print(\"Proportion of all assignments to the NO category (p_no): \" ,p_no)\n", 188 | "\n", 189 | "sum_of_all_p_i = 0\n", 190 | "for p in list_of_P_i:\n", 191 | " sum_of_all_p_i += p\n", 192 | "\n", 193 | "p_dash = (sum_of_all_p_i/float(N))\n", 194 | "print(\"Overall extent of agreement(p_mean): \", p_dash)\n", 195 | "p_expected = (p_yes**2) + (p_no**2)\n", 196 | "\n", 197 | "print(\"Mean proportion of agreement(p_expected): \", p_expected)\n", 198 | "\n", 199 | "kappa = (p_dash - p_expected)/(1-p_expected)\n", 200 | "print(\"KAPPA: \",kappa)\n", 201 | "\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "# Exercise - work out Fleiss Kappa for the following chart\n", 209 | "\n", 210 | "\n", 211 | "| nij | yes | no | Pi |\n", 212 | "|-------|---------|------|---------\n", 213 | "| 1 | 3 | 0 | | \n", 214 | "| 2 | 0 | 3 | | \n", 215 | "| 3 | 3 | 0 | | \n", 216 | "| 4 | 0 | 3 | | \n", 217 | "| 5 | 2 | 1 | | \n", 218 | "| Total | | | |\n", 219 | "| pj | | | |\n", 
220 | "\n", 221 | "N = 5, n = 3, k = 2 (yes/no)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "# Solution\n", 229 | "\n", 230 | "N=5, n=3, k=2\n", 231 | "\n", 232 | "P_1 (square and add rows) = ((3**2 + 0**2) - 3) / (3*2) = (9-3)/6 = 1\n", 233 | "P_2 = 1\n", 234 | "P_3 = 1\n", 235 | "P_4 = 1\n", 236 | "P_5 = ((2**2 + 1**2) - 3) / (3*2) = 2/6 = 1/3\n", 237 | "\n", 238 | "P = (1+1+1+1+1/3)/5 = 13/15 = 0.867\n", 239 | "\n", 240 | "Pe yes = (3+0+3+0+2)/(3*5) = 8/15\n", 241 | "Pe no = (0+3+0+3+1)/(3*5) = 7/15\n", 242 | "\n", 243 | "Pe = (8/15)**2 + (7/15)**2 = 0.502\n", 244 | "\n", 245 | "K = (P-Pe)/(1-Pe) = (0.867-0.502)/(1-0.502) ~= 0.73 = 73%, substantial agreement\n" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 14, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "1.0 1.0 1.0 1.0 0.3333333333333333\n", 258 | "0.8666666666666666\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "p_1 = ((3**2 + 0**2)-3)/(3*2)\n", 264 | "p_2 = ((0**2 + 3**2)-3)/(3*2)\n", 265 | "p_3 = ((3**2 + 0**2)-3)/(3*2)\n", 266 | "p_4 = ((0**2 + 3**2)-3)/(3*2)\n", 267 | "p_5 = ((2**2 + 1**2)-3)/(3*2)\n", 268 | "print(p_1,p_2,p_3,p_4,p_5)\n", 269 | "\n", 270 | "p_bar = (p_1+p_2+p_3+p_4+p_5)/5\n", 271 | "print(p_bar)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 10, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "name": "stdout", 281 | "output_type": "stream", 282 | "text": [ 283 | "0.28444444444444444\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "Pe_yes = ((3+0+3+0+2)/(5*3))**2\n", 289 | "print(Pe_yes)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 11, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "0.2177777777777778\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "Pe_no = ((0+3+0+3+1)/(5*3))**2\n", 307 | "print(Pe_no)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 13, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "0.5022222222222222\n" 320 | ] 321 | } 322 | ], 323 | "source": [ 324 | "Pe = Pe_yes + Pe_no\n", 325 | "print(Pe)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 16, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": "stream", 336 | "text": [ 337 | "0.732142857142857\n" 338 | ] 339 | } 340 | ], 341 | "source": [ 342 | "Kappa = (p_bar-Pe)/(1-Pe) \n", 343 | "print(Kappa)" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [] 352 | } 353 | ], 354 | "metadata": { 355 | "kernelspec": { 356 | "display_name": "Python 3.5", 357 | "language": "python", 358 | "name": "python3" 359 | }, 360 | "language_info": { 361 | "codemirror_mode": { 362 | "name": "ipython", 363 | "version": 3 364 | }, 365 | "file_extension": ".py", 366 | "mimetype": "text/x-python", 367 | "name": "python", 368 | "nbconvert_exporter": "python", 369 | "pygments_lexer": "ipython3", 370 | "version": "3.5.5" 371 | } 372 | }, 373 | "nbformat": 4, 374 | "nbformat_minor": 2 375 | } 376 | -------------------------------------------------------------------------------- /Sarcasm_Detection/fleiss_kappa/fleiss_kappa_demo.ipynb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/Sarcasm_Detection/fleiss_kappa/fleiss_kappa_demo.ipynb -------------------------------------------------------------------------------- /Sarcasm_Detection/fleiss_kappa/parse_dataset.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | import random 4 | 5 | rating = ["yes","no"] 6 | headlines = [] 7 | with open("dataset.json","r") as f: 8 | lines = f.readlines() 9 | for line in lines: 10 | json_obj = json.loads(line) 11 | headlines.append(json_obj['headline']) 12 | 13 | with open("rated.csv","w") as w: 14 | writer = csv.writer(w) 15 | for headline in headlines: 16 | row = [] 17 | row.append([headline,random.choice(rating),random.choice(rating),random.choice(rating)]) 18 | writer.writerows(row) 19 | 20 | -------------------------------------------------------------------------------- /Solutions/Readme.md: -------------------------------------------------------------------------------- 1 | Solutions will be posted after the workshop. 2 | -------------------------------------------------------------------------------- /Word Embeddings/Word Embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Word Embeddings\n", 12 | "Based on Word Embeddings tutorial by Robert Guthrie https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html#sphx-glr-beginner-nlp-word-embeddings-tutorial-py" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "slideshow": { 19 | "slide_type": "slide" 20 | } 21 | }, 22 | "source": [ 23 | "Word Embeddings are dense vector representations of words." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "notes" 31 | } 32 | }, 33 | "source": [ 34 | "Word embeddings compress information so you have a denser representation. Compare this to sparse vectors like One-Hot Encoding."
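To make "dense" concrete before the one-hot comparison below, here is a tiny sketch: an embedding is just a learned lookup table from word indices to short real-valued vectors (the sizes here are made-up toy numbers, not from the tutorial):

```python
import torch
import torch.nn as nn

# A lookup table mapping 8 word indices to dense 3-dimensional vectors.
embedding = nn.Embedding(num_embeddings=8, embedding_dim=3)

# Looking up word index 4 returns a small dense vector (random until trained).
print(embedding(torch.tensor([4])))
```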
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 1, 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "sentence = \"the quick brown fox jumped over the lazy dog\"\n", 56 | "words = sentence.split(' ')\n", 57 | "print(words)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "slide" 65 | } 66 | }, 67 | "source": [ 68 | "Let's look at the individual words in our vocabulary:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": { 75 | "slideshow": { 76 | "slide_type": "slide" 77 | } 78 | }, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "['quick', 'dog', 'brown', 'over', 'the', 'jumped', 'lazy', 'fox']\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "vocab1 = list(set(words))\n", 90 | "print(vocab1)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 3, 96 | "metadata": { 97 | "slideshow": { 98 | "slide_type": "slide" 99 | } 100 | }, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "8" 106 | ] 107 | }, 108 | "execution_count": 3, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "# Number of words in our vocabulary\n", 115 | "len(vocab1)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": { 121 | "slideshow": { 122 | "slide_type": "slide" 123 | } 124 | }, 125 | "source": [ 126 | "# One Hot Encoding" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": { 132 | "slideshow": { 133 | "slide_type": "notes" 134 | } 135 | }, 136 | "source": [ 137 | "The vocabulary size is seen above. Now we can one-hot encode the vocabulary words. 
The good news is that PyTorch (As of December 2018) now has a built-in one-hot encoding module" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 4, 143 | "metadata": { 144 | "slideshow": { 145 | "slide_type": "slide" 146 | } 147 | }, 148 | "outputs": [ 149 | { 150 | "name": "stdout", 151 | "output_type": "stream", 152 | "text": [ 153 | "{'quick': 0, 'dog': 1, 'brown': 2, 'over': 3, 'the': 4, 'jumped': 5, 'lazy': 6, 'fox': 7}\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "# Convert words to indexes\n", 159 | "word_to_ix1 = {word: i for i, word in enumerate(vocab1)}\n", 160 | "print(word_to_ix1)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 5, 166 | "metadata": { 167 | "slideshow": { 168 | "slide_type": "slide" 169 | } 170 | }, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "['quick', 'dog', 'brown', 'over', 'the', 'jumped', 'lazy', 'fox']\n", 177 | "tensor([[1, 0, 0, 0, 0, 0, 0, 0],\n", 178 | " [0, 1, 0, 0, 0, 0, 0, 0],\n", 179 | " [0, 0, 1, 0, 0, 0, 0, 0],\n", 180 | " [0, 0, 0, 1, 0, 0, 0, 0],\n", 181 | " [0, 0, 0, 0, 1, 0, 0, 0],\n", 182 | " [0, 0, 0, 0, 0, 1, 0, 0],\n", 183 | " [0, 0, 0, 0, 0, 0, 1, 0],\n", 184 | " [0, 0, 0, 0, 0, 0, 0, 1]])\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "import torch\n", 190 | "from torch.nn.functional import one_hot\n", 191 | "\n", 192 | "words = torch.tensor([word_to_ix1[w] for w in vocab1], dtype=torch.long)\n", 193 | "\n", 194 | "one_hot_encoding = one_hot(words)\n", 195 | "print(vocab1)\n", 196 | "print(one_hot_encoding)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "notes" 204 | } 205 | }, 206 | "source": [ 207 | "The issue with sparse one-hot encoding is that the vectors are very large \n", 208 | "and we have a very sparse representation of the vectors. As you can see there\n", 209 | "are a lot of zeros. For example, the popular data set WikiText-103 has 267,000\n", 210 | "words in the vocabulary. This means around 267,000 zeros in each vector with\n", 211 | "one-hot encoding.\n", 212 | "\n", 213 | "We should try to find a smaller encoding for our dataset. Let's try a denser vector using a\n", 214 | "Word Embedding." 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "slideshow": { 221 | "slide_type": "slide" 222 | } 223 | }, 224 | "source": [ 225 | "# Word Embedding Example" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 6, 231 | "metadata": { 232 | "slideshow": { 233 | "slide_type": "slide" 234 | } 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# Context is the number of words we are using as a context for the next word we want to predict\n", 239 | "CONTEXT_SIZE = 2\n", 240 | "\n", 241 | "# Embedding dimension is the size of the embedding vector\n", 242 | "EMBEDDING_DIM = 10\n", 243 | "\n", 244 | "# Size of the hidden layer\n", 245 | "HIDDEN_DIM = 256" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 7, 251 | "metadata": { 252 | "slideshow": { 253 | "slide_type": "slide" 254 | } 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "# We will use Shakespeare Sonnet 2\n", 259 | "test_sentence = \"\"\"Tomorrow, and tomorrow, and tomorrow,\n", 260 | "Creeps in this petty pace from day to day,\n", 261 | "To the last syllable of recorded time;\n", 262 | "And all our yesterdays have lighted fools\n", 263 | "The way to dusty death. 
Out, out, brief candle!\n", 264 | "Life's but a walking shadow, a poor player,\n", 265 | "That struts and frets his hour upon the stage,\n", 266 | "And then is heard no more. It is a tale\n", 267 | "Told by an idiot, full of sound and fury,\n", 268 | "Signifying nothing.\n", 269 | "\"\"\".lower().split()" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 8, 275 | "metadata": { 276 | "slideshow": { 277 | "slide_type": "slide" 278 | } 279 | }, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "[(['tomorrow,', 'and'], 'tomorrow,'), (['and', 'tomorrow,'], 'and'), (['tomorrow,', 'and'], 'tomorrow,')]\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "# Build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)\n", 291 | "trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])\n", 292 | " for i in range(len(test_sentence) - 2)]\n", 293 | "# print the first 3, just so you can see what they look like\n", 294 | "print(trigrams[:3])\n", 295 | "\n", 296 | "vocab2 = list(set(test_sentence))\n", 297 | "word_to_ix2 = {word: i for i, word in enumerate(vocab2)}\n", 298 | "\n", 299 | "# Show what a trigram looks like" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "slideshow": { 306 | "slide_type": "slide" 307 | } 308 | }, 309 | "source": [ 310 | "# N-Gram Language Model\n", 311 | "\n", 312 | "An N-Gram is a sequence of words as in a sentence. This is useful because it gives us some context to train a deep learning classifier. \n", 313 | "\n", 314 | "For a detailed post visit: https://www.microsoft.com/developerblog/2015/11/29/feature-representation-for-text-analyses/\n", 315 | "\n", 316 | "Here's what a diagram of our n-gram deep learning model would look like:" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": { 322 | "slideshow": { 323 | "slide_type": "slide" 324 | } 325 | }, 326 | "source": [ 327 | "\n", 328 | "" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": { 334 | "slideshow": { 335 | "slide_type": "slide" 336 | } 337 | }, 338 | "source": [ 339 | "# ReLU\n", 340 | "Rectifier activation function: https://en.wikipedia.org/wiki/Rectifier_(neural_networks)\n", 341 | "\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": { 347 | "slideshow": { 348 | "slide_type": "slide" 349 | } 350 | }, 351 | "source": [ 352 | "# Softmax function\n", 353 | "\"Softmax" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "slideshow": { 360 | "slide_type": "slide" 361 | } 362 | }, 363 | "source": [ 364 | "# Training is based on preceding words\n", 365 | "Predict the probability of a word based on the words around it" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 9, 371 | "metadata": { 372 | "slideshow": { 373 | "slide_type": "slide" 374 | } 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "# Add imports here\n", 379 | "import torch\n", 380 | "import torch.autograd as autograd\n", 381 | "import torch.nn as nn\n", 382 | "import torch.optim as optim\n", 383 | "import torch.nn.functional as F\n", 384 | "\n", 385 | "class NGramLanguageModeler(nn.Module):\n", 386 | "\n", 387 | " def __init__(self, vocab_size, embedding_dim, context_size):\n", 388 | " super(NGramLanguageModeler, self).__init__()\n", 389 | " self.embeddings = nn.Embedding(vocab_size, embedding_dim)\n", 390 | " self.linear1 = nn.Linear(context_size * embedding_dim, 
HIDDEN_DIM)\n", 391 | " self.linear2 = nn.Linear(HIDDEN_DIM, vocab_size)\n", 392 | "\n", 393 | " def forward(self, inputs):\n", 394 | " embeds = self.embeddings(inputs).view((1, -1))\n", 395 | " out = F.relu(self.linear1(embeds))\n", 396 | " out = self.linear2(out)\n", 397 | " log_probs = F.log_softmax(out, dim=1)\n", 398 | " return log_probs" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 10, 404 | "metadata": { 405 | "slideshow": { 406 | "slide_type": "slide" 407 | } 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "learning_rate = 0.001\n", 412 | "losses = []\n", 413 | "loss_function = nn.NLLLoss() # negative log likelihood\n", 414 | "model = NGramLanguageModeler(len(vocab2), EMBEDDING_DIM, CONTEXT_SIZE)\n", 415 | "optimizer = optim.SGD(model.parameters(), lr=learning_rate)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 17, 421 | "metadata": { 422 | "slideshow": { 423 | "slide_type": "slide" 424 | } 425 | }, 426 | "outputs": [ 427 | { 428 | "name": "stderr", 429 | "output_type": "stream", 430 | "text": [ 431 | "100%|██████████| 73/73 [00:00<00:00, 432.11it/s, loss=3.6] \n", 432 | "100%|██████████| 73/73 [00:00<00:00, 507.88it/s, loss=3.57]\n", 433 | "100%|██████████| 73/73 [00:00<00:00, 508.73it/s, loss=3.54]\n", 434 | "100%|██████████| 73/73 [00:00<00:00, 497.76it/s, loss=3.51]\n", 435 | "100%|██████████| 73/73 [00:00<00:00, 481.95it/s, loss=3.49]\n", 436 | "100%|██████████| 73/73 [00:00<00:00, 534.10it/s, loss=3.46]\n", 437 | "100%|██████████| 73/73 [00:00<00:00, 475.73it/s, loss=3.43]\n", 438 | "100%|██████████| 73/73 [00:00<00:00, 503.46it/s, loss=3.4] \n", 439 | "100%|██████████| 73/73 [00:00<00:00, 480.42it/s, loss=3.37]\n", 440 | "100%|██████████| 73/73 [00:00<00:00, 509.81it/s, loss=3.34]\n", 441 | "100%|██████████| 73/73 [00:00<00:00, 489.63it/s, loss=3.31]\n", 442 | "100%|██████████| 73/73 [00:00<00:00, 497.34it/s, loss=3.28]\n", 443 | "100%|██████████| 73/73 [00:00<00:00, 509.76it/s, loss=3.25]\n", 444 | "100%|██████████| 73/73 [00:00<00:00, 510.08it/s, loss=3.22]\n", 445 | "100%|██████████| 73/73 [00:00<00:00, 521.02it/s, loss=3.19]\n", 446 | "100%|██████████| 73/73 [00:00<00:00, 507.04it/s, loss=3.16]\n", 447 | "100%|██████████| 73/73 [00:00<00:00, 507.96it/s, loss=3.13]\n", 448 | "100%|██████████| 73/73 [00:00<00:00, 526.83it/s, loss=3.1] \n", 449 | "100%|██████████| 73/73 [00:00<00:00, 521.02it/s, loss=3.07]\n", 450 | "100%|██████████| 73/73 [00:00<00:00, 515.49it/s, loss=3.04]\n", 451 | "100%|██████████| 73/73 [00:00<00:00, 529.46it/s, loss=3] \n", 452 | "100%|██████████| 73/73 [00:00<00:00, 528.34it/s, loss=2.97]\n", 453 | "100%|██████████| 73/73 [00:00<00:00, 546.78it/s, loss=2.94]\n", 454 | "100%|██████████| 73/73 [00:00<00:00, 511.11it/s, loss=2.91]\n", 455 | "100%|██████████| 73/73 [00:00<00:00, 531.72it/s, loss=2.88]\n" 456 | ] 457 | } 458 | ], 459 | "source": [ 460 | "from tqdm import tqdm\n", 461 | "\n", 462 | "for epoch in range(25):\n", 463 | " total_loss = 0\n", 464 | "\n", 465 | " iterator = tqdm(trigrams)\n", 466 | " for context, target in iterator:\n", 467 | " # (['When', 'forty'], 'winters')\n", 468 | " # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words\n", 469 | " # into integer indices and wrap them in tensors)\n", 470 | " context_idxs = torch.tensor([word_to_ix2[w] for w in context], dtype=torch.long)\n", 471 | "\n", 472 | " # Step 2. Recall that torch *accumulates* gradients. 
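Each backward() call adds the new gradients into .grad rather than replacing them. 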
Before passing in a\n", 473 | " # new instance, you need to zero out the gradients from the old\n", 474 | " # instance\n", 475 | " model.zero_grad()\n", 476 | "\n", 477 | " # Step 3. Run the forward pass, getting log probabilities over next\n", 478 | " # words\n", 479 | " log_probs = model(context_idxs)\n", 480 | "\n", 481 | " # Step 4. Compute your loss function. (Again, Torch wants the target\n", 482 | " # word wrapped in a tensor)\n", 483 | " loss = loss_function(log_probs, torch.tensor([word_to_ix2[target]], dtype=torch.long))\n", 484 | "\n", 485 | " # Step 5. Do the backward pass and update the gradient\n", 486 | " loss.backward()\n", 487 | " optimizer.step()\n", 488 | "\n", 489 | " # Get the Python number from a 1-element Tensor by calling tensor.item()\n", 490 | " total_loss += loss.item()\n", 491 | " iterator.set_postfix(loss=float(loss))\n", 492 | " losses.append(total_loss)\n", 493 | " # add progress bar with epochs" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 12, 499 | "metadata": { 500 | "slideshow": { 501 | "slide_type": "slide" 502 | } 503 | }, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "NGramLanguageModeler(\n", 509 | " (embeddings): Embedding(59, 10)\n", 510 | " (linear1): Linear(in_features=20, out_features=256, bias=True)\n", 511 | " (linear2): Linear(in_features=256, out_features=59, bias=True)\n", 512 | ")" 513 | ] 514 | }, 515 | "execution_count": 12, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "# Check the structure of our model here\n", 522 | "model.eval()" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": { 528 | "slideshow": { 529 | "slide_type": "slide" 530 | } 531 | }, 532 | "source": [ 533 | "Let's try this out!" 
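,
    "\n",
    "Before the prediction below, note that the learned embeddings themselves are easy to inspect. A minimal sketch (using the `model` and `word_to_ix2` defined above):\n",
    "\n",
    "```python\n",
    "# One row of the embedding table: the dense vector learned for a word\n",
    "vec = model.embeddings.weight[word_to_ix2['tomorrow,']]\n",
    "print(vec.shape)  # torch.Size([10]) -- EMBEDDING_DIM entries, no sparse zeros\n",
    "```\n"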
534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 13, 539 | "metadata": { 540 | "slideshow": { 541 | "slide_type": "slide" 542 | } 543 | }, 544 | "outputs": [ 545 | { 546 | "name": "stdout", 547 | "output_type": "stream", 548 | "text": [ 549 | "tensor([[-4.2813, -4.3018, -4.1416, -3.9168, -4.2167, -3.9121, -3.8985, -4.1464,\n", 550 | " -4.3967, -3.6064, -3.8017, -3.9178, -4.3452, -4.3658, -4.1338, -3.7384,\n", 551 | " -3.9032, -4.4974, -4.2858, -4.3245, -4.1646, -4.4215, -3.3084, -4.0751,\n", 552 | " -4.1101, -4.3610, -4.2177, -4.1962, -4.4140, -4.0597, -3.0100, -3.8150,\n", 553 | " -4.3852, -4.2511, -4.0806, -4.0710, -4.0321, -4.4454, -4.2866, -4.3894,\n", 554 | " -3.9729, -3.6467, -3.8749, -4.4524, -4.0009, -4.0418, -4.3280, -3.9617,\n", 555 | " -4.4341, -4.4680, -3.9659, -4.5848, -4.1615, -4.1848, -4.3031, -4.0144,\n", 556 | " -4.1116, -4.1997, -4.4411]])\n", 557 | "tomorrow,\n" 558 | ] 559 | } 560 | ], 561 | "source": [ 562 | "import numpy\n", 563 | "\n", 564 | "with torch.no_grad():\n", 565 | " context = ['tomorrow,', 'and']\n", 566 | " context_idxs = torch.tensor([word_to_ix2[w] for w in context], dtype=torch.long)\n", 567 | " pred = model(context_idxs)\n", 568 | " print(pred)\n", 569 | " index_of_prediction = numpy.argmax(pred)\n", 570 | " print(vocab2[index_of_prediction])" 571 | ] 572 | }, 573 | { 574 | "cell_type": "markdown", 575 | "metadata": { 576 | "slideshow": { 577 | "slide_type": "slide" 578 | } 579 | }, 580 | "source": [ 581 | "# Next Steps\n", 582 | "* RNN/LSTM/BiLSTM\n", 583 | "* Pointer to GloVe word embedding: https://nlp.stanford.edu/projects/glove/\n", 584 | "* https://github.com/fastai/word-embeddings-workshop/blob/master/Word%20Embeddings.ipynb\n", 585 | "* ELMo: https://github.com/allenai/allennlp/blob/master/tutorials/how_to/elmo.md" 586 | ] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": { 591 | "slideshow": { 592 | "slide_type": "slide" 593 | } 594 | }, 595 | "source": [ 596 | "# Exercise: Continuous Bag of Words\n", 597 | "Continuous Bag of Words is a model that tries to predict a word based on a few word before and after the word." 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": 14, 603 | "metadata": { 604 | "slideshow": { 605 | "slide_type": "slide" 606 | } 607 | }, 608 | "outputs": [ 609 | { 610 | "name": "stdout", 611 | "output_type": "stream", 612 | "text": [ 613 | "[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]\n" 614 | ] 615 | } 616 | ], 617 | "source": [ 618 | "CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right\n", 619 | "raw_text = \"\"\"We are about to study the idea of a computational process.\n", 620 | "Computational processes are abstract beings that inhabit computers.\n", 621 | "As they evolve, processes manipulate other abstract things called data.\n", 622 | "The evolution of a process is directed by a pattern of rules\n", 623 | "called a program. People create programs to direct processes. 
In effect,\n", 624 | "we conjure the spirits of the computer with our spells.\"\"\".split()\n", 625 | "\n", 626 | "# By deriving a set from `raw_text`, we deduplicate the array\n", 627 | "vocab3 = list(set(raw_text))\n", 628 | "vocab_size = len(vocab3)\n", 629 | "\n", 630 | "word_to_ix3 = {word: i for i, word in enumerate(vocab3)}\n", 631 | "data = []\n", 632 | "for i in range(2, len(raw_text) - 2):\n", 633 | " context = [raw_text[i - 2], raw_text[i - 1],\n", 634 | " raw_text[i + 1], raw_text[i + 2]]\n", 635 | " target = raw_text[i]\n", 636 | " data.append((context, target))\n", 637 | "print(data[:5])" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 15, 643 | "metadata": { 644 | "slideshow": { 645 | "slide_type": "slide" 646 | } 647 | }, 648 | "outputs": [ 649 | { 650 | "data": { 651 | "text/plain": [ 652 | "tensor([44, 4, 12, 42])" 653 | ] 654 | }, 655 | "execution_count": 15, 656 | "metadata": {}, 657 | "output_type": "execute_result" 658 | } 659 | ], 660 | "source": [ 661 | "# create your model and train. here are some functions to help you make\n", 662 | "# the data ready for use by your module\n", 663 | "\n", 664 | "def make_context_vector(context, word_to_ix3):\n", 665 | " idxs = [word_to_ix3[w] for w in context]\n", 666 | " return torch.tensor(idxs, dtype=torch.long)\n", 667 | "\n", 668 | "make_context_vector(data[0][0], word_to_ix3) # example" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 16, 674 | "metadata": { 675 | "slideshow": { 676 | "slide_type": "slide" 677 | } 678 | }, 679 | "outputs": [], 680 | "source": [ 681 | "class CBOW(nn.Module):\n", 682 | " def __init__(self):\n", 683 | " pass\n", 684 | "\n", 685 | " def forward(self, inputs):\n", 686 | " pass" 687 | ] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": { 692 | "slideshow": { 693 | "slide_type": "skip" 694 | } 695 | }, 696 | "source": [ 697 | "# Glossary\n", 698 | "\n", 699 | "* word embedding -- a dense vector representation of words\n", 700 | "* one-hot encoding -- a sparse vector representation of words: a single one with zeros everywhere else\n", 701 | "* vocabulary -- the set of unique words used in your corpus \n", 702 | "* tokenization -- the process of breaking down bodies of text into words\n", 703 | "* ReLU function -- an activation function that passes positive values through and zeroes out negative ones\n", 704 | "* softmax function -- activation function used to map raw scores to a probability distribution\n", 705 | "* negative log likelihood -- a loss function used in conjunction with (log) softmax\n", 706 | "* loss function -- also known as a \"cost function\"; a function to estimate the cost associated with an event\n", 707 | "* Stochastic Gradient Descent (SGD) -- \"an iterative method for optimizing an objective function\" * (https://en.wikipedia.org/wiki/Stochastic_gradient_descent) \n", 708 | "* learning rate -- a constant step size to take in one iteration of stochastic gradient descent\n", 709 | "* autograd -- PyTorch's automatic differentiation class that performs the backpropagation gradient calculations automatically so that a \"backward\" class does not need to be defined by the programmer" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": { 715 | "slideshow": { 716 | "slide_type": "slide" 717 | } 718 | }, 719 | "source": [ 720 | "\n", 721 | "References: \n", 722 | "* https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html\n", 723 | "* https://github.com/fastai/word-embeddings-workshop/blob/master/Word%20Embeddings.ipynb" 724 | ] 725 | } 726 | ], 727 | "metadata": { 728
| "celltoolbar": "Slideshow", 729 | "kernelspec": { 730 | "display_name": "Python 3", 731 | "language": "python", 732 | "name": "python3" 733 | }, 734 | "language_info": { 735 | "codemirror_mode": { 736 | "name": "ipython", 737 | "version": 3 738 | }, 739 | "file_extension": ".py", 740 | "mimetype": "text/x-python", 741 | "name": "python", 742 | "nbconvert_exporter": "python", 743 | "pygments_lexer": "ipython3", 744 | "version": "3.6.8" 745 | } 746 | }, 747 | "nbformat": 4, 748 | "nbformat_minor": 2 749 | } 750 | -------------------------------------------------------------------------------- /archive/fall_2018/Introduction/1.Workshop_Introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n", 12 | "\n", 13 | "\n", 14 | "# Introduction" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": { 27 | "slideshow": { 28 | "slide_type": "slide" 29 | } 30 | }, 31 | "source": [ 32 | "## Who are we\n", 33 | "\n", 34 | "* Micheleen Harris (Microsoft) - Introduction and setup\n", 35 | "* Anna Bethke (Intel) - Lifecycle of an NLP Project\n", 36 | "* Mehrdad Yazdani (January, Inc.) - Speaking about Character Level Classification\n", 37 | "* Kendall Chuang (Ayasdi) - Introduction Word Embeddings\n", 38 | "* David Clark (Fresh Gravity) - Speaking about LSTMs\n", 39 | "* Micheleen - Bidirectional LSTMs, Conditional Random Fields\n", 40 | "* Kendall Chuang - ULMFit and Closing" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "slideshow": { 47 | "slide_type": "subslide" 48 | } 49 | }, 50 | "source": [ 51 | "
" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "slideshow": { 58 | "slide_type": "slide" 59 | } 60 | }, 61 | "source": [ 62 | "## Questions about you\n", 63 | "* Python background?\n", 64 | "* Jupyter background?\n", 65 | "* Deep Learning background?\n", 66 | "* NLP background?" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "slideshow": { 73 | "slide_type": "slide" 74 | } 75 | }, 76 | "source": [ 77 | "## Why sequences are important\n", 78 | "\n", 79 | "\"why" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "slideshow": { 86 | "slide_type": "slide" 87 | } 88 | }, 89 | "source": [ 90 | "## What is a Recurrent Neural Network (RNN)\n", 91 | "\n", 92 | "\n", 93 | "\"inside\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "notes" 101 | } 102 | }, 103 | "source": [ 104 | "* The neural network is simply a tanh activation function\n", 105 | "* Shown unraveled is useful\n", 106 | "* Inputs outputs (hidden, x, and not showing some output)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "subslide" 114 | } 115 | }, 116 | "source": [ 117 | "* x = input embedding for a word (vector)\n", 118 | "* h = hidden (or activation) state (vector)\n", 119 | "* tanh = hyperbolic tangent activation function/layer\n", 120 | "* y = output tag (not shown because we can have different schemes)\n", 121 | "\n", 122 | "**A Long Short-Term Memory (LSTM) network is a subclass of RNNs**" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": { 128 | "slideshow": { 129 | "slide_type": "slide" 130 | } 131 | }, 132 | "source": [ 133 | "## What types are RNNs are there?\n", 134 | "\n", 135 | "* Many-to-many - e.g. find names with named entity recognition (NER)\n", 136 | "* Many-to-one - e.g. sentiment analysis\n", 137 | "* One-to-many - e.g. music generation\n", 138 | "* Another many-to-many - e.g. machine translation\n", 139 | "* One-to-one" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "slideshow": { 146 | "slide_type": "slide" 147 | } 148 | }, 149 | "source": [ 150 | "\"why" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": { 156 | "slideshow": { 157 | "slide_type": "slide" 158 | } 159 | }, 160 | "source": [ 161 | "## Want to create a few tensors right now?" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 1, 167 | "metadata": { 168 | "slideshow": { 169 | "slide_type": "slide" 170 | } 171 | }, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "\n", 178 | "\n", 179 | "None\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "# First tensor calculation! 
# TODO a diagram\n", 185 | "import sys\n", 186 | "import torch\n", 187 | "\n", 188 | "# PyTorch takes care of gradient differentiation for you with something called \"autograd\"!\n", 189 | "# Makes backwards propagation super, duper simple (we don't have to worry about it!)\n", 190 | "# But you must tell the leafs that they require this tracking\n", 191 | "\n", 192 | "# Some random data (2D and 1D vectors)\n", 193 | "x = torch.randn(5, 5, requires_grad=True)\n", 194 | "b = torch.randn(5, requires_grad=True)\n", 195 | "\n", 196 | "# Just a one-element tensor\n", 197 | "w = torch.tensor([1.0], requires_grad=True)\n", 198 | "\n", 199 | "# Do some multiplication\n", 200 | "y = w * x\n", 201 | "\n", 202 | "# Do some addition\n", 203 | "z = y + b\n", 204 | "\n", 205 | "# Let's trace back the operations\n", 206 | "print(z.grad_fn)\n", 207 | "print(y.grad_fn)\n", 208 | "print(x.grad_fn)\n", 209 | "\n", 210 | "# Why do you think x's auto gradient differentiation function is \"None\"?" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": { 216 | "slideshow": { 217 | "slide_type": "slide" 218 | } 219 | }, 220 | "source": [ 221 | "## References\n", 222 | "1. [RNN video \"RNN1. Why sequence models?\"](https://www.youtube.com/watch?v=5Vl-bK7tfD8&list=PLBAGcD3siRDittPwQDGIIAWkjz-RucAc7&index=1) by Andrew Ng\n", 223 | "2. [Getting Started with PyTorch Part 1: Understanding how Automatic Differentiation works](https://towardsdatascience.com/getting-started-with-pytorch-part-1-understanding-how-automatic-differentiation-works-5008282073ec)\n", 224 | "3. [Introduction to PyTorch fro pytorch.org](https://pytorch.org/tutorials/beginner/nlp/pytorch_tutorial.html#sphx-glr-beginner-nlp-pytorch-tutorial-py)\n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [] 233 | } 234 | ], 235 | "metadata": { 236 | "celltoolbar": "Slideshow", 237 | "kernelspec": { 238 | "display_name": "Python 3", 239 | "language": "python", 240 | "name": "python3" 241 | }, 242 | "language_info": { 243 | "codemirror_mode": { 244 | "name": "ipython", 245 | "version": 3 246 | }, 247 | "file_extension": ".py", 248 | "mimetype": "text/x-python", 249 | "name": "python", 250 | "nbconvert_exporter": "python", 251 | "pygments_lexer": "ipython3", 252 | "version": "3.5.5" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 2 257 | } 258 | -------------------------------------------------------------------------------- /archive/fall_2018/Introduction/2.Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n", 12 | "\n", 13 | "# Setup for Workshop\n", 14 | "\n", 15 | "For this workshop, the Azure Data Science Virtual Machine will be used. This is a VM one can fire up on Azure to perform data science tasks and is pre-installed with dozens of frameworks and packages for use. It's a nice way to get started quickly without much overhead/setup which is one of the reasons we chose this for the workshop. [Find out more here](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/overview).\n", 16 | "\n", 17 | "You will get credentials for a machine and log in at the URL provided. This will be the jupyter notebook environment/playground provided to you for the day. 
At the **end of the day you should zip up your notebooks and download** them as these VMs will disappear shortly thereafter into the sunset. Be sure you are using `https`. You may get a \"Your Connection is Not Private\" or other form of safety certificate error. This is a known issue and we are working on it. Please click through this by hitting the link on the page to continue." 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "slideshow": { 24 | "slide_type": "slide" 25 | } 26 | }, 27 | "source": [ 28 | "**Let's take 10 minutes to ensure setup**" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "slideshow": { 35 | "slide_type": "slide" 36 | } 37 | }, 38 | "source": [ 39 | "## Your First Task\n", 40 | "\n", 41 | "1. Log in to the DSVM with your provided credentials.\n", 42 | "2. Open up a Terminal window (New drop down on right -> Terminal)\n", 43 | "3. Git clone the workshop repo: `git clone https://github.com/PythonWorkshop/intro-to-nlp-with-pytorch.git`\n", 44 | "4. Open this notebook from the Introduction folder and if asked, choose the \"Python 3\" kernel.\n", 45 | "5. Run the following code cells." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "{'/anaconda/envs/py35/bin/python'}" 61 | ] 62 | }, 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "# Which Python\n", 70 | "import sys\n", 71 | "{sys.executable}" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": { 78 | "slideshow": { 79 | "slide_type": "slide" 80 | } 81 | }, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "'0.4.0'" 87 | ] 88 | }, 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "# Which PyTorch\n", 96 | "import torch\n", 97 | "torch.__version__" 98 | ] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3", 104 | "language": "python", 105 | "name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.5.5" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /archive/fall_2018/Introduction/3.What_Are_Dynamic_Graphs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# What in the world is a dynamic computational graph\n", 12 | "\n", 13 | "Caution: this is not an easy topic so if it doesn't make sense now keep reading about it" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "slideshow": { 20 | "slide_type": "slide" 21 | } 22 | }, 23 | "source": [ 24 | "## Forward/Backwards\n", 25 | "* In training a NN there are a couple of steps: the forward pass and the backwards pass (back propagation of gradients).\n", 26 | " * In PyTorch `forward` and `backward` are in the same class 
`torch.autograd.Function`\n", 27 | " \n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "slideshow": { 34 | "slide_type": "slide" 35 | } 36 | }, 37 | "source": [ 38 | "## Let's see an example network (and train it!)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "slideshow": { 46 | "slide_type": "subslide" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# Do some imports\n", 52 | "import torch\n", 53 | "\n", 54 | "# Define the leaf nodes\n", 55 | "a = torch.tensor([4.])\n", 56 | "\n", 57 | "# This is just a vector of tensors\n", 58 | "weights = [torch.tensor([i], requires_grad=True) for i in (2., 5., 9., 7.)]\n", 59 | "\n", 60 | "# unpack the weights for nicer assignment\n", 61 | "w1, w2, w3, w4 = weights" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": { 67 | "slideshow": { 68 | "slide_type": "slide" 69 | } 70 | }, 71 | "source": [ 72 | "Exercise: Print the type of a" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "slide" 81 | } 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "type(a)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "slideshow": { 92 | "slide_type": "slide" 93 | } 94 | }, 95 | "source": [ 96 | "## Create the network\n", 97 | "\n", 98 | "Here we'll see the graph created on-the-fly and the forward pass\n", 99 | "\n", 100 | "**Note: static graph frameworks predefine the graph (which then cannot change later) and then run inputs through it**" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "slideshow": { 108 | "slide_type": "fragment" 109 | } 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# IMPORTANT: When we create b, the graph creation begins!!!\n", 114 | "\n", 115 | "# The next three lines of code (b, c, d creation) are our\n", 116 | "# forward pass - when the inputs are processed into output\n", 117 | "\n", 118 | "# BEGIN COMPUTATIONAL GRAPH CREATION (some operations)\n", 119 | "b = w1 * a\n", 120 | "c = w2 * a\n", 121 | "d = w3 * b + w4 * c\n", 122 | "# END GRAPH CREATION\n", 123 | "\n", 124 | "# This is the loss\n", 125 | "L = (10 - d)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "slide" 133 | } 134 | }, 135 | "source": [ 136 | "## Run backprop and check the gradient data" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "slideshow": { 144 | "slide_type": "fragment" 145 | } 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "L.backward()\n", 150 | "\n", 151 | "for index, weight in enumerate(weights, start=1):\n", 152 | " gradient, *_ = weight.grad.data\n", 153 | " print(\"Gradient of w{} w.r.t to L: {}\".format(index, gradient))" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": { 159 | "slideshow": { 160 | "slide_type": "slide" 161 | } 162 | }, 163 | "source": [ 164 | "Exercise: run the above cell one more time and see what happens\n", 165 | "\n", 166 | "**Remember: in PyTorch the computational graph is constructed on-the-fly as the forward pass executes, and by default it is freed once `backward()` is called. 
Two things must be done to run over and over**\n", 167 | " * Clear the gradients\n", 168 | " * Build (and possibly redefine) the network again" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": { 174 | "slideshow": { 175 | "slide_type": "slide" 176 | } 177 | }, 178 | "source": [ 179 | "Exercise: re-run the \"Create the network\" section and then \"Run backprop...\" section. Why do the gradients change? How do you reset the gradients?" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "slideshow": { 186 | "slide_type": "slide" 187 | } 188 | }, 189 | "source": [ 190 | "## As you'll see later...but to round this out\n", 191 | "\n", 192 | "Let's update the weights and zero them (we'd do this before running the network again as would happen in training)\n", 193 | "\n", 194 | "Your update and reset will look like:\n", 195 | "```python\n", 196 | "# For fun let's say we had a learning rate of 1e-4\n", 197 | "learning_rate = 1e-4\n", 198 | "\n", 199 | "with torch.no_grad():\n", 200 | " w1 -= learning_rate * w1.grad\n", 201 | " w2 -= learning_rate * w2.grad\n", 202 | "\n", 203 | " # Manually zero the gradients after running the backward pass\n", 204 | " w1.grad.zero_()\n", 205 | " w2.grad.zero_()\n", 206 | "```" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": { 212 | "slideshow": { 213 | "slide_type": "slide" 214 | } 215 | }, 216 | "source": [ 217 | "## Let's put it all together to create, run, backwards prop, update weights, clear gradients" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": { 224 | "slideshow": { 225 | "slide_type": "-" 226 | } 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "# Define the leaf nodes\n", 231 | "a = torch.tensor([4.])\n", 232 | "\n", 233 | "# This is just a vector of tensors\n", 234 | "weights = [torch.tensor([i], requires_grad=True) for i in (2., 5., 9., 7.)]\n", 235 | "\n", 236 | "# unpack the weights for nicer assignment\n", 237 | "w1, w2, w3, w4 = weights\n", 238 | "\n", 239 | "\n", 240 | "# IMPORTANT: When we create b, the graph creation begins!!!\n", 241 | "\n", 242 | "# The next three lines of code (b, c, d creation) are our\n", 243 | "# forward pass - when the inputs are processed into output\n", 244 | "\n", 245 | "# BEGIN COMPUTATIONAL GRAPH CREATION (some operations)\n", 246 | "b = w1 * a\n", 247 | "c = w2 * a\n", 248 | "d = w3 * b + w4 * c\n", 249 | "# END GRAPH CREATION\n", 250 | "\n", 251 | "# This is the loss\n", 252 | "L = (10 - d)\n", 253 | "\n", 254 | "# Run the backwards propagation of gradients \n", 255 | "# (remember your chain rule for differentiation? 
Well PyTorch\n", 256 | "# takes care of this for you!)\n", 257 | "L.backward()\n", 258 | "\n", 259 | "for index, weight in enumerate(weights, start=1):\n", 260 | " gradient, *_ = weight.grad.data\n", 261 | " print(\"Gradient of w{} w.r.t to L: {}\".format(index, gradient))\n", 262 | "\n", 263 | "# For fun let's say we had a learning rate of 1e-4\n", 264 | "learning_rate = 1e-4\n", 265 | "\n", 266 | "with torch.no_grad():\n", 267 | " w1 -= learning_rate * w1.grad\n", 268 | " w2 -= learning_rate * w2.grad\n", 269 | "\n", 270 | " # Manually zero the gradients after running the backward pass\n", 271 | " w1.grad.zero_()\n", 272 | " w2.grad.zero_()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "**Now we've done one epoch!**" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": { 285 | "slideshow": { 286 | "slide_type": "slide" 287 | } 288 | }, 289 | "source": [ 290 | "## Advantages\n", 291 | "\n", 292 | "* Easier to debug that a static graph (we can modify our graph and easily check variables and gradients)\n", 293 | "* Since the network is created when ran it can be modified **on-the-fly** (very good for NLP where input lengths and output lengths may differ like in machine translation)\n", 294 | "* Reminiscent (as you'll see more later) of regular Python and object oriented programming - closer to what devs know" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "slideshow": { 301 | "slide_type": "slide" 302 | } 303 | }, 304 | "source": [ 305 | "## References\n", 306 | "1. [Getting Started with PyTorch Part 1: Understanding how Automatic Differentiation works](https://towardsdatascience.com/getting-started-with-pytorch-part-1-understanding-how-automatic-differentiation-works-5008282073ec) by Ayoosh Kathuria\n", 307 | "2. [PyTorch: Autograd example](https://github.com/jcjohnson/pytorch-examples#pytorch-autograd) by Justin Johnson" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [] 316 | } 317 | ], 318 | "metadata": { 319 | "celltoolbar": "Slideshow", 320 | "kernelspec": { 321 | "display_name": "Python 3", 322 | "language": "python", 323 | "name": "python3" 324 | }, 325 | "language_info": { 326 | "codemirror_mode": { 327 | "name": "ipython", 328 | "version": 3 329 | }, 330 | "file_extension": ".py", 331 | "mimetype": "text/x-python", 332 | "name": "python", 333 | "nbconvert_exporter": "python", 334 | "pygments_lexer": "ipython3", 335 | "version": "3.5.5" 336 | } 337 | }, 338 | "nbformat": 4, 339 | "nbformat_minor": 2 340 | } 341 | -------------------------------------------------------------------------------- /archive/fall_2018/Introduction/4.Future_of_PyTorch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "


\n", 12 | "# What's coming in PyTorch 1.0 Release\n", 13 | "\n", 14 | "## BIG Announcement! Today (9/22) PyTorch 1.0 was Released in Preview - Go Check it Out!" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "slideshow": { 21 | "slide_type": "slide" 22 | } 23 | }, 24 | "source": [ 25 | "## Production-ready\n", 26 | "\n", 27 | "* Exporting to C++-only runtimes for use in larger projects\n", 28 | "* Optimizing mobile systems on iPhone, Android, Qualcomm and other systems\n", 29 | "* Using more efficient data layouts and performing **kernel fusion** to do faster inference (saving 10% of speed or memory at scale is a big win)\n", 30 | "* Quantized inference (such as 8-bit inference)\n", 31 | "\n", 32 | "**In 1.0, your code continues to work as-is, we’re not making any big changes to the existing API.**" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "slideshow": { 39 | "slide_type": "slide" 40 | } 41 | }, 42 | "source": [ 43 | "## Software integrations\n", 44 | "\n", 45 | "1. Unification of the PyTorch 0.4 and Caffe2 codebases\n", 46 | "2. ONNX natively integrated into PyTorch 1.0 as the model export format \n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "source": [ 57 | "## Platform integrations\n", 58 | "\n", 59 | "1. Microsoft plans to support PyTorch 1.0 in their Azure cloud and developer offerings, including Azure Machine Learning services and Data Science Virtual Machines\n", 60 | "2. Amazon Web Services also plans support" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "slideshow": { 67 | "slide_type": "slide" 68 | } 69 | }, 70 | "source": [ 71 | "## References\n", 72 | "\n", 73 | "1. [Announcing PyTorch 1.0 for both research and production](https://developers.facebook.com/blog/post/2018/05/02/announcing-pytorch-1.0-for-research-production/)\n", 74 | "2. [The road to 1.0: production ready PyTorch](https://pytorch.org/2018/05/02/road-to-1.0.html)\n", 75 | "3. [logo](https://github.com/yunjey/pytorch-tutorial/blob/master/logo/pytorch_logo.png)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [] 84 | } 85 | ], 86 | "metadata": { 87 | "celltoolbar": "Slideshow", 88 | "kernelspec": { 89 | "display_name": "Python 3", 90 | "language": "python", 91 | "name": "python3" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.5.5" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /archive/fall_2018/Named_Entity_Recognition/readme.md: -------------------------------------------------------------------------------- 1 | # Named Entity Recognition 2 | ## Using Bi-LSTMs, Conditional Random Fields and the Viterbi Algorithm__ 3 | 4 | 5 | Bi-LSTM Diagram 6 | --- 7 | 8 | Looks complicated, but really just a lot of vector concatenations and a couple LSTMs (one running in reverse). Hope this is clear, but see references in tutorial for more context (lol, get it? context?). 
9 | 10 | ![bi-lstm](../images/blstm_crf_details.png.png) 11 | 12 | Viterbi Algorithm at a Glance 13 | --- 14 | 15 | Viterbi algorithm example. 16 | 17 | ![viterbi algorithm](../images/viterbi.png) 18 | 19 | Conditional Random Field (CRF) at a Glance 20 | --- 21 | 22 | Transition matrix example. 23 | 24 | ![crf](../images/crf_transition_matrix.png) -------------------------------------------------------------------------------- /archive/fall_2018/Named_Entity_Recognition/sample_data.txt: -------------------------------------------------------------------------------- 1 | place the chicken, celery, carrots, onions, parsnip (if using), parsley, peppercorns, bay leaves and salt in a large soup pot and cover with cold water by 1 inch. bring to a boil over high heat, then immediately reduce the heat to very low. adjust the heat until the soup is smiling: barely moving on the surface, with an occasional bubble breaking through. cook uncovered, until the chicken is very tender and falling off the bone, 1 to 1 1/2 hours. when cool enough to handle, use tongs to transfer chicken from the pot to a container. taste the broth and continue to simmer it until it is concentrated and tasty. strain broth through a fine sieve (or a colander lined with cheesecloth) into a separate container. discard all the solids from the strainer (or reserve the vegetables, chill and serve with vinaigrette, if you wish). refrigerate chicken pieces and broth separately for at least 8 hours (or up to 3 days), until a thick layer of yellow fat has risen to the top of the broth. when ready to finish the soup, use your fingers to separate chicken breast meat from bones and skin. discard bones and skin. use two forks to pull the breast meat apart into soft chunks, or use a knife and cut into bite-size pieces. (reserve dark meat for another use.) skim chicken fat from top of broth and set aside. place 3 tablespoons of the fat in a soup pot with a lid. add leeks, stir to coat, and heat over medium heat until leeks begin to fry. then reduce the heat to a gentle sizzle and cook, stirring often, until slightly softened, about 3 minutes. add carrots, sprinkle with salt, stir, and cover the pot. cook until vegetables are just tender, about 5 minutes more. (keep in mind that vegetables will continue to cook in the soup.) do not brown. pour broth into pot with vegetables and heat to a simmer. add noodles and simmer until heated through, soft and plumped with chicken broth. add the breast meat, then taste broth and add salt and pepper to taste. for best flavor, soup should have some golden droplets of fat on top; if needed, add more chicken fat one teaspoon at a time. serve immediately, in a tureen or from the pot, sprinkling each serving with herbs. 
-------------------------------------------------------------------------------- /archive/fall_2018/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Introduction to NLP with PyTorch Workshop 3 | 4 | 5 | 6 | Agenda 7 | ---- 8 | * 9-9:30AM Registration, bagel breakfast 9 | * 9:30-10:30AM [Setup, Introduction to PyTorch and NLP](Introduction/) 10 | * 10:30-11:30AM [Life cycle of an NLP Research Project](nlp_lifecycle/) 11 | * 11:30AM-12:00PM [Word Embeddings]() 12 | * 12-1PM Pizza lunch 13 | * 1-2PM RNNs with PyTorch, Character Counting 14 | * 2-3PM [LSTMs and Sequence Models]() 15 | * 3-4PM [Bi-LSTMs and Named Entity Recognition](Named_Entity_Recognition/) 16 | * 4-4:30PM [Transfer Learning with ULMFiT](transfer_learning/) 17 | -------------------------------------------------------------------------------- /archive/fall_2018/Word Embeddings/Word Embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Word Embeddings" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "Word Embeddings are dense vectors representations of words. Compare this to sparse vectors like One-Hot Encoding." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 21, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "slide" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "sentence = \"the quick brown fox jumped over the lazy dog\"\n", 36 | "words = sentence.split(' ')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "slideshow": { 43 | "slide_type": "slide" 44 | } 45 | }, 46 | "source": [ 47 | "Let's look at the individual words in our vocabulary:" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 22, 53 | "metadata": { 54 | "slideshow": { 55 | "slide_type": "slide" 56 | } 57 | }, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "['quick', 'dog', 'the', 'lazy', 'over', 'jumped', 'brown', 'fox']\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "vocabulary = list(set(words))\n", 69 | "print(vocabulary)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 23, 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "8" 85 | ] 86 | }, 87 | "execution_count": 23, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "# Number of words in our vocabulary\n", 94 | "len(vocabulary)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": { 100 | "slideshow": { 101 | "slide_type": "slide" 102 | } 103 | }, 104 | "source": [ 105 | "# One Hot Encoding\n", 106 | "\n", 107 | "The vocabulary size is seen above. Now we can one-hot encode the vocabulary words. 
Let's try using Scikit-Learn One-Hot encoding:" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 24, 113 | "metadata": { 114 | "slideshow": { 115 | "slide_type": "slide" 116 | } 117 | }, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "array(['brown', 'dog', 'fox', 'jumped', 'lazy', 'over', 'quick', 'the'],\n", 123 | " dtype='" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": { 267 | "slideshow": { 268 | "slide_type": "slide" 269 | } 270 | }, 271 | "source": [ 272 | "Training based on surrounding words. We want to predict the probability of a word based on the words around it." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 27, 278 | "metadata": { 279 | "slideshow": { 280 | "slide_type": "slide" 281 | } 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "# Add imports here\n", 286 | "import torch\n", 287 | "import torch.autograd as autograd\n", 288 | "import torch.nn as nn\n", 289 | "import torch.optim as optim\n", 290 | "import torch.nn.functional as F\n", 291 | "\n", 292 | "class NGramLanguageModeler(nn.Module):\n", 293 | "\n", 294 | " def __init__(self, vocab_size, embedding_dim, context_size):\n", 295 | " super(NGramLanguageModeler, self).__init__()\n", 296 | " self.embeddings = nn.Embedding(vocab_size, embedding_dim)\n", 297 | " self.linear1 = nn.Linear(context_size * embedding_dim, 128)\n", 298 | " self.linear2 = nn.Linear(128, vocab_size)\n", 299 | "\n", 300 | " def forward(self, inputs):\n", 301 | " embeds = self.embeddings(inputs).view((1, -1))\n", 302 | " out = F.relu(self.linear1(embeds))\n", 303 | " out = self.linear2(out)\n", 304 | " log_probs = F.log_softmax(out, dim=1)\n", 305 | " return log_probs" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 28, 311 | "metadata": { 312 | "slideshow": { 313 | "slide_type": "slide" 314 | } 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "learning_rate = 0.001\n", 319 | "losses = []\n", 320 | "loss_function = nn.NLLLoss() # negative log likelihood\n", 321 | "model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)\n", 322 | "optimizer = optim.SGD(model.parameters(), lr=learning_rate)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 29, 328 | "metadata": { 329 | "slideshow": { 330 | "slide_type": "skip" 331 | } 332 | }, 333 | "outputs": [ 334 | { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "\r\n", 339 | "Usage: \r\n", 340 | " pip install [options] [package-index-options] ...\r\n", 341 | " pip install [options] -r [package-index-options] ...\r\n", 342 | " pip install [options] [-e] ...\r\n", 343 | " pip install [options] [-e] ...\r\n", 344 | " pip install [options] ...\r\n", 345 | "\r\n", 346 | "no such option: --yes\r\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "import sys\n", 352 | "! 
{sys.prefix}/bin/pip install tqdm --yes" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 30, 358 | "metadata": { 359 | "slideshow": { 360 | "slide_type": "slide" 361 | } 362 | }, 363 | "outputs": [ 364 | { 365 | "name": "stderr", 366 | "output_type": "stream", 367 | "text": [ 368 | "100%|██████████| 113/113 [00:00<00:00, 279.06it/s]\n", 369 | "100%|██████████| 113/113 [00:00<00:00, 268.62it/s]\n", 370 | "100%|██████████| 113/113 [00:00<00:00, 274.08it/s]\n", 371 | "100%|██████████| 113/113 [00:00<00:00, 277.02it/s]\n", 372 | "100%|██████████| 113/113 [00:00<00:00, 289.94it/s]\n", 373 | "100%|██████████| 113/113 [00:00<00:00, 281.88it/s]\n", 374 | "100%|██████████| 113/113 [00:00<00:00, 275.84it/s]\n", 375 | "100%|██████████| 113/113 [00:00<00:00, 281.50it/s]\n", 376 | "100%|██████████| 113/113 [00:00<00:00, 276.44it/s]\n", 377 | "100%|██████████| 113/113 [00:00<00:00, 264.27it/s]" 378 | ] 379 | }, 380 | { 381 | "name": "stdout", 382 | "output_type": "stream", 383 | "text": [ 384 | "[525.890875339508, 523.3742175102234, 520.8734226226807, 518.3879098892212, 515.917643070221, 513.4630217552185, 511.02079725265503, 508.5904929637909, 506.1698443889618, 503.7612953186035]\n" 385 | ] 386 | }, 387 | { 388 | "name": "stderr", 389 | "output_type": "stream", 390 | "text": [ 391 | "\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "from tqdm import tqdm\n", 397 | "\n", 398 | "for epoch in range(10):\n", 399 | " total_loss = 0\n", 400 | "\n", 401 | " for context, target in tqdm(trigrams):\n", 402 | " # (['When', 'forty'], 'winters')\n", 403 | " # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words\n", 404 | " # into integer indices and wrap them in tensors)\n", 405 | " context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)\n", 406 | "\n", 407 | " # Step 2. Recall that torch *accumulates* gradients. Before passing in a\n", 408 | " # new instance, you need to zero out the gradients from the old\n", 409 | " # instance\n", 410 | " model.zero_grad()\n", 411 | "\n", 412 | " # Step 3. Run the forward pass, getting log probabilities over next\n", 413 | " # words\n", 414 | " log_probs = model(context_idxs)\n", 415 | "\n", 416 | " # Step 4. Compute your loss function. (Again, Torch wants the target\n", 417 | " # word wrapped in a tensor)\n", 418 | " loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))\n", 419 | "\n", 420 | " # Step 5. Do the backward pass and update the gradient\n", 421 | " loss.backward()\n", 422 | " optimizer.step()\n", 423 | "\n", 424 | " # Get the Python number from a 1-element Tensor by calling tensor.item()\n", 425 | " total_loss += loss.item()\n", 426 | " losses.append(total_loss)\n", 427 | " # add progress bar with epochs\n", 428 | "print(losses) # The loss decreased every iteration over the training data!" 
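,
    "\n",
    "# Hypothetical follow-up, left commented out so the recorded output above stays\n",
    "# accurate (assumes matplotlib is installed in this environment):\n",
    "# import matplotlib.pyplot as plt\n",
    "# plt.plot(losses); plt.xlabel('epoch'); plt.ylabel('total NLL loss'); plt.show()\n"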
429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 31, 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/plain": [ 439 | "NGramLanguageModeler(\n", 440 | " (embeddings): Embedding(97, 10)\n", 441 | " (linear1): Linear(in_features=20, out_features=128, bias=True)\n", 442 | " (linear2): Linear(in_features=128, out_features=97, bias=True)\n", 443 | ")" 444 | ] 445 | }, 446 | "execution_count": 31, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "model.eval()" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 40, 458 | "metadata": {}, 459 | "outputs": [ 460 | { 461 | "name": "stdout", 462 | "output_type": "stream", 463 | "text": [ 464 | "tensor(-4.2438)\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "with torch.no_grad():\n", 470 | " context = ['When', 'forty']\n", 471 | " context_idxs = torch.tensor([word_to_ix[w] for w in context], dtype=torch.long)\n", 472 | " model(context_idxs)\n", 473 | " " 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 12, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "# We should show the embedding vector for a words\n", 483 | "\n", 484 | "# We should show the prediction given two words (probabilities and the highest prediction -- look up torch argmax )" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": { 490 | "slideshow": { 491 | "slide_type": "slide" 492 | } 493 | }, 494 | "source": [ 495 | "# Next Steps\n", 496 | "* RNN/LSTM/BiLSTM\n", 497 | "* Why memory? It gives more context for a sentence. Less rigid.\n", 498 | "* Pointer to GloVe word embedding\n", 499 | "* https://github.com/fastai/word-embeddings-workshop/blob/master/Word%20Embeddings.ipynb\n" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": { 505 | "slideshow": { 506 | "slide_type": "slide" 507 | } 508 | }, 509 | "source": [ 510 | "# Exercise: Continuous Bag of Words\n", 511 | "Continuous Bag of Words is a model that tries to predict a word based on a few word before and after the word.\n", 512 | "\n", 513 | "* Not sequential\n", 514 | "* Not necessarily probabilistic\n", 515 | "* Used for Pretraining a word embedding\n" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 13, 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "name": "stdout", 525 | "output_type": "stream", 526 | "text": [ 527 | "[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'a'], 'idea')]\n" 528 | ] 529 | } 530 | ], 531 | "source": [ 532 | "CONTEXT_SIZE = 2 # 2 words to the left, 2 to the right\n", 533 | "raw_text = \"\"\"We are about to study the idea of a computational process.\n", 534 | "Computational processes are abstract beings that inhabit computers.\n", 535 | "As they evolve, processes manipulate other abstract things called data.\n", 536 | "The evolution of a process is directed by a pattern of rules\n", 537 | "called a program. People create programs to direct processes. 
In effect,\n", 538 | "we conjure the spirits of the computer with our spells.\"\"\".split()\n", 539 | "\n", 540 | "# By deriving a set from `raw_text`, we deduplicate the array\n", 541 | "vocab = set(raw_text)\n", 542 | "vocab_size = len(vocab)\n", 543 | "\n", 544 | "word_to_ix = {word: i for i, word in enumerate(vocab)}\n", 545 | "data = []\n", 546 | "for i in range(2, len(raw_text) - 2):\n", 547 | " context = [raw_text[i - 2], raw_text[i - 1],\n", 548 | " raw_text[i + 1], raw_text[i + 2]]\n", 549 | " target = raw_text[i]\n", 550 | " data.append((context, target))\n", 551 | "print(data[:5])" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 14, 557 | "metadata": {}, 558 | "outputs": [ 559 | { 560 | "data": { 561 | "text/plain": [ 562 | "tensor([ 18, 1, 42, 15])" 563 | ] 564 | }, 565 | "execution_count": 14, 566 | "metadata": {}, 567 | "output_type": "execute_result" 568 | } 569 | ], 570 | "source": [ 571 | "# create your model and train. here are some functions to help you make\n", 572 | "# the data ready for use by your module\n", 573 | "\n", 574 | "def make_context_vector(context, word_to_ix):\n", 575 | " idxs = [word_to_ix[w] for w in context]\n", 576 | " return torch.tensor(idxs, dtype=torch.long)\n", 577 | "\n", 578 | "make_context_vector(data[0][0], word_to_ix) # example" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 15, 584 | "metadata": {}, 585 | "outputs": [], 586 | "source": [ 587 | "class CBOW(nn.Module):\n", 588 | " def __init__(self):\n", 589 | " pass\n", 590 | "\n", 591 | " def forward(self, inputs):\n", 592 | " pass\n", 593 | "# Solutions in another directory?\n", 594 | "# " 595 | ] 596 | }, 597 | { 598 | "cell_type": "markdown", 599 | "metadata": { 600 | "slideshow": { 601 | "slide_type": "slide" 602 | } 603 | }, 604 | "source": [ 605 | "\n", 606 | "References: \n", 607 | "* https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html\n", 608 | "* https://github.com/fastai/word-embeddings-workshop/blob/master/Word%20Embeddings.ipynb" 609 | ] 610 | } 611 | ], 612 | "metadata": { 613 | "kernelspec": { 614 | "display_name": "Python 3", 615 | "language": "python", 616 | "name": "python3" 617 | }, 618 | "language_info": { 619 | "codemirror_mode": { 620 | "name": "ipython", 621 | "version": 3 622 | }, 623 | "file_extension": ".py", 624 | "mimetype": "text/x-python", 625 | "name": "python", 626 | "nbconvert_exporter": "python", 627 | "pygments_lexer": "ipython3", 628 | "version": "3.5.5" 629 | } 630 | }, 631 | "nbformat": 4, 632 | "nbformat_minor": 2 633 | } 634 | -------------------------------------------------------------------------------- /archive/fall_2018/basics/README.md: -------------------------------------------------------------------------------- 1 | Intro notebooks to get started with PyTorch and NLP basics. 
2 | -------------------------------------------------------------------------------- /archive/fall_2018/images: -------------------------------------------------------------------------------- 1 | images -------------------------------------------------------------------------------- /archive/fall_2018/nlp_lifecycle/accuracies.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/archive/fall_2018/nlp_lifecycle/accuracies.jpeg -------------------------------------------------------------------------------- /archive/fall_2018/nlp_lifecycle/convolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/archive/fall_2018/nlp_lifecycle/convolution.png -------------------------------------------------------------------------------- /archive/fall_2018/nlp_lifecycle/deepNetVis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/archive/fall_2018/nlp_lifecycle/deepNetVis.png -------------------------------------------------------------------------------- /archive/fall_2018/nlp_lifecycle/textUnderstand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/archive/fall_2018/nlp_lifecycle/textUnderstand.png -------------------------------------------------------------------------------- /archive/fall_2018/requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter 2 | numpy 3 | scipy 4 | sklearn 5 | torch==0.4.0 6 | tqdm 7 | -------------------------------------------------------------------------------- /archive/fall_2018/transfer_learning/ULMFiT and Transfer Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# NLP Transfer Learning\n", 12 | "While transfer learning has been one of the biggest advancements in deep learning for image processing, NLP is only beginning to benefit from the same idea."
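,
    "\n",
    "The general recipe looks like this in PyTorch -- a minimal sketch of the idea, not fast.ai's actual API; `pretrained_model`, `hidden_size` and `num_classes` are hypothetical placeholders:\n",
    "\n",
    "```python\n",
    "import torch.nn as nn\n",
    "\n",
    "for p in pretrained_model.parameters():\n",
    "    p.requires_grad = False              # freeze the pre-trained weights\n",
    "\n",
    "head = nn.Linear(hidden_size, num_classes)  # new task-specific layer to train\n",
    "```\n"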
13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "slideshow": { 19 | "slide_type": "slide" 20 | } 21 | }, 22 | "source": [ 23 | "# What is Transfer Learning?\n", 24 | "\n", 25 | "Take a neural network.\n", 26 | "Train it and export the model.\n", 27 | "\n", 28 | "Import part or all of the model architecture into a new model!\n", 29 | "Use parts of it to build a new model!\n", 30 | "\n", 31 | "ImageNet Models:\n", 32 | "\n", 33 | "VGGNet\n", 34 | "ResNet\n", 35 | "Inception\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "# Why Transfer Learning?\n", 47 | "\n", 48 | "* Save training time 1 Month --> 1 Day\n", 49 | "* Reuse models\n", 50 | "* Train on a smaller data set for a specific task\n", 51 | "* Don't need to reinvent the wheel" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "slideshow": { 58 | "slide_type": "slide" 59 | } 60 | }, 61 | "source": [ 62 | "# Word Embeddings\n", 63 | "We previously discussed Word Embeddings. In the past, people have performed transfer learning with just the first Embedding layer. \n", 64 | "\n", 65 | "One common one is:\n", 66 | "\n", 67 | "GloVe: Global Vectors for Word Representation\n", 68 | "https://nlp.stanford.edu/projects/glove/\n", 69 | "\n", 70 | "However, beyond the embedding layer, there have not been many successful shared examples of Transfer Learning in NLP." 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "source": [ 81 | "# ULMFiT\n", 82 | "Jeremy Howard and Sebastian Ruder from Fast.AI\n", 83 | "\n", 84 | "## Why ULMFiT\n", 85 | "* Faster time retraining for classification task\n", 86 | "* Better performance\n", 87 | "* Smaller amount of labeled training data required\n", 88 | "\n", 89 | "## Blog Introduction\n", 90 | "http://nlp.fast.ai/classification/2018/05/15/introducting-ulmfit.html\n", 91 | "\n", 92 | "## Fast.ai text library -- like PyTorch but with \n", 93 | "https://github.com/fastai" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "slide" 101 | } 102 | }, 103 | "source": [ 104 | "# The Paper\n", 105 | "\n", 106 | "https://arxiv.org/abs/1801.06146\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "slide" 114 | } 115 | }, 116 | "source": [ 117 | "# Step 1: General Language Model Pre-training\n", 118 | "First they train the model on a large corpus such as the Wikitext-103 data set.\n", 119 | "\n", 120 | "The Language model is fairly straightforward and similar to what David had shown:\n", 121 | "* Embedding Layer + LSTM and Linear\n", 122 | "\n", 123 | "They have two main classes:\n", 124 | "* RNN_Encoder (PyTorch nn.Module)\n", 125 | "* Learner \n", 126 | "** Has a model as an attribute\n", 127 | "** Has helper methods to work on the model (training, etc)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "slideshow": { 135 | "slide_type": "skip" 136 | } 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# Note: This is just for demo:\n", 141 | "\n", 142 | "class RNN_Encoder(nn.Module):\n", 143 | "\n", 144 | " \"\"\"A custom RNN encoder network that uses\n", 145 | " - an embedding matrix to encode input,\n", 146 | " - a stack of LSTM or QRNN layers to drive the network, and\n", 147 | " - 
variational dropouts in the embedding and LSTM/QRNN layers\n", 148 | "\n", 149 | " The architecture for this network was inspired by the work done in\n", 150 | " \"Regularizing and Optimizing LSTM Language Models\".\n", 151 | " (https://arxiv.org/pdf/1708.02182.pdf)\n", 152 | " \"\"\"\n", 153 | "\n", 154 | " initrange=0.1\n", 155 | "\n", 156 | " def __init__(self, ntoken, emb_sz, n_hid, n_layers, pad_token, bidir=False,\n", 157 | " dropouth=0.3, dropouti=0.65, dropoute=0.1, wdrop=0.5, qrnn=False):\n", 158 | " \"\"\" Default constructor for the RNN_Encoder class\n", 159 | "\n", 160 | " Args:\n", 161 | " bs (int): batch size of the input data (not a constructor argument; inferred from the input at run time)\n", 162 | " ntoken (int): size of the vocabulary (number of distinct tokens) in the source dataset\n", 163 | " emb_sz (int): the embedding size to use to encode each token\n", 164 | " n_hid (int): number of hidden activations per LSTM layer\n", 165 | " n_layers (int): number of LSTM layers to use in the architecture\n", 166 | " pad_token (int): the int value used for padding text.\n", 167 | " dropouth (float): dropout to apply to the activations going from one LSTM layer to another\n", 168 | " dropouti (float): dropout to apply to the input layer.\n", 169 | " dropoute (float): dropout to apply to the embedding layer.\n", 170 | " wdrop (float): dropout used for an LSTM's internal (or hidden) recurrent weights.\n", 171 | "\n", 172 | " Returns:\n", 173 | " None\n", 174 | " \"\"\"\n", 175 | "\n", 176 | " super().__init__()\n", 177 | " self.ndir = 2 if bidir else 1\n", 178 | " self.bs, self.qrnn = 1, qrnn\n", 179 | " self.encoder = nn.Embedding(ntoken, emb_sz, padding_idx=pad_token)\n", 180 | " self.encoder_with_dropout = EmbeddingDropout(self.encoder)\n", 181 | " if self.qrnn:\n", 182 | " #Using QRNN requires cupy: https://github.com/cupy/cupy\n", 183 | " from .torchqrnn.qrnn import QRNNLayer\n", 184 | " self.rnns = [QRNNLayer(emb_sz if l == 0 else n_hid, (n_hid if l != n_layers - 1 else emb_sz)//self.ndir,\n", 185 | " save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(n_layers)]\n", 186 | " if wdrop:\n", 187 | " for rnn in self.rnns:\n", 188 | " rnn.linear = WeightDrop(rnn.linear, wdrop, weights=['weight'])\n", 189 | " else:\n", 190 | " self.rnns = [nn.LSTM(emb_sz if l == 0 else n_hid, (n_hid if l != n_layers - 1 else emb_sz)//self.ndir,\n", 191 | " 1, bidirectional=bidir) for l in range(n_layers)]\n", 192 | " if wdrop: self.rnns = [WeightDrop(rnn, wdrop) for rnn in self.rnns]\n", 193 | " self.rnns = torch.nn.ModuleList(self.rnns)\n", 194 | " self.encoder.weight.data.uniform_(-self.initrange, self.initrange)\n", 195 | "\n", 196 | " self.emb_sz,self.n_hid,self.n_layers,self.dropoute = emb_sz,n_hid,n_layers,dropoute\n", 197 | " self.dropouti = LockedDropout(dropouti)\n", 198 | " self.dropouths = nn.ModuleList([LockedDropout(dropouth) for l in range(n_layers)])\n", 199 | "\n", 200 | " def forward(self, input):\n", 201 | " \"\"\" Invoked during the forward propagation of the RNN_Encoder module.\n", 202 | " Args:\n", 203 | " input (Tensor): input of shape (sentence length x batch_size)\n", 204 | "\n", 205 | " Returns:\n", 206 | " (raw_outputs, outputs) (tuple(list(Tensor), list(Tensor))): lists of tensors evaluated from each RNN layer\n", 207 | " without dropouth applied, and with dropouth applied, respectively.\n", 208 | " \"\"\"\n", 209 | " sl,bs = input.size()\n", 210 | " if bs!=self.bs:\n", 211 | " self.bs=bs\n", 212 | " self.reset()\n", 213 | " with set_grad_enabled(self.training):\n", 214 | " emb = self.encoder_with_dropout(input, 
dropout=self.dropoute if self.training else 0)\n", 215 | " emb = self.dropouti(emb)\n", 216 | " raw_output = emb\n", 217 | " new_hidden,raw_outputs,outputs = [],[],[]\n", 218 | " for l, (rnn,drop) in enumerate(zip(self.rnns, self.dropouths)):\n", 219 | " current_input = raw_output\n", 220 | " with warnings.catch_warnings():\n", 221 | " warnings.simplefilter(\"ignore\")\n", 222 | " raw_output, new_h = rnn(raw_output, self.hidden[l])\n", 223 | " new_hidden.append(new_h)\n", 224 | " raw_outputs.append(raw_output)\n", 225 | " if l != self.n_layers - 1: raw_output = drop(raw_output)\n", 226 | " outputs.append(raw_output)\n", 227 | "\n", 228 | " self.hidden = repackage_var(new_hidden)\n", 229 | " return raw_outputs, outputs\n", 230 | "\n", 231 | " def one_hidden(self, l):\n", 232 | " nh = (self.n_hid if l != self.n_layers - 1 else self.emb_sz)//self.ndir\n", 233 | " if IS_TORCH_04: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_())\n", 234 | " else: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_(), volatile=not self.training)\n", 235 | "\n", 236 | " def reset(self):\n", 237 | " if self.qrnn: [r.reset() for r in self.rnns]\n", 238 | " self.weights = next(self.parameters()).data\n", 239 | " if self.qrnn: self.hidden = [self.one_hidden(l) for l in range(self.n_layers)]\n", 240 | " else: self.hidden = [(self.one_hidden(l), self.one_hidden(l)) for l in range(self.n_layers)]\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": { 246 | "slideshow": { 247 | "slide_type": "slide" 248 | } 249 | }, 250 | "source": [ 251 | "# Step 2: Language Model Fine-Tuning\n", 252 | "Then they fine-tune the language model on the target data set, so that the model picks up the characteristics (vocabulary, style, domain) of that data set.\n" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "slideshow": { 259 | "slide_type": "slide" 260 | } 261 | }, 262 | "source": [ 263 | "# Step 3: Classifier Fine-Tuning\n", 264 | "\n", 265 | "In this part, they:\n", 266 | "1. Add a classification head to the model\n", 267 | "2. Unfreeze the last layer and retrain\n", 268 | "3. 
Gradually unfreeze the remaining layers and retrain\n", 269 | "\n", 270 | "https://github.com/fastai/fastai/blob/master/courses/dl2/imdb_scripts/train_clas.py" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "skip" 279 | } 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "# This is how fastai freezes parameters and layer groups in its Learner class\n", 284 | "# (apply_leaf is another fastai helper that applies a function to each leaf module):\n", 285 | "def set_trainable_attr(m,b):\n", 286 | " m.trainable=b\n", 287 | " for p in m.parameters(): p.requires_grad=b\n", 288 | "\n", 289 | "def set_trainable(l, b):\n", 290 | " apply_leaf(l, lambda m: set_trainable_attr(m,b))\n" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "slideshow": { 298 | "slide_type": "skip" 299 | } 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "class PoolingLinearClassifier(nn.Module):\n", 304 | " def __init__(self, layers, drops):\n", 305 | " super().__init__()\n", 306 | " self.layers = nn.ModuleList([\n", 307 | " LinearBlock(layers[i], layers[i + 1], drops[i]) for i in range(len(layers) - 1)])\n", 308 | "\n", 309 | " def pool(self, x, bs, is_max):\n", 310 | " f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d\n", 311 | " return f(x.permute(1,2,0), (1,)).view(bs,-1)\n", 312 | "\n", 313 | " def forward(self, input):\n", 314 | " raw_outputs, outputs = input\n", 315 | " output = outputs[-1]\n", 316 | " sl,bs,_ = output.size()\n", 317 | " avgpool = self.pool(output, bs, False)\n", 318 | " mxpool = self.pool(output, bs, True)\n", 319 | " x = torch.cat([output[-1], mxpool, avgpool], 1)\n", 320 | " for l in self.layers:\n", 321 | " l_x = l(x)\n", 322 | " x = F.relu(l_x)\n", 323 | " return l_x, raw_outputs, outputs\n" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "class Learner():\n", 333 | " ...\n", 334 | " def freeze_to(self, n):\n", 335 | " c=self.get_layer_groups()\n", 336 | " for l in c: set_trainable(l, False)\n", 337 | " for l in c[n:]: set_trainable(l, True)\n", 338 | "\n", 339 | " def freeze_all_but(self, n):\n", 340 | " c=self.get_layer_groups()\n", 341 | " for l in c: set_trainable(l, False)\n", 342 | " set_trainable(c[n], True)\n", 343 | " \n", 344 | " def freeze_groups(self, groups):\n", 345 | " c = self.get_layer_groups()\n", 346 | " self.unfreeze()\n", 347 | " for g in groups:\n", 348 | " set_trainable(c[g], False)\n", 349 | " \n", 350 | " def unfreeze_groups(self, groups):\n", 351 | " c = self.get_layer_groups()\n", 352 | " for g in groups:\n", 353 | " set_trainable(c[g], True)\n", 354 | "\n", 355 | " def unfreeze(self): self.freeze_to(0)\n", 356 | " \n", 357 | " \n", 358 | " def fit(self, lrs, n_cycle, wds=None, **kwargs):\n", 359 | "\n", 360 | " \"\"\"Method gets an instance of LayerOptimizer and delegates to self.fit_gen(..)\n", 361 | "\n", 362 | " Note that one can specify a list of learning rates which, when appropriately\n", 363 | " defined, will be applied to different segments of an architecture. 
This seems\n", 364 | " mostly relevant to ImageNet-trained models, where we want to alter the layers\n", 365 | " closest to the images by much smaller amounts.\n", 366 | "\n", 367 | " Likewise, a single or list of weight decay parameters can be specified, which,\n", 368 | " if appropriate for a model, will apply variable weight decay parameters to\n", 369 | " different segments of the model.\n", 370 | "\n", 371 | " Args:\n", 372 | " lrs (float or list(float)): learning rate for the model\n", 373 | "\n", 374 | " n_cycle (int): number of cycles (or iterations) to fit the model for\n", 375 | "\n", 376 | " wds (float or list(float)): weight decay parameter(s).\n", 377 | "\n", 378 | " kwargs: other arguments\n", 379 | "\n", 380 | " Returns:\n", 381 | " the value returned by self.fit_gen(..)\n", 382 | " \"\"\"\n", 383 | " self.sched = None\n", 384 | " layer_opt = self.get_layer_opt(lrs, wds)\n", 385 | " return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)\n", 386 | "\n" 387 | ] 388 | } 389 | ], 390 | "metadata": { 391 | "celltoolbar": "Slideshow", 392 | "kernelspec": { 393 | "display_name": "Python 3", 394 | "language": "python", 395 | "name": "python3" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 3 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython3", 407 | "version": "3.5.5" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 2 412 | } 413 | -------------------------------------------------------------------------------- /archive/fall_2018/transfer_learning/ULMFiT.sketch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/archive/fall_2018/transfer_learning/ULMFiT.sketch -------------------------------------------------------------------------------- /images/LSTM3-focus-C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/LSTM3-focus-C.png -------------------------------------------------------------------------------- /images/LSTM3-focus-f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/LSTM3-focus-f.png -------------------------------------------------------------------------------- /images/LSTM3-focus-i.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/LSTM3-focus-i.png -------------------------------------------------------------------------------- /images/LSTM3-focus-o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/LSTM3-focus-o.png -------------------------------------------------------------------------------- /images/Simple-graph-example.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/Simple-graph-example.png -------------------------------------------------------------------------------- /images/bilstm_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/bilstm_flow.png -------------------------------------------------------------------------------- /images/blstm_crf_details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/blstm_crf_details.png -------------------------------------------------------------------------------- /images/count_vec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/count_vec.png -------------------------------------------------------------------------------- /images/crf_transition_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/crf_transition_matrix.png -------------------------------------------------------------------------------- /images/diff_types_rnns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/diff_types_rnns.png -------------------------------------------------------------------------------- /images/doccano_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/doccano_label.png -------------------------------------------------------------------------------- /images/linear_crf_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/linear_crf_example.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/logo.png -------------------------------------------------------------------------------- /images/lstm_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/lstm_flow.png -------------------------------------------------------------------------------- /images/lstm_inner_workings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/lstm_inner_workings.png -------------------------------------------------------------------------------- 
/images/network_next_word.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/network_next_word.png -------------------------------------------------------------------------------- /images/nll_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/nll_loss.png -------------------------------------------------------------------------------- /images/rnn_inner_workings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/rnn_inner_workings.png -------------------------------------------------------------------------------- /images/viterbi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/viterbi.png -------------------------------------------------------------------------------- /images/why_sequences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/images/why_sequences.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | jdc 3 | jupyter 4 | matplotlib==3.0.0 5 | numpy 6 | requests 7 | scipy 8 | scikit-learn 9 | torch==1.1.0 10 | torchvision==0.2.2.post3 11 | tqdm -------------------------------------------------------------------------------- /transfer_learning/ULMFiT and Transfer Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# NLP Transfer Learning\n", 12 | "One of the biggest advancements in deep learning for image processing has been transfer learning. In this notebook we look at how NLP is starting to catch up." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": { 18 | "slideshow": { 19 | "slide_type": "slide" 20 | } 21 | }, 22 | "source": [ 23 | "# What is Transfer Learning?\n", 24 | "\n", 25 | "Take a neural network.\n", 26 | "Train it and export the model.\n", 27 | "\n", 28 | "Import part or all of the trained architecture and weights into a new model!\n", 29 | "Reuse those parts as the starting point for a new task!\n", 30 | "\n", 31 | "Well-known ImageNet models:\n", 32 | "\n", 33 | "* VGGNet\n", 34 | "* ResNet\n", 35 | "* Inception\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "# Why Transfer Learning?\n", 47 | "\n", 48 | "* Save training time (e.g., 1 month --> 1 day)\n", 49 | "* Reuse models\n", 50 | "* Train on a smaller data set for a specific task\n", 51 | "* Don't need to reinvent the wheel" 52 | ] 53 | },
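 { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## A minimal sketch in PyTorch\n", "\n", "Below is a small illustration of the recipe above (our sketch, not code from any particular paper): take a torchvision model pre-trained on ImageNet, freeze its weights, and swap in a new head. The 5-class output size is a made-up example task." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Sketch: reuse a pre-trained ImageNet model for a hypothetical 5-class task.\n", "# (torchvision downloads the pre-trained weights the first time this runs.)\n", "import torch.nn as nn\n", "from torchvision import models\n", "\n", "model = models.resnet18(pretrained=True)        # 1. take a trained network\n", "\n", "for param in model.parameters():                # 2. freeze its weights\n", "    param.requires_grad = False\n", "\n", "model.fc = nn.Linear(model.fc.in_features, 5)   # 3. replace the head for the new task\n", "\n", "# Only the new head is trainable now; fine-tune it on the smaller target data set.\n", "trainable = [p for p in model.parameters() if p.requires_grad]" ] },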
 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "slideshow": { 58 | "slide_type": "slide" 59 | } 60 | }, 61 | "source": [ 62 | "# Word Embeddings\n", 63 | "We previously discussed word embeddings. In the past, people have performed transfer learning with just the first embedding layer.\n", 64 | "\n", 65 | "A common choice is:\n", 66 | "\n", 67 | "GloVe: Global Vectors for Word Representation\n", 68 | "https://nlp.stanford.edu/projects/glove/\n", 69 | "\n", 70 | "However, beyond the embedding layer, there have not been many widely shared successes with transfer learning in NLP." 71 | ] 72 | },
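 { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Sketch: GloVe as an embedding layer\n", "\n", "A rough sketch of embedding-layer transfer (our illustration): copy pre-trained GloVe vectors into an `nn.Embedding`. The three-word vocabulary and the random matrix below are stand-ins; in practice the rows are parsed from a downloaded GloVe file such as `glove.6B.100d.txt`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Sketch: initialize an embedding layer from pre-trained word vectors.\n", "# The vocabulary and 'vectors' matrix are toy stand-ins for parsed GloVe data.\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "\n", "vocab = ['the', 'cat', 'sat']\n", "vectors = np.random.randn(len(vocab), 100)  # really: rows read from glove.6B.100d.txt\n", "\n", "embedding = nn.Embedding(len(vocab), 100)\n", "embedding.weight.data.copy_(torch.from_numpy(vectors))\n", "embedding.weight.requires_grad = False  # freeze, or leave True to fine-tune" ] },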
 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "source": [ 81 | "# ULMFiT\n", 82 | "By Jeremy Howard and Sebastian Ruder of fast.ai\n", 83 | "\n", 84 | "## Why ULMFiT\n", 85 | "* Faster retraining for a downstream classification task\n", 86 | "* Better performance\n", 87 | "* Requires a smaller amount of labeled training data\n", 88 | "\n", 89 | "## Blog Introduction\n", 90 | "http://nlp.fast.ai/classification/2018/05/15/introducting-ulmfit.html\n", 91 | "\n", 92 | "## Fast.ai text library -- built on top of PyTorch, with higher-level training utilities\n", 93 | "https://github.com/fastai" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "slide" 101 | } 102 | }, 103 | "source": [ 104 | "# The Paper\n", 105 | "\"Universal Language Model Fine-tuning for Text Classification\" (Howard and Ruder, 2018)\n", 106 | "https://arxiv.org/abs/1801.06146\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "slide" 114 | } 115 | }, 116 | "source": [ 117 | "# Step 1: General Language Model Pre-training\n", 118 | "First, they train a language model on a large general-domain corpus, such as the WikiText-103 data set.\n", 119 | "\n", 120 | "The language model is fairly straightforward and similar to what David showed earlier:\n", 121 | "* An embedding layer + LSTM layers + a linear decoder\n", 122 | "\n", 123 | "They have two main classes:\n", 124 | "* RNN_Encoder (PyTorch nn.Module)\n", 125 | "* Learner\n", 126 | "  * Has a model as an attribute\n", 127 | "  * Has helper methods to work on the model (training, etc.)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "slideshow": { 135 | "slide_type": "skip" 136 | } 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# Note: this is just for demo. It is excerpted from the pre-1.0 fastai code base and\n", 141 | "# depends on fastai internals (EmbeddingDropout, LockedDropout, WeightDrop, etc.), so it will not run on its own.\n", 142 | "class RNN_Encoder(nn.Module):\n", 143 | "\n", 144 | " \"\"\"A custom RNN encoder network that uses\n", 145 | " - an embedding matrix to encode input,\n", 146 | " - a stack of LSTM or QRNN layers to drive the network, and\n", 147 | " - variational dropouts in the embedding and LSTM/QRNN layers\n", 148 | "\n", 149 | " The architecture for this network was inspired by the work done in\n", 150 | " \"Regularizing and Optimizing LSTM Language Models\".\n", 151 | " (https://arxiv.org/pdf/1708.02182.pdf)\n", 152 | " \"\"\"\n", 153 | "\n", 154 | " initrange=0.1\n", 155 | "\n", 156 | " def __init__(self, ntoken, emb_sz, n_hid, n_layers, pad_token, bidir=False,\n", 157 | " dropouth=0.3, dropouti=0.65, dropoute=0.1, wdrop=0.5, qrnn=False):\n", 158 | " \"\"\" Default constructor for the RNN_Encoder class\n", 159 | "\n", 160 | " Args:\n", 161 | " bs (int): batch size of the input data (not a constructor argument; inferred from the input at run time)\n", 162 | " ntoken (int): size of the vocabulary (number of distinct tokens) in the source dataset\n", 163 | " emb_sz (int): the embedding size to use to encode each token\n", 164 | " n_hid (int): number of hidden activations per LSTM layer\n", 165 | " n_layers (int): number of LSTM layers to use in the architecture\n", 166 | " pad_token (int): the int value used for padding text.\n", 167 | " dropouth (float): dropout to apply to the activations going from one LSTM layer to another\n", 168 | " dropouti (float): dropout to apply to the input layer.\n", 169 | " dropoute (float): dropout to apply to the embedding layer.\n", 170 | " wdrop (float): dropout used for an LSTM's internal (or hidden) recurrent weights.\n", 171 | "\n", 172 | " Returns:\n", 173 | " None\n", 174 | " \"\"\"\n", 175 | "\n", 176 | " super().__init__()\n", 177 | " self.ndir = 2 if bidir else 1\n", 178 | " self.bs, self.qrnn = 1, qrnn\n", 179 | " self.encoder = nn.Embedding(ntoken, emb_sz, padding_idx=pad_token)\n", 180 | " self.encoder_with_dropout = EmbeddingDropout(self.encoder)\n", 181 | " if self.qrnn:\n", 182 | " #Using QRNN requires cupy: https://github.com/cupy/cupy\n", 183 | " from .torchqrnn.qrnn import QRNNLayer\n", 184 | " self.rnns = [QRNNLayer(emb_sz if l == 0 else n_hid, (n_hid if l != n_layers - 1 else emb_sz)//self.ndir,\n", 185 | " save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(n_layers)]\n", 186 | " if wdrop:\n", 187 | " for rnn in self.rnns:\n", 188 | " rnn.linear = WeightDrop(rnn.linear, wdrop, weights=['weight'])\n", 189 | " else:\n", 190 | " self.rnns = [nn.LSTM(emb_sz if l == 0 else n_hid, (n_hid if l != n_layers - 1 else emb_sz)//self.ndir,\n", 191 | " 1, bidirectional=bidir) for l in range(n_layers)]\n", 192 | " if wdrop: self.rnns = [WeightDrop(rnn, wdrop) for rnn in self.rnns]\n", 193 | " self.rnns = torch.nn.ModuleList(self.rnns)\n", 194 | " self.encoder.weight.data.uniform_(-self.initrange, self.initrange)\n", 195 | "\n", 196 | " self.emb_sz,self.n_hid,self.n_layers,self.dropoute = emb_sz,n_hid,n_layers,dropoute\n", 197 | " self.dropouti = LockedDropout(dropouti)\n", 198 | " self.dropouths = nn.ModuleList([LockedDropout(dropouth) for l in range(n_layers)])\n", 199 | "\n", 200 | " def forward(self, input):\n", 201 | " \"\"\" Invoked during the forward propagation of the RNN_Encoder module.\n", 202 | " Args:\n", 203 | " input (Tensor): input of shape (sentence length x batch_size)\n", 204 | "\n", 205 | " Returns:\n", 206 | " (raw_outputs, outputs) (tuple(list(Tensor), list(Tensor))): lists of tensors evaluated from each RNN layer\n", 207 | " without dropouth applied, and with dropouth applied, respectively.\n", 208 | " \"\"\"\n", 209 | " sl,bs = input.size()\n", 210 | " if bs!=self.bs:\n", 211 | " self.bs=bs\n", 212 | " self.reset()\n", 213 | " with set_grad_enabled(self.training):\n", 214 | " emb = self.encoder_with_dropout(input, dropout=self.dropoute if self.training else 0)\n", 215 | " emb = self.dropouti(emb)\n", 216 | " raw_output = emb\n", 217 | " new_hidden,raw_outputs,outputs = [],[],[]\n", 218 | " for l, (rnn,drop) in enumerate(zip(self.rnns, self.dropouths)):\n", 219 | " current_input = raw_output\n", 220 | " with warnings.catch_warnings():\n", 221 | " warnings.simplefilter(\"ignore\")\n", 222 | " raw_output, new_h = rnn(raw_output, self.hidden[l])\n", 223 | " new_hidden.append(new_h)\n", 224 | " raw_outputs.append(raw_output)\n", 225 | " if l != self.n_layers - 1: raw_output = drop(raw_output)\n", 226 | " outputs.append(raw_output)\n", 227 | "\n", 228 | " self.hidden = repackage_var(new_hidden)\n", 229 | " return raw_outputs, outputs\n", 230 | "\n", 231 | " def one_hidden(self, l):\n", 232 | " nh = (self.n_hid if l != self.n_layers - 1 else self.emb_sz)//self.ndir\n", 233 | " if IS_TORCH_04: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_())\n", 234 | " else: return Variable(self.weights.new(self.ndir, self.bs, nh).zero_(), volatile=not self.training)\n", 235 | "\n", 236 | " def reset(self):\n", 237 | " if self.qrnn: [r.reset() for r in self.rnns]\n", 238 | " self.weights = next(self.parameters()).data\n", 239 | " if self.qrnn: self.hidden = [self.one_hidden(l) for l in range(self.n_layers)]\n", 240 | " else: self.hidden = [(self.one_hidden(l), self.one_hidden(l)) for l in range(self.n_layers)]\n" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": { 246 | "slideshow": { 247 | "slide_type": "slide" 248 | } 249 | }, 250 | "source": [ 251 | "# Step 2: Language Model Fine-Tuning\n", 252 | "Then they fine-tune the language model on the target data set, so that the model picks up the characteristics (vocabulary, style, domain) of that data set.\n" 253 | ] 254 | },
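 { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## A toy version of Steps 1-2\n", "\n", "To make the two-stage idea concrete, here is a self-contained toy (our sketch, not fastai code): pre-train a tiny Embedding + LSTM + Linear language model on a 'general' corpus, then fine-tune the same model on a 'target' corpus with a lower learning rate. The two corpora are made-up strings standing in for WikiText-103 and the task data set." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Toy sketch of Steps 1-2: pre-train, then fine-tune, one tiny language model.\n", "# The corpora below are made-up stand-ins for WikiText-103 and the task data.\n", "import torch\n", "import torch.nn as nn\n", "\n", "general = 'the cat sat on the mat . the dog sat on the rug .'.split()\n", "target = 'the model sat on the gpu .'.split()\n", "vocab = sorted(set(general + target))\n", "stoi = {w: i for i, w in enumerate(vocab)}\n", "\n", "class TinyLM(nn.Module):\n", "    def __init__(self, ntoken, emb_sz=16, n_hid=32):\n", "        super().__init__()\n", "        self.emb = nn.Embedding(ntoken, emb_sz)\n", "        self.rnn = nn.LSTM(emb_sz, n_hid)\n", "        self.dec = nn.Linear(n_hid, ntoken)\n", "\n", "    def forward(self, x):\n", "        out, _ = self.rnn(self.emb(x))\n", "        return self.dec(out)\n", "\n", "def fit_lm(model, words, lr, steps=50):\n", "    # next-word prediction: inputs are words[:-1], targets are words[1:]\n", "    ids = torch.tensor([stoi[w] for w in words])\n", "    x, y = ids[:-1].unsqueeze(1), ids[1:]  # shape: (seq_len, batch=1)\n", "    opt = torch.optim.SGD(model.parameters(), lr=lr)\n", "    loss_fn = nn.CrossEntropyLoss()\n", "    for _ in range(steps):\n", "        opt.zero_grad()\n", "        loss = loss_fn(model(x).squeeze(1), y)\n", "        loss.backward()\n", "        opt.step()\n", "\n", "lm = TinyLM(len(vocab))\n", "fit_lm(lm, general, lr=0.5)   # Step 1: general-domain pre-training\n", "fit_lm(lm, target, lr=0.05)   # Step 2: fine-tune on the target corpus, lower LR" ] },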
 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "slideshow": { 259 | "slide_type": "slide" 260 | } 261 | }, 262 | "source": [ 263 | "# Step 3: Classifier Fine-Tuning\n", 264 | "\n", 265 | "In this part, they:\n", 266 | "1. Add a classification head to the model\n", 267 | "2. Unfreeze the last layer and retrain\n", 268 | "3. Gradually unfreeze the remaining layers and retrain\n", 269 | "\n", 270 | "https://github.com/fastai/fastai/blob/master/courses/dl2/imdb_scripts/train_clas.py" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": { 277 | "slideshow": { 278 | "slide_type": "skip" 279 | } 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "# This is how fastai freezes parameters and layer groups in its Learner class\n", 284 | "# (apply_leaf is another fastai helper that applies a function to each leaf module):\n", 285 | "def set_trainable_attr(m,b):\n", 286 | " m.trainable=b\n", 287 | " for p in m.parameters(): p.requires_grad=b\n", 288 | "\n", 289 | "def set_trainable(l, b):\n", 290 | " apply_leaf(l, lambda m: set_trainable_attr(m,b))\n" 291 | ] 292 | },
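 { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## The same idea in plain PyTorch\n", "\n", "A plain-PyTorch sketch (our illustration, not fastai code) of what this freezing machinery is used for in Step 3: gradual unfreezing just toggles `requires_grad` per layer group, and discriminative learning rates are just optimizer parameter groups. Compare `Learner.freeze_to` and `Learner.fit(lrs=[...])` in the cells below; the three-group model here is a stand-in." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Sketch: gradual unfreezing and discriminative learning rates without fastai.\n", "# The three layer 'groups' stand in for an encoder split into layer groups.\n", "import torch\n", "import torch.nn as nn\n", "\n", "groups = [nn.Linear(10, 10), nn.Linear(10, 10), nn.Linear(10, 2)]\n", "model = nn.Sequential(*groups)\n", "\n", "def freeze_to(n):\n", "    # freeze every group, then unfreeze group n and all later groups\n", "    for g in groups:\n", "        for p in g.parameters():\n", "            p.requires_grad = False\n", "    for g in groups[n:]:\n", "        for p in g.parameters():\n", "            p.requires_grad = True\n", "\n", "freeze_to(-1)  # first: train only the last group (the new classifier head)\n", "freeze_to(0)   # later: unfreeze everything, as in gradual unfreezing\n", "\n", "# Discriminative learning rates: earlier groups get smaller learning rates,\n", "# analogous to passing a list of lrs to Learner.fit.\n", "opt = torch.optim.SGD([\n", "    {'params': g.parameters(), 'lr': lr}\n", "    for g, lr in zip(groups, [1e-4, 1e-3, 1e-2])\n", "])" ] },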
 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "slideshow": { 298 | "slide_type": "skip" 299 | } 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "class PoolingLinearClassifier(nn.Module):\n", 304 | " def __init__(self, layers, drops):\n", 305 | " super().__init__()\n", 306 | " self.layers = nn.ModuleList([\n", 307 | " LinearBlock(layers[i], layers[i + 1], drops[i]) for i in range(len(layers) - 1)])\n", 308 | "\n", 309 | " def pool(self, x, bs, is_max):\n", 310 | " f = F.adaptive_max_pool1d if is_max else F.adaptive_avg_pool1d\n", 311 | " return f(x.permute(1,2,0), (1,)).view(bs,-1)\n", 312 | "\n", 313 | " def forward(self, input):\n", 314 | " raw_outputs, outputs = input\n", 315 | " output = outputs[-1]\n", 316 | " sl,bs,_ = output.size()\n", 317 | " avgpool = self.pool(output, bs, False)\n", 318 | " mxpool = self.pool(output, bs, True)\n", 319 | " x = torch.cat([output[-1], mxpool, avgpool], 1)\n", 320 | " for l in self.layers:\n", 321 | " l_x = l(x)\n", 322 | " x = F.relu(l_x)\n", 323 | " return l_x, raw_outputs, outputs\n" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "class Learner():\n", 333 | " ...\n", 334 | " def freeze_to(self, n):\n", 335 | " c=self.get_layer_groups()\n", 336 | " for l in c: set_trainable(l, False)\n", 337 | " for l in c[n:]: set_trainable(l, True)\n", 338 | "\n", 339 | " def freeze_all_but(self, n):\n", 340 | " c=self.get_layer_groups()\n", 341 | " for l in c: set_trainable(l, False)\n", 342 | " set_trainable(c[n], True)\n", 343 | " \n", 344 | " def freeze_groups(self, groups):\n", 345 | " c = self.get_layer_groups()\n", 346 | " self.unfreeze()\n", 347 | " for g in groups:\n", 348 | " set_trainable(c[g], False)\n", 349 | " \n", 350 | " def unfreeze_groups(self, groups):\n", 351 | " c = self.get_layer_groups()\n", 352 | " for g in groups:\n", 353 | " set_trainable(c[g], True)\n", 354 | "\n", 355 | " def unfreeze(self): self.freeze_to(0)\n", 356 | " \n", 357 | " \n", 358 | " def fit(self, lrs, n_cycle, wds=None, **kwargs):\n", 359 | "\n", 360 | " \"\"\"Method gets an instance of LayerOptimizer and delegates to self.fit_gen(..)\n", 361 | "\n", 362 | " Note that one can specify a list of learning rates which, when appropriately\n", 363 | " defined, will be applied to different segments of an architecture. This seems\n", 364 | " mostly relevant to ImageNet-trained models, where we want to alter the layers\n", 365 | " closest to the images by much smaller amounts.\n", 366 | "\n", 367 | " Likewise, a single or list of weight decay parameters can be specified, which,\n", 368 | " if appropriate for a model, will apply variable weight decay parameters to\n", 369 | " different segments of the model.\n", 370 | "\n", 371 | " Args:\n", 372 | " lrs (float or list(float)): learning rate for the model\n", 373 | "\n", 374 | " n_cycle (int): number of cycles (or iterations) to fit the model for\n", 375 | "\n", 376 | " wds (float or list(float)): weight decay parameter(s).\n", 377 | "\n", 378 | " kwargs: other arguments\n", 379 | "\n", 380 | " Returns:\n", 381 | " the value returned by self.fit_gen(..)\n", 382 | " \"\"\"\n", 383 | " self.sched = None\n", 384 | " layer_opt = self.get_layer_opt(lrs, wds)\n", 385 | " return self.fit_gen(self.model, self.data, layer_opt, n_cycle, **kwargs)\n", 386 | "\n" 387 | ] 388 | } 389 | ], 390 | "metadata": { 391 | "celltoolbar": "Slideshow", 392 | "kernelspec": { 393 | "display_name": "Python 3", 394 | "language": "python", 395 | "name": "python3" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 3 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython3", 407 | "version": "3.5.5" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 2 412 | } 413 | -------------------------------------------------------------------------------- /transfer_learning/ULMFiT.sketch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonWorkshop/intro-to-nlp-with-pytorch/eb1a814589cdd4a41ae8bdabfdf9a0c4ad5fc55c/transfer_learning/ULMFiT.sketch --------------------------------------------------------------------------------