├── .github └── FUNDING.yml ├── imgs ├── dog.jpeg ├── linreg_setosa.png └── matrix_factorization_city_temperature.png ├── .gitignore ├── extra ├── imgs │ ├── squeezenet_fox_kerasjs.png │ └── imagenet_sota_paperswithcode.png ├── CIFAR.ipynb ├── Matrix exercise.ipynb ├── Operation playground.ipynb └── CPU vs GPU.ipynb ├── environment.yml ├── data ├── Heart_rate_and_weight.csv └── Animals.csv ├── LICENSE ├── materials.md ├── 7 Log loss.ipynb ├── rnns ├── 3 Embedding vs one-hot encoding.ipynb ├── OpenAI bot.ipynb ├── Names gender 1.ipynb ├── 4 LSTM GRU anatomy.ipynb ├── Transformer example.ipynb ├── Names gender 2.ipynb ├── 1 RNN architecture overview.ipynb └── Word vectors.ipynb ├── 0 Before you start.ipynb ├── README.md ├── 6 Classification.ipynb ├── convnets ├── Data augmentation.ipynb └── Transfer learning.ipynb ├── 5 Nonlinear regression.ipynb ├── 1 Vectors, matrices and tensors.ipynb └── 4 Multiple Linear Regression.ipynb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [stared] 4 | -------------------------------------------------------------------------------- /imgs/dog.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stared/thinking-in-tensors-writing-in-pytorch/HEAD/imgs/dog.jpeg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | random/ 3 | 4 | .ipynb_checkpoints/ 5 | 6 | *.pyc 7 | 8 | .DS_Store 9 | 10 | secret.json -------------------------------------------------------------------------------- /imgs/linreg_setosa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stared/thinking-in-tensors-writing-in-pytorch/HEAD/imgs/linreg_setosa.png 
-------------------------------------------------------------------------------- /extra/imgs/squeezenet_fox_kerasjs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stared/thinking-in-tensors-writing-in-pytorch/HEAD/extra/imgs/squeezenet_fox_kerasjs.png -------------------------------------------------------------------------------- /extra/imgs/imagenet_sota_paperswithcode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stared/thinking-in-tensors-writing-in-pytorch/HEAD/extra/imgs/imagenet_sota_paperswithcode.png -------------------------------------------------------------------------------- /imgs/matrix_factorization_city_temperature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stared/thinking-in-tensors-writing-in-pytorch/HEAD/imgs/matrix_factorization_city_temperature.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: thinking-in-tensors 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | dependencies: 6 | - python=3.11 7 | - pip 8 | - pytorch 9 | - torchvision 10 | - jupyter 11 | - numpy 12 | - matplotlib 13 | - scikit-learn 14 | - pandas 15 | - h5py 16 | 17 | - pip: 18 | - seaborn 19 | - livelossplot 20 | - openai 21 | -------------------------------------------------------------------------------- /data/Heart_rate_and_weight.csv: -------------------------------------------------------------------------------- 1 | Creature,"Average Heart Rate 2 | (beats per  3 | minute)","Weight 4 | (grams)" 5 | Human,60,90000 6 | Cat,150,2000 7 | Small dog,100,2000 8 | Medium dog,90,5000 9 | Large dogs:,75,8000 10 | Hamster,450,60 11 | Chick,400,50 12 | Chicken,275,1500 13 | Monkey,192,5000 14 | Horse,44,1200000 15 | 
Cow,65,800000 16 | Pig,70,150000 17 | Rabbit,205,1000 18 | elephant,30,5000000 19 | giraffe,65,900000 20 | large whales,20,120000000 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Piotr Migdał 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data/Animals.csv: -------------------------------------------------------------------------------- 1 | Species,BodyWeight(kg),BrainWeight(kg),Brain-to-Body-Weight_Proportion 2 | Newborn_Human,3.2,0.374984813,0.117182754 3 | Adult_Human,73,1.349981613,0.018492899 4 | Pithecanthropus_Man,70,0.925010921,0.013214442 5 | Squirrel,0.8,0.007620352,0.00952544 6 | Hamster,0.15,0.001406136,0.009374242 7 | Chimpanzee,50,0.419981176,0.008399624 8 | Rabbit,1.4,0.011521246,0.008229462 9 | Dog_(Beagle),10,0.071985109,0.007198511 10 | Cat,4.5,0.029982456,0.006662768 11 | Rat,0.4,0.001995806,0.004989516 12 | Bottle-Nosed_Dolphin,400,1.49998461,0.003749962 13 | Beaver,24,0.044996363,0.001874848 14 | Gorilla,320,0.502489628,0.00157028 15 | Tiger,170,0.263491808,0.001549952 16 | Owl,1.5,0.002222603,0.001481735 17 | Camel,550,0.761989823,0.001385436 18 | Elephant,4600,5.999983798,0.001304344 19 | Lion,187,0.239995723,0.0012834 20 | Sheep,120,0.139978605,0.001166488 21 | Walrus,800,0.925010921,0.001156264 22 | Horse,450,0.502489628,0.001116644 23 | Cow,700,0.441481454,0.000630688 24 | Giraffe,950,0.532018491,0.000560019 25 | Green_Lizard,0.2,9.07185E-05,0.000453592 26 | Sperm_Whale,35000,7.799974401,0.000222856 27 | Turtle,3,0.000317515,0.000105838 28 | Alligator,270,0.008391459,3.10795E-05 29 | -------------------------------------------------------------------------------- /materials.md: -------------------------------------------------------------------------------- 1 | 2 | ## In order 3 | 4 | * Vectors 5 | * Extra: tensors, Einstein notation 6 | * Gradient descent 7 | * $y=x^2$ 8 | * Backpropagation 9 | * Linear regression 10 | * Nonlinearities 11 | * Hidden layer 12 | * Exercise with functions 13 | * Logistic regression 14 | * Logistic function 15 | * Log-loss 16 | * MLP and non-linear slits (with examples) 17 | 18 | 19 | ## All other stuff 20 | 21 | * Various gradient optimizers 22 | * 
Convolutions 23 | * ImageNet networks 24 | 25 | * Residual networks 26 | * [Neural networks as Ordinary Differential Equations](https://rkevingibson.github.io/blog/neural-networks-as-ordinary-differential-equations/) 27 | 28 | * LSTMs as evergreen architecture 29 | * [How to implement a YOLO (v3) object detector from scratch in PyTorch: Part 1](https://blog.paperspace.com/how-to-implement-a-yolo-object-detector-in-pytorch/) 30 | 31 | ## Random but interesting stuff 32 | 33 | * Game of life (and variants) in PyTorch 34 | * [Guide to Restricted Boltzmann Machines Using PyTorch](https://heartbeat.fritz.ai/guide-to-restricted-boltzmann-machines-using-pytorch-ee50d1ed21a8) 35 | 36 | 37 | 38 | ## Maybe useful 39 | 40 | https://github.com/trekhleb/homemade-machine-learning/blob/master/README.md 41 | -------------------------------------------------------------------------------- /7 Log loss.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors, writing in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version for Uniwersytet Śląski.\n", 10 | "\n", 11 | "**WORK IN PROGRESS**\n", 12 | "\n", 13 | "\n", 14 | "## Log loss\n", 15 | "\n", 16 | "Multi-class logistic regression can be expressed as a shallow neural network consisting of one linear layer and a softmax activation function.\n", 17 | "\n", 18 | "For binary classification, we can use sigmoid (a.k.a. logistic function):\n", 19 | "\n", 20 | "$$ \\sigma(x) = \\frac{1}{1+\\exp(-x)} $$\n", 21 | "\n", 22 | "Softmax function transforms any vector into distribution vector (values in range (0., 1.)
that sum up to 1.):\n", 23 | "$$\\text{softmax}(x_i) = \\frac{\\exp(x_i)}{\\sum_j \\exp(x_j)}$$\n", 24 | "\n", 25 | "We use a cross-entropy loss function:\n", 26 | "$$- \\sum_j p_{j, true} \\log(p_{j, pred})$$\n", 27 | "\n", 28 | "Note that we do not state explicitly the softmax function in the model class below. For details see [torch.nn.CrossEntropyLoss](https://pytorch.org/docs/stable/nn.html#torch.nn.CrossEntropyLoss).\n", 29 | "\n", 30 | "See also:\n", 31 | "\n", 32 | "* [Cross-entropy vs. mean-squared error loss](https://www.reddit.com/r/MachineLearning/comments/8im9eb/d_crossentropy_vs_meansquared_error_loss/)\n", 33 | "* [Understanding binary cross-entropy / log loss: a visual explanation](https://towardsdatascience.com/understanding-binary-cross-entropy-log-loss-a-visual-explanation-a3ac6025181a)\n", 34 | "* [Cross entropy](https://pandeykartikey.github.io/machine/learning/basics/2018/05/22/cross-entropy.html) - another explanation\n", 35 | "* [Softmax function](https://en.wikipedia.org/wiki/Softmax_function)\n", 36 | "* [Multiclass logistic regression](https://en.wikipedia.org/wiki/Multinomial_logistic_regression)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import numpy as np" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [] 54 | } 55 | ], 56 | "metadata": { 57 | "kernelspec": { 58 | "display_name": "Python [conda env:py37]", 59 | "language": "python", 60 | "name": "conda-env-py37-py" 61 | }, 62 | "language_info": { 63 | "codemirror_mode": { 64 | "name": "ipython", 65 | "version": 3 66 | }, 67 | "file_extension": ".py", 68 | "mimetype": "text/x-python", 69 | "name": "python", 70 | "nbconvert_exporter": "python", 71 | "pygments_lexer": "ipython3", 72 | "version": "3.7.2" 73 | }, 74 | "varInspector": { 75 | "cols": { 76 | "lenName": 16, 77 | "lenType": 16, 78 | "lenVar": 40 79 
| }, 80 | "kernels_config": { 81 | "python": { 82 | "delete_cmd_postfix": "", 83 | "delete_cmd_prefix": "del ", 84 | "library": "var_list.py", 85 | "varRefreshCmd": "print(var_dic_list())" 86 | }, 87 | "r": { 88 | "delete_cmd_postfix": ") ", 89 | "delete_cmd_prefix": "rm(", 90 | "library": "var_list.r", 91 | "varRefreshCmd": "cat(var_dic_list()) " 92 | } 93 | }, 94 | "types_to_exclude": [ 95 | "module", 96 | "function", 97 | "builtin_function_or_method", 98 | "instance", 99 | "_Feature" 100 | ], 101 | "window_display": false 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /rnns/3 Embedding vs one-hot encoding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). \n", 10 | "\n", 11 | "Version for [AI & NLP Workshop Day](https://nlpday.pl/), 31 May 2019, Warsaw, Poland: **Understanding LSTM and GRU networks in PyTorch**.\n", 12 | "\n", 13 | "\n", 14 | "\n", 15 | "## NLP & AI: 3. 
Embedding vs one-hot encoding\n", 16 | "\n", 17 | "\n", 18 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/rnns/3%20Embedding%20vs%20one-hot%20encoding.ipynb)\n", 19 | " " 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import torch\n", 29 | "from torch import nn" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "emb = nn.Embedding(10, 3)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "emb.weight" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "words = torch.LongTensor([[2, 2, 4, 1, 5]])\n", 57 | "words" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "emb(words)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "words_onehot = torch.zeros((5, 10), dtype=torch.float32)\n", 76 | "for i, j in enumerate([ 2, 2, 4, 1, 5]):\n", 77 | " words_onehot[i, j] = 1.\n", 78 | "words_onehot" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "words_onehot.matmul(emb.weight)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python [default]", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode":
{ 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.5.4" 115 | }, 116 | "varInspector": { 117 | "cols": { 118 | "lenName": 16, 119 | "lenType": 16, 120 | "lenVar": 40 121 | }, 122 | "kernels_config": { 123 | "python": { 124 | "delete_cmd_postfix": "", 125 | "delete_cmd_prefix": "del ", 126 | "library": "var_list.py", 127 | "varRefreshCmd": "print(var_dic_list())" 128 | }, 129 | "r": { 130 | "delete_cmd_postfix": ") ", 131 | "delete_cmd_prefix": "rm(", 132 | "library": "var_list.r", 133 | "varRefreshCmd": "cat(var_dic_list()) " 134 | } 135 | }, 136 | "types_to_exclude": [ 137 | "module", 138 | "function", 139 | "builtin_function_or_method", 140 | "instance", 141 | "_Feature" 142 | ], 143 | "window_display": false 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } 149 | -------------------------------------------------------------------------------- /0 Before you start.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors, writing in PyTorch\n", 9 | "\n", 10 | "A hands-on course by [Piotr Migdał](https://p.migdal.pl) et al. 
(2019-2022).\n", 11 | "\n", 12 | "An interactive, installation-free version of this course: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/)\n", 13 | "\n", 14 | "## Notebook 0: Before you start\n", 15 | "\n", 16 | "\n", 17 | "It is an introduction to deep learning using PyTorch.\n", 18 | "\n", 19 | "More about it in the description: https://github.com/stared/thinking-in-tensors-writing-in-pytorch\n", 20 | "\n", 21 | "Also, if you have any problems, please use [GitHub issues](https://github.com/stared/thinking-in-tensors-writing-in-pytorch/issues). \n", 22 | "\n", 23 | "\n", 24 | "### Installation instructions\n", 25 | "\n", 26 | "The whole course is written in Jupyter Notebook, an interactive interface for Python 3.\n", 27 | "\n", 28 | "There are a few ways to run it.\n", 29 | "\n", 30 | "* online, using [Google Colaboratory](https://colab.research.google.com/)\n", 31 | "* locally, using [Anaconda Distribution](https://www.anaconda.com/distribution/#download-section)\n", 32 | "\n", 33 | "On top of standard scientific libraries for Python (3.5 or higher), we need:\n", 34 | "\n", 35 | "* [PyTorch](https://pytorch.org/)\n", 36 | "* [livelossplot](https://github.com/stared/livelossplot)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import torch" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "torch.__version__ # should be 1.12.1 or higher" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import livelossplot" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "livelossplot.__version__ #should be
0.5.5 or higher" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# if it doesn't work, type\n", 82 | "!pip install livelossplot --quiet" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "pytorch-macos-m1-gpu", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.9.13" 110 | }, 111 | "varInspector": { 112 | "cols": { 113 | "lenName": 16, 114 | "lenType": 16, 115 | "lenVar": 40 116 | }, 117 | "kernels_config": { 118 | "python": { 119 | "delete_cmd_postfix": "", 120 | "delete_cmd_prefix": "del ", 121 | "library": "var_list.py", 122 | "varRefreshCmd": "print(var_dic_list())" 123 | }, 124 | "r": { 125 | "delete_cmd_postfix": ") ", 126 | "delete_cmd_prefix": "rm(", 127 | "library": "var_list.r", 128 | "varRefreshCmd": "cat(var_dic_list()) " 129 | } 130 | }, 131 | "types_to_exclude": [ 132 | "module", 133 | "function", 134 | "builtin_function_or_method", 135 | "instance", 136 | "_Feature" 137 | ], 138 | "window_display": false 139 | }, 140 | "vscode": { 141 | "interpreter": { 142 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 143 | } 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Thinking in tensors, writing in PyTorch 2 | 3 | A hands-on deep learning introduction, from pieces. 
4 | 5 | For an interactive, installation-free version, use Colab: https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/ 6 | 7 | By [Piotr Migdał](https://p.migdal.pl/) et al. (Weronika Ormaniec, possibly others) 8 | 9 | 10 | > “Study hard what interests you the most in the most undisciplined, irreverent and original manner possible.” ― Richard Feynman 11 | 12 | > “Scientists start out doing work that's perfect, in the sense that they're just trying to reproduce work someone else has already done for them. Eventually, they get to the point where they can do original work. Whereas hackers, from the start, are doing original work; it's just very bad. So hackers start original, and get good, and scientists start good, and get original.” - Paul Graham in [Hackers and Painters](http://www.paulgraham.com/hp.html) 13 | 14 | ## Supporters 15 | 16 | This project is supported by: [Jacek Migdał](http://jacek.migdal.pl/), [Marek Cichy](https://medium.com/@marekkcichy/). [Join the sponsors - show your ❤️ and support](https://github.com/sponsors/stared)! It will give me time and energy to work on this project! 17 | 18 | This project benefited from a [University of Silesia in Katowice](http://english.us.edu.pl/) course, which they let me open source. 19 | 20 | ## What's that? 21 | 22 | Mathematical concepts behind deep learning using PyTorch 1.0. 23 | 24 | * All math equations as PyTorch code 25 | * Explicit, minimalistic examples 26 | * Jupyter Notebook for interactivity 27 | * “On the shoulders of giants” - I link and refer to the best materials I know 28 | * Fully open source & open for collaboration (I guess I will go with MIT for code, CC-BY for anything else) 29 | 30 | 31 | ## Why not something else? 32 | 33 | There are quite a few practical introductions to deep learning. I recommend [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python) by François Chollet (the Keras author).
If you want, you can classify small pictures, or extraterrestrial beings, today. 34 | 35 | When it comes to the mathematical background, [Deep Learning Book](https://www.deeplearningbook.org/) by Ian Goodfellow et al. is a great starting point, giving a lot of overview. Though, it requires a lot of interest in maths. Convolutional networks start well after page 300. 36 | 37 | I struggled to find something in the middle ground - showing mathematical foundations of deep learning, step by step, at the same time translating it into code. The closest example is [CS231n: Convolutional Neural Networks for Visual Recognition](http://cs231n.stanford.edu/) (which is, IMHO, a masterpiece). Though, I believe that instead of using NumPy we can use PyTorch, giving a smooth transition between mathematical ideas and practical, working code. 38 | 39 | Of course, there are quite a few awesome posts, notebooks and visualizations. I try to link to the ones that are useful for the reader. In particular, I maintain a collaborative list of [Interactive Machine Learning, Deep Learning and Statistics websites](https://p.migdal.pl/interactive-machine-learning-list/). 40 | 41 | 42 | ## Contribute! 43 | 44 | Crucially, this course is for you, the reader. If you are interested in one topic, let us know! There is nothing more inspiring than eager readers. 45 | 46 | 47 | ## Style 48 | 49 | * Start with concrete examples first 50 | * First 1d, then more 51 | * Equations in LaTeX AND PyTorch 52 | * `x.matmul(y).pow(2).sum()` not `torch.sum(torch.matmul(x, y) ** 2)` 53 | 54 | 55 | ## Adverts 56 | 57 | A few links of mine: 58 | 59 | * [Learning deep learning with Keras](https://p.migdal.pl/2017/04/30/teaching-deep-learning.html) - an overview of deep learning (what's that? what one should learn before); post from 2017 but surprisingly up-to-date 60 | * My deep learning framework credo: [Keras or PyTorch as your first deep learning framework](https://deepsense.ai/keras-or-pytorch/) 61 | * [Keras vs.
PyTorch: Alien vs. Predator recognition with transfer learning ](https://deepsense.ai/keras-vs-pytorch-avp-transfer-learning/) 62 | * [My general overview of “how to start data science”](https://p.migdal.pl/2016/03/15/data-science-intro-for-math-phys-background.html) (turns out - not only for math/phys background; though, I intend to write a separate text for non-STEM backgrounds) 63 | * [Quantum Tensors](https://github.com/stared/quantum-tensors) - a JavaScript / TypeScript package for sparse tensor operations on complex numbers. For example for quantum computing, quantum information, and well - the Quantum Game. 64 | * [Simple diagrams of convoluted neural networks](https://medium.com/inbrowserai/simple-diagrams-of-convoluted-neural-networks-39c097d2925b) - on deep learning architecture visualizations 65 | * I am an independent AI consultant, specializing in giving hands-on trainings in deep learning (and general machine learning). If you are interested in a workshop, let me know at [p.migdal.pl](https://p.migdal.pl/)! 66 | -------------------------------------------------------------------------------- /rnns/OpenAI bot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip install openai --quiet" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import json\n", 19 | "import openai" 20 | ] 21 | }, 22 | { 23 | "attachments": {}, 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "You need to obtain keys:\n", 28 | "\n", 29 | "* \n", 30 | "\n", 31 | "Good practices:\n", 32 | "\n", 33 | "* DON'T put your secret key in a visible place (e.g. 
Jupyter Notebook, hardcoded or printed as an output)\n", 34 | "* Standard ways include using env variables or loading from a git-ignored secret config file\n", 35 | "* Generate a new key for every app/usage\n", 36 | "* Revoke keys once you no longer use a given service\n", 37 | "* If you suspect your key has been compromised, regenerate it\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "with open(\"secret.json\") as f:\n", 47 | " secret = json.load(f)\n", 48 | "\n", 49 | "openai.api_key = secret['openai_key']" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "response = openai.Completion.create(model=\"text-davinci-003\", prompt=\"A nice way to say goodbye with emoji:\", temperature=0, max_tokens=7)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | " JSON: {\n", 70 | " \"choices\": [\n", 71 | " {\n", 72 | " \"finish_reason\": \"length\",\n", 73 | " \"index\": 0,\n", 74 | " \"logprobs\": null,\n", 75 | " \"text\": \"\\n\\n\\ud83d\\udc4b\\ud83c\\udffc\"\n", 76 | " }\n", 77 | " ],\n", 78 | " \"created\": 1670853454,\n", 79 | " \"id\": \"cmpl-6MdbCI0bnxBdlc9D10kT6FhBD7Z43\",\n", 80 | " \"model\": \"text-davinci-003\",\n", 81 | " \"object\": \"text_completion\",\n", 82 | " \"usage\": {\n", 83 | " \"completion_tokens\": 7,\n", 84 | " \"prompt_tokens\": 9,\n", 85 | " \"total_tokens\": 16\n", 86 | " }\n", 87 | "}" 88 | ] 89 | }, 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "response" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "\n", 109 | "\n", 110 | "👋🏼\n" 111
| ] 112 | } 113 | ], 114 | "source": [ 115 | "print(response.choices[0].text)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "def ask_openai(prompt: str, temperature: float, max_tokens: int=10, model=\"text-davinci-003\") -> str:\n", 125 | " response = openai.Completion.create(model=model, prompt=prompt, temperature=temperature, max_tokens=max_tokens)\n", 126 | " return response.choices[0].text" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "'\\n\\nKathmandu.'" 138 | ] 139 | }, 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "ask_openai(\"What the capital of Nepal?\", 0)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 8, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "'\\n\\nKathmandu'" 158 | ] 159 | }, 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "ask_openai(\"What the capital of Nepal?\", 1)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 10, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "'\\n\\nKathmandu is the capital of'" 178 | ] 179 | }, 180 | "execution_count": 10, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "ask_openai(\"What the capital of Nepal?\", 2)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 12, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "'\\n\\nPogoń za dzikim gęsią.'" 198 | ] 199 | }, 200 | "execution_count": 12, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 
| } 204 | ], 205 | "source": [ 206 | "ask_openai(\"Translate to Polish: 'A wild goose chase'\", 0, max_tokens=30)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 16, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "'\\n\\nA wild goose chase is an idiom meaning a futile or fruitless pursuit, especially one that is lengthy or complicated. It is derived from the 16th century English game of the same name, in which a hunter would chase a goose or other game around a field.'" 218 | ] 219 | }, 220 | "execution_count": 16, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "ask_openai(\"What does 'A wild goose chase' mean?\", 0, max_tokens=100)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 17, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "'\\n\\n\"Pogoń za zającem\".'" 238 | ] 239 | }, 240 | "execution_count": 17, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "ask_openai(\"How would you translate an imdiom 'A wild goose chase' into Polish?\", 0, max_tokens=100)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 18, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "'\\n\\n\"Poszukiwanie czegoś straconego\".'" 258 | ] 259 | }, 260 | "execution_count": 18, 261 | "metadata": {}, 262 | "output_type": "execute_result" 263 | } 264 | ], 265 | "source": [ 266 | "ask_openai(\"How would you translate an imdiom 'A wild goose chase' into Polish?\", 1, max_tokens=100)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "pytorch-macos-m1-gpu", 280 | "language": "python", 281 | "name": 
"python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.9.13" 294 | }, 295 | "orig_nbformat": 4, 296 | "vscode": { 297 | "interpreter": { 298 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 299 | } 300 | } 301 | }, 302 | "nbformat": 4, 303 | "nbformat_minor": 2 304 | } 305 | -------------------------------------------------------------------------------- /extra/CIFAR.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version 0.4 for Uniwersytet Śląski.\n", 10 | "\n", 11 | "**Under construction**\n", 12 | "\n", 13 | "\n", 14 | "## Extra: Image classification with CIFAR\n", 15 | "\n", 16 | "\n", 17 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/extra/CIFAR.ipynb)\n", 18 | "\n", 19 | "\n", 20 | "\n", 21 | "Datasets: https://pytorch.org/docs/stable/torchvision/datasets.html\n", 22 | "\n", 23 | "See [Starting deep learning hands-on: image classification on CIFAR-10](https://deepsense.ai/deep-learning-hands-on-image-classification/) for a longer description\n", 24 | "\n", 25 | "State of the art results:\n", 26 | "\n", 27 | "* https://github.com/kuangliu/pytorch-cifar\n", 28 | "* [Browse state-of-the-art for AI](https://paperswithcode.com/sota)\n", 29 | " * [Image Classification on CIFAR-10](https://paperswithcode.com/sota/image-classification-on-cifar-10)\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | 
"execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "import numpy as np\n", 39 | "from matplotlib import pyplot as plt\n", 40 | "from torchvision import datasets, transforms\n", 41 | "from torch.utils.data import DataLoader" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "datasets.ImageFolder?" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "transform = transforms.Compose([\n", 60 | " #transforms.RandomAffine(45, scale=(0.5, 2.)),\n", 61 | " transforms.ToTensor(),\n", 62 | " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", 63 | "\n", 64 | "trainset = datasets.CIFAR10(root='./data', train=True,\n", 65 | " download=True, transform=transform)\n", 66 | "trainloader = DataLoader(trainset, batch_size=4,\n", 67 | " shuffle=True, num_workers=2)\n", 68 | "\n", 69 | "testset = datasets.CIFAR10(root='./data', train=False,\n", 70 | " download=True, transform=transform)\n", 71 | "testloader = DataLoader(testset, batch_size=4,\n", 72 | " shuffle=False, num_workers=2)\n", 73 | "\n", 74 | "classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "for inputs, labels in trainloader:\n", 84 | " inputs = inputs\n", 85 | " labels = labels\n", 86 | " print(\"Input size: \", inputs.size())\n", 87 | " print(\"Label size: \", labels.size())\n", 88 | " print(\"Label values: \", labels)\n", 89 | " break" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# labels are integers\n", 99 | "labels.dtype" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 
106 | "outputs": [], 107 | "source": [ 108 | "img = 0.5 * inputs.permute(0, 2, 3, 1).numpy()[0] + 0.5\n", 109 | "plt.imshow(img)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "for x in inputs:\n", 119 | " print(x.size())" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "def draw_batch(X, Y, classes=classes, scale=3):\n", 129 | " assert X.size(0) == Y.size(0)\n", 130 | " n = X.size(0)\n", 131 | " fig, axs = plt.subplots(1, n, figsize=(scale * n, scale))\n", 132 | " for i, img_tensor in enumerate(X):\n", 133 | " img_numpy = img_tensor.permute(1, 2, 0).numpy()\n", 134 | " ax = axs[i]\n", 135 | " ax.imshow(0.5 * img_numpy + 0.5, interpolation='none')\n", 136 | " ax.set_title(classes[Y[i].item()])\n", 137 | " ax.axis('off')" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "for inputs, labels in trainloader:\n", 147 | " draw_batch(inputs, labels)\n", 148 | " plt.show()\n", 149 | " break" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "avg_per_class = np.zeros((len(classes), 32, 32, 3), dtype=np.float32)\n", 159 | "class_count = np.zeros((len(classes)), dtype=np.float32)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "for inputs, labels in trainloader:\n", 169 | " avg_per_class[labels.numpy()] += 0.5 * inputs.sum(dim=0).permute(1, 2, 0).numpy() + 0.5\n", 170 | " for label in labels:\n", 171 | " class_count[label.item()] += 1." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "[The Average Woman's Face Around The World](https://www.huffingtonpost.ca/2013/10/07/average-woman-face-around-world_n_4058145.html) - an art project by [pmsol3.wordpress.com](https://pmsol3.wordpress.com/)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "import math\n", 188 | "\n", 189 | "def draw_averages(trainloader, classes=classes, cols=5, scale=2):\n", 190 | " \n", 191 | " avg_per_class = np.zeros((len(classes), 32, 32, 3), dtype=np.float32)\n", 192 | " class_count = np.zeros((len(classes)), dtype=np.float32)\n", 193 | " \n", 194 | " for inputs, labels in trainloader:\n", 195 | " avg_per_class[labels.numpy()] += 0.5 * inputs.sum(dim=0).permute(1, 2, 0).numpy() + 0.5\n", 196 | " for label in labels:\n", 197 | " class_count[label.item()] += 1.\n", 198 | "\n", 199 | " fig, axs = plt.subplots(1, cols, figsize=(scale * cols, scale))\n", 200 | " for i, class_name in enumerate(classes):\n", 201 | " ax = axs[i]\n", 202 | " ax.imshow(avg_per_class[i] / class_count[i], interpolation='none')\n", 203 | " ax.set_title(class_name)\n", 204 | " ax.axis('off') \n", 205 | " \n", 206 | "# rows = math.ceil(len(classes) / cols)\n", 207 | "# fig, axs = plt.subplots(rows, cols, figsize=(scale * cols, scale * rows))\n", 208 | "# for i, class_name in enumerate(classes):\n", 209 | "# ax = axs[math.floor(i / rows), i % rows]\n", 210 | "# ax.imshow(avg_per_class[i] / class_count[i], interpolation='none')\n", 211 | "# ax.set_title(class_name)\n", 212 | "# ax.axis('off')" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "draw_averages(trainloader, classes=classes, cols=10, scale=3)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 
229 | "source": [] 230 | } 231 | ], 232 | "metadata": { 233 | "kernelspec": { 234 | "display_name": "Python [default]", 235 | "language": "python", 236 | "name": "python3" 237 | }, 238 | "language_info": { 239 | "codemirror_mode": { 240 | "name": "ipython", 241 | "version": 3 242 | }, 243 | "file_extension": ".py", 244 | "mimetype": "text/x-python", 245 | "name": "python", 246 | "nbconvert_exporter": "python", 247 | "pygments_lexer": "ipython3", 248 | "version": "3.5.4" 249 | }, 250 | "varInspector": { 251 | "cols": { 252 | "lenName": 16, 253 | "lenType": 16, 254 | "lenVar": 40 255 | }, 256 | "kernels_config": { 257 | "python": { 258 | "delete_cmd_postfix": "", 259 | "delete_cmd_prefix": "del ", 260 | "library": "var_list.py", 261 | "varRefreshCmd": "print(var_dic_list())" 262 | }, 263 | "r": { 264 | "delete_cmd_postfix": ") ", 265 | "delete_cmd_prefix": "rm(", 266 | "library": "var_list.r", 267 | "varRefreshCmd": "cat(var_dic_list()) " 268 | } 269 | }, 270 | "types_to_exclude": [ 271 | "module", 272 | "function", 273 | "builtin_function_or_method", 274 | "instance", 275 | "_Feature" 276 | ], 277 | "window_display": false 278 | } 279 | }, 280 | "nbformat": 4, 281 | "nbformat_minor": 2 282 | } 283 | -------------------------------------------------------------------------------- /rnns/Names gender 1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors in PyTorch\n", 9 | "\n", 10 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019-2022).\n", 11 | "\n", 12 | "\n", 13 | "## RNN: Text one-hot encoding, names part 1\n", 14 | "\n", 15 | "We use [US Baby Names - Kaggle Dataset](https://www.kaggle.com/kaggle/us-baby-names).\n", 16 | "If needed, you can use: `!wget https://www.dropbox.com/s/s14l44ptqevgech/NationalNames.csv.zip?dl=1`\n", 17 | "\n", 18 | "See also:\n", 19 | "\n", 20 | "* 
[The Most Unisex Names in US History](https://flowingdata.com/2013/09/25/the-most-unisex-names-in-us-history/)\n", 21 | "* [Why Most European Names Ending in A Are Female](http://blog-en.namepedia.org/2015/11/why-most-european-names-ending-in-a-are-female/)\n", 22 | "\n", 23 | "And for Polish names and surnames:\n", 24 | "\n", 25 | "* [Najpopularniejsze imiona w Polsce - Otwarte Dane](https://dane.gov.pl/dataset/219)\n", 26 | "* [Nazwiska występujące w rejestrze PESEL - Otwarte Dane](https://dane.gov.pl/dataset/568)\n", 27 | "* https://nazwiska-polskie.pl/\n", 28 | "* [List of polish first and last names - Kaggle Dataset](https://www.kaggle.com/djablo/list-of-polish-first-and-last-names/home)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%matplotlib inline\n", 38 | "from collections import Counter\n", 39 | "import numpy as np\n", 40 | "import pandas as pd\n", 41 | "import seaborn as sns\n", 42 | "from sklearn.model_selection import train_test_split\n", 43 | "import h5py" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "names = pd.read_csv(\"./data/NationalNames.csv\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "names.info()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "names.head()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "names['Year'].max()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "names2014 = names.loc[lambda df: df['Year'] == 2014]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | 
"execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "names2014.shape" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "names2014.sample(5)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "names2014['Gender'].value_counts()" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "names2014['Name'].apply(len).value_counts().sort_index()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "y = names2014['Gender'].map({'F': 0, 'M': 1}).values.astype('int64')" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "y[:5]" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "X_text = list(names2014['Name'])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "X_text[:5]" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "char_count = Counter()\n", 170 | "for name in X_text:\n", 171 | " char_count.update(name)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "char_count.most_common(5)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "char_count.keys()" 190 | ] 
191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "char_count_lower = Counter()\n", 199 | "for name in X_text:\n", 200 | " char_count_lower.update(name.lower())" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "chars = sorted(char_count_lower.keys())\n", 210 | "\"\".join(chars)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "char2id = {c: i for i, c in enumerate(chars)}" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "char2id" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "max_len = 16\n", 238 | "X = np.zeros((len(X_text), len(chars), max_len), dtype='float32')\n", 239 | "for i, name in enumerate(X_text):\n", 240 | " for j, c in enumerate(name.lower()):\n", 241 | " X[i, char2id[c], j] = 1." 
242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "sns.heatmap(pd.DataFrame(X[1], index=chars))" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "len(X)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "len(y)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=137)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "with h5py.File(\"data/names_dense.h5\", \"w\") as f:\n", 287 | " f.create_dataset('X_train', data=X_train)\n", 288 | " f.create_dataset('y_train', data=y_train)\n", 289 | " f.create_dataset('X_test', data=X_test)\n", 290 | " f.create_dataset('y_test', data=y_test)\n", 291 | " f.create_dataset('characters', data=np.array(chars, dtype='S1'))\n", 292 | " f.create_dataset('categories', data=np.array(['F', 'M'], dtype='S1'))" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "pytorch-macos-m1-gpu", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:01:00) 
\n[Clang 13.0.1 ]" 320 | }, 321 | "varInspector": { 322 | "cols": { 323 | "lenName": 16, 324 | "lenType": 16, 325 | "lenVar": 40 326 | }, 327 | "kernels_config": { 328 | "python": { 329 | "delete_cmd_postfix": "", 330 | "delete_cmd_prefix": "del ", 331 | "library": "var_list.py", 332 | "varRefreshCmd": "print(var_dic_list())" 333 | }, 334 | "r": { 335 | "delete_cmd_postfix": ") ", 336 | "delete_cmd_prefix": "rm(", 337 | "library": "var_list.r", 338 | "varRefreshCmd": "cat(var_dic_list()) " 339 | } 340 | }, 341 | "types_to_exclude": [ 342 | "module", 343 | "function", 344 | "builtin_function_or_method", 345 | "instance", 346 | "_Feature" 347 | ], 348 | "window_display": false 349 | }, 350 | "vscode": { 351 | "interpreter": { 352 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 353 | } 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /rnns/4 LSTM GRU anatomy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). \n", 10 | "\n", 11 | "Version for [AI & NLP Workshop Day](https://nlpday.pl/), 31 May 2019, Warsaw, Poland: **Understanding LSTM and GRU networks in PyTorch**.\n", 12 | "\n", 13 | "\n", 14 | "\n", 15 | "## NLP & AI: 4. 
LSTM GRU anatomy\n", 16 | "\n", 17 | "\n", 18 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/rnns/4%20LSTM%20GRU%20anatomy.ipynb)\n", 19 | " " 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import torch\n", 29 | "from torch import nn" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## LSTM\n", 37 | "\n", 38 | "More in https://pytorch.org/docs/stable/nn.html#lstm" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "lstm = nn.LSTM(5, 3)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# L = 8 (length)\n", 57 | "# B = 1 (batch size)\n", 58 | "# C = 5 (channels)\n", 59 | "x = torch.randn(8, 1, 5)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "output, (hidden, cell) = lstm(x)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "output" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "hidden" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "cell" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "output[-1] == hidden" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Step by step" 112 | ] 113 | }, 114 | { 
115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "output1, (hidden1, cell1) = lstm(x[:4])\n", 121 | "output2, (hidden2, cell2) = lstm(x[4:], (hidden1, cell1))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "output2" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Iteration" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "lstm" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "hidden = torch.tensor([[[ 0., 0., 0.]]])\n", 156 | "cell = torch.tensor([[[ 0., 0., 0.]]])\n", 157 | "for i, token in enumerate(x):\n", 158 | " output, (hidden, cell) = lstm(x[i:i+1], (hidden, cell))\n", 159 | " print(output)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "for e" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## GRU\n", 176 | "\n", 177 | "More in https://pytorch.org/docs/stable/nn.html#gru" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "gru = nn.GRU(5, 3)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# note that instead of (hidden, cell) there is only hidden\n", 196 | "output, hidden = gru(x)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "output" 206 | ] 207 | }, 208 | { 209 | 
"cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "hidden" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "## Bidirectional LSTM\n", 222 | "\n", 223 | "See also: [Understanding Bidirectional RNN in PyTorch](https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66) by Cechine Lee" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "bilstm = nn.LSTM(5, 3, bidirectional=True)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "output, (hidden, cell) = bilstm(x)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "output.size()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "hidden.size()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "cell.size()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "output" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "hidden" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "## Many-layered LSTM" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "multilstm = nn.LSTM(5, 3, num_layers=2)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 
307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "output, (hidden, cell) = multilstm(x)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "output.size()" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "hidden.size()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "cell.size()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "output" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "hidden" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [] 365 | } 366 | ], 367 | "metadata": { 368 | "kernelspec": { 369 | "display_name": "Python [default]", 370 | "language": "python", 371 | "name": "python3" 372 | }, 373 | "language_info": { 374 | "codemirror_mode": { 375 | "name": "ipython", 376 | "version": 3 377 | }, 378 | "file_extension": ".py", 379 | "mimetype": "text/x-python", 380 | "name": "python", 381 | "nbconvert_exporter": "python", 382 | "pygments_lexer": "ipython3", 383 | "version": "3.5.4" 384 | }, 385 | "varInspector": { 386 | "cols": { 387 | "lenName": 16, 388 | "lenType": 16, 389 | "lenVar": 40 390 | }, 391 | "kernels_config": { 392 | "python": { 393 | "delete_cmd_postfix": "", 394 | "delete_cmd_prefix": "del ", 395 | "library": "var_list.py", 396 | "varRefreshCmd": "print(var_dic_list())" 397 | }, 398 | "r": { 399 | "delete_cmd_postfix": ") ", 400 | "delete_cmd_prefix": "rm(", 401 | "library": "var_list.r", 402 | 
"varRefreshCmd": "cat(var_dic_list()) " 403 | } 404 | }, 405 | "types_to_exclude": [ 406 | "module", 407 | "function", 408 | "builtin_function_or_method", 409 | "instance", 410 | "_Feature" 411 | ], 412 | "window_display": false 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /6 Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version for Uniwersytet Śląski.\n", 10 | "\n", 11 | "\n", 12 | "## 6 Classification \n", 13 | "\n", 14 | "Notebook by Piotr Migdał. And example adopted from https://github.com/stared/livelossplot version 0.4.1.\n", 15 | "\n", 16 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/extra/Image%20classification.ipynb)\n", 17 | "\n", 18 | "Inspirations: \n", 19 | "\n", 20 | "* [Plot classifier comparison - scikit-learn](https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html)\n", 21 | "* [TensorFlow playground](https://playground.tensorflow.org/)\n", 22 | "* [Which Machine Learning algorithm are you?](https://github.com/stared/which-ml-are-you)\n", 23 | "\n", 24 | "Very work in progress.\n", 25 | "\n", 26 | "For using ReLU activations, and how does it relate to folding paper, see:\n", 27 | "\n", 28 | "* [On the Number of Linear Regions of Deep Neural Networks](https://arxiv.org/abs/1402.1869) (NIPS 2014)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "from sklearn import datasets\n", 38 | "from sklearn.model_selection 
import train_test_split\n", 39 | "\n", 40 | "import torch\n", 41 | "from torch import nn, optim\n", 42 | "from torch.utils.data import TensorDataset, DataLoader\n", 43 | "\n", 44 | "import matplotlib.pyplot as plt\n", 45 | "from matplotlib.colors import ListedColormap\n", 46 | "\n", 47 | "from livelossplot import PlotLosses\n", 48 | "from livelossplot import matplotlib_subplots" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# try with make_moons\n", 58 | "X, y = datasets.make_circles(noise=0.2, factor=0.5, random_state=1)\n", 59 | "X_train, X_test, y_train, y_test = \\\n", 60 | " train_test_split(X, y, test_size=.4, random_state=42)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# plot them\n", 70 | "cm_bright = ListedColormap(['#FF0000', '#0000FF'])\n", 71 | "plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)\n", 72 | "plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.3)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# PyTorch technicalities - loading and trainin\n", 82 | "dataloaders = {\n", 83 | " 'train':\n", 84 | " DataLoader(TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).long()),\n", 85 | " batch_size=32,\n", 86 | " shuffle=True, num_workers=4),\n", 87 | " 'validation': \n", 88 | " DataLoader(TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).long()),\n", 89 | " batch_size=32,\n", 90 | " shuffle=False, num_workers=4)\n", 91 | "}\n", 92 | "\n", 93 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", 94 | "\n", 95 | "def train_model(model, criterion, optimizer, num_epochs=10,\n", 96 | " liveloss=PlotLosses()):\n", 97 | " model = model.to(device)\n", 98 | " \n", 
99 | " for epoch in range(num_epochs):\n", 100 | " logs = {}\n", 101 | " for phase in ['train', 'validation']:\n", 102 | " if phase == 'train':\n", 103 | " model.train()\n", 104 | " else:\n", 105 | " model.eval()\n", 106 | "\n", 107 | " running_loss = 0.0\n", 108 | " running_corrects = 0\n", 109 | "\n", 110 | " for inputs, labels in dataloaders[phase]:\n", 111 | " inputs = inputs.to(device)\n", 112 | " labels = labels.to(device)\n", 113 | "\n", 114 | " outputs = model(inputs)\n", 115 | " loss = criterion(outputs, labels)\n", 116 | "\n", 117 | " if phase == 'train':\n", 118 | " optimizer.zero_grad()\n", 119 | " loss.backward()\n", 120 | " optimizer.step()\n", 121 | "\n", 122 | " _, preds = torch.max(outputs, 1)\n", 123 | " running_loss += loss.item() * inputs.size(0)\n", 124 | " running_corrects += (preds == labels.data).sum().item()\n", 125 | "\n", 126 | " epoch_loss = running_loss / len(dataloaders[phase].dataset)\n", 127 | " epoch_acc = running_corrects / len(dataloaders[phase].dataset)\n", 128 | " \n", 129 | " prefix = ''\n", 130 | " if phase == 'validation':\n", 131 | " prefix = 'val_'\n", 132 | "\n", 133 | " logs[prefix + 'log loss'] = epoch_loss\n", 134 | " logs[prefix + 'accuracy'] = epoch_acc\n", 135 | " \n", 136 | " liveloss.update(logs)\n", 137 | " liveloss.draw()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "# an old-school neural network: a multi-layer perceptron\n", 147 | "\n", 148 | "class MLP(nn.Module):\n", 149 | " def __init__(self, hidden_size=3, activation=nn.ReLU()):\n", 150 | " super().__init__()\n", 151 | "\n", 152 | " self.fc = nn.Sequential(\n", 153 | " nn.Linear(2, hidden_size),\n", 154 | " activation,\n", 155 | " nn.Linear(hidden_size, 2)\n", 156 | " )\n", 157 | " \n", 158 | " def forward(self, x):\n", 159 | " x = self.fc(x)\n", 160 | " return x" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | 
"metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "model = MLP(6)\n", 170 | "\n", 171 | "plot2d = matplotlib_subplots.Plot2d(model, X_train, y_train,\n", 172 | " valiation_data=(X_test, y_test),\n", 173 | " margin=0.2, h=0.02)\n", 174 | "plot2d.predict = plot2d._predict_pytorch\n", 175 | "liveloss = PlotLosses(cell_size=(5, 6), extra_plots=[plot2d])\n", 176 | "\n", 177 | "criterion = nn.CrossEntropyLoss()\n", 178 | "optimizer = optim.Adam(model.parameters(), lr=1e-1)\n", 179 | "\n", 180 | "train_model(model, criterion, optimizer, num_epochs=30,\n", 181 | " liveloss=liveloss)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "model = MLP(3, activation=nn.Sigmoid())\n", 191 | "\n", 192 | "plot2d = matplotlib_subplots.Plot2d(model, X_train, y_train,\n", 193 | " valiation_data=(X_test, y_test),\n", 194 | " margin=0.2, h=0.02)\n", 195 | "plot2d.predict = plot2d._predict_pytorch\n", 196 | "liveloss = PlotLosses(cell_size=(5, 6), extra_plots=[plot2d])\n", 197 | "\n", 198 | "criterion = nn.CrossEntropyLoss()\n", 199 | "optimizer = optim.Adam(model.parameters(), lr=1e-1)\n", 200 | "\n", 201 | "train_model(model, criterion, optimizer, num_epochs=100,\n", 202 | " liveloss=liveloss)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "# if needed, we can draw it as a separate plot\n", 212 | "plot2d.draw()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "Python [default]", 226 | "language": "python", 227 | "name": "python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": 
"text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.5.4" 240 | }, 241 | "varInspector": { 242 | "cols": { 243 | "lenName": 16, 244 | "lenType": 16, 245 | "lenVar": 40 246 | }, 247 | "kernels_config": { 248 | "python": { 249 | "delete_cmd_postfix": "", 250 | "delete_cmd_prefix": "del ", 251 | "library": "var_list.py", 252 | "varRefreshCmd": "print(var_dic_list())" 253 | }, 254 | "r": { 255 | "delete_cmd_postfix": ") ", 256 | "delete_cmd_prefix": "rm(", 257 | "library": "var_list.r", 258 | "varRefreshCmd": "cat(var_dic_list()) " 259 | } 260 | }, 261 | "types_to_exclude": [ 262 | "module", 263 | "function", 264 | "builtin_function_or_method", 265 | "instance", 266 | "_Feature" 267 | ], 268 | "window_display": false 269 | } 270 | }, 271 | "nbformat": 4, 272 | "nbformat_minor": 2 273 | } 274 | -------------------------------------------------------------------------------- /convnets/Data augmentation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019).\n", 10 | "\n", 11 | "**Under construction**\n", 12 | "\n", 13 | "\n", 14 | "## Extra: Data augmentation\n", 15 | "\n", 16 | "\n", 17 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/convnets/Data%20augmentation.ipynb)\n", 18 | "\n", 19 | "https://pytorch.org/docs/stable/torchvision/transforms.html" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "%matplotlib inline\n", 29 | "import requests\n", 30 | "from io import BytesIO\n", 31 | "from PIL import Image\n", 32 | "\n", 33 | 
"from torchvision import transforms" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "transform = transforms.Compose([\n", 43 | " transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),\n", 44 | " transforms.RandomHorizontalFlip(),\n", 45 | " transforms.ToTensor()\n", 46 | "])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "file_path = \"https://www.petmd.com/sites/default/files/Acute-Dog-Diarrhea-47066074.jpg\"\n", 56 | "if \":\" in file_path:\n", 57 | " response = requests.get(file_path)\n", 58 | " img = Image.open(BytesIO(response.content))\n", 59 | "else:\n", 60 | " img = Image.open(file_path)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# let's change to something nicer; Zerg cake?\n", 70 | "# img = Image.open(\"/Users/pmigdal/not_my_repos/Keras-PyTorch-AvP-transfer-learning/data/train/alien/77.jpg\")\n", 71 | "# img" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Image preprocessing" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "transform = transforms.CenterCrop((50, 75))\n", 88 | "transform(img)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "transform = transforms.Resize((100, 100))\n", 98 | "transform(img)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "transform = transforms.Grayscale()\n", 108 | "transform(img)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 
116 | "source": [ 117 | "#transforms.Pad(padding, fill=0, padding_mode='constant')\n", 118 | "transform = transforms.Pad(padding=(5, 30), padding_mode='constant')\n", 119 | "transform(img)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "#transforms.Pad(padding, fill=0, padding_mode='constant')\n", 129 | "transform = transforms.Pad(padding=(5, 30), padding_mode='reflect')\n", 130 | "transform(img)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "transform = transforms.FiveCrop(100)\n", 140 | "transform(img)[0]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## Data augmentation\n", 148 | "\n", 149 | "Data augmentation is a way to make sure that a model generalizes beyond the exact images it was trained on. It works by:\n", 150 | "\n", 151 | "\n", 152 | "* extending the dataset with a given set of operations\n", 153 | "* passing data in a way that makes some features irrelevant\n", 154 | "\n", 155 | "\n", 156 | "Common operations involve random rotations, flips, crops, scale changes and color alterations." 
157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0)\n", 166 | "transform = transforms.ColorJitter(brightness=0.5, hue=0.4)\n", 167 | "transform(img)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "transform = transforms.RandomCrop((100, 200))\n", 177 | "transform(img)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "[Game of Thrones: greyscale, diagnosed and explained](https://www.vox.com/culture/2017/7/24/16019550/game-of-thrones-greyscale-jorah-mormont-diagnosed-explained)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# to grayscale, or not to grayscale\n", 194 | "# https://gameofthrones.fandom.com/wiki/Greyscale\n", 195 | "\n", 196 | "\n", 197 | "transform = transforms.RandomGrayscale(p=0.5)\n", 198 | "transform(img)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "transform = transforms.RandomHorizontalFlip(p=1.)\n", 208 | "transform(img)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "transform = transforms.RandomVerticalFlip(p=1.)\n", 218 | "transform(img)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# transforms.RandomRotation(degrees, resample=False, expand=False, center=None)\n", 228 | "transform = transforms.RandomRotation(30)\n", 229 | "transform(img)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | 
"execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "# if we want to allow any rotation\n", 239 | "transform = transforms.RandomRotation(90)\n", 240 | "transform(img)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# transforms.RandomAffine(degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0)\n", 250 | "transform = transforms.RandomAffine(degrees=0, translate=(0.3, 0.3))\n", 251 | "transform(img)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "transform = transforms.RandomAffine(degrees=0, scale=(0.5, 2.0))\n", 261 | "transform(img)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "transform = transforms.RandomAffine(degrees=0, shear=30)\n", 271 | "transform(img)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "transform = transforms.ToTensor()\n", 281 | "transform(img)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "* Compose\n", 289 | "* Apply\n", 290 | "* Random" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [] 299 | } 300 | ], 301 | "metadata": { 302 | "kernelspec": { 303 | "display_name": "Python 3", 304 | "language": "python", 305 | "name": "python3" 306 | }, 307 | "language_info": { 308 | "codemirror_mode": { 309 | "name": "ipython", 310 | "version": 3 311 | }, 312 | "file_extension": ".py", 313 | "mimetype": "text/x-python", 314 | "name": "python", 315 | "nbconvert_exporter": "python", 316 | "pygments_lexer": "ipython3", 317 | "version": 
"3.7.3" 318 | }, 319 | "toc": { 320 | "base_numbering": 1, 321 | "nav_menu": {}, 322 | "number_sections": true, 323 | "sideBar": true, 324 | "skip_h1_title": false, 325 | "title_cell": "Table of Contents", 326 | "title_sidebar": "Contents", 327 | "toc_cell": false, 328 | "toc_position": {}, 329 | "toc_section_display": true, 330 | "toc_window_display": false 331 | }, 332 | "varInspector": { 333 | "cols": { 334 | "lenName": 16, 335 | "lenType": 16, 336 | "lenVar": 40 337 | }, 338 | "kernels_config": { 339 | "python": { 340 | "delete_cmd_postfix": "", 341 | "delete_cmd_prefix": "del ", 342 | "library": "var_list.py", 343 | "varRefreshCmd": "print(var_dic_list())" 344 | }, 345 | "r": { 346 | "delete_cmd_postfix": ") ", 347 | "delete_cmd_prefix": "rm(", 348 | "library": "var_list.r", 349 | "varRefreshCmd": "cat(var_dic_list()) " 350 | } 351 | }, 352 | "types_to_exclude": [ 353 | "module", 354 | "function", 355 | "builtin_function_or_method", 356 | "instance", 357 | "_Feature" 358 | ], 359 | "window_display": false 360 | } 361 | }, 362 | "nbformat": 4, 363 | "nbformat_minor": 2 364 | } 365 | -------------------------------------------------------------------------------- /extra/Matrix exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors, writing in PyTorch\n", 9 | "\n", 10 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019-2022).\n", 11 | "\n", 12 | "\n", 13 | "## Extra: matrix factorization\n", 14 | "\n", 15 | "See:\n", 16 | "\n", 17 | "* [Matrix decomposition viz](http://p.migdal.pl/matrix-decomposition-viz/) for some inspiration.\n", 18 | "* Section 4 from [From Customer Segmentation to Recommendation Systems](https://www.aitrends.com/machine-learning/ai-customer-targeting-levels/)." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import seaborn as sns\n", 30 | "import numpy as np\n", 31 | "\n", 32 | "import torch\n", 33 | "from torch import nn\n", 34 | "from torch.nn import Parameter" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "months = [\"Jan\", \"Feb\", \"Mar\", \"Apr\", \"May\", \"Jun\", \"Jul\", \"Aug\", \"Sep\", \"Oct\", \"Nov\", \"Dec\"]\n", 44 | "cities = [\"Toronto\", \"Warsaw\", \"Boston\", \"London\", \"San Francisco\", \"Jerusalem\", \"Mexico\", \"Cape Town\", \"Sydney\"]\n", 45 | "avg_temp = np.array([\n", 46 | " [-5.8, -3.1, 4.5, 6.7, 14.3, 18.2, 20.1, 20.6, 15.9, 11.2, 3.6, -7.2],\n", 47 | " [-2.9, 3.6, 4.2, 9.7, 16.1, 19.5, 20.0, 18.8, 16.4, 7.6, 3.2, 1.3],\n", 48 | " [0.3, 1.5, 5.9, 8.4, 14.8, 20.2, 24.5, 24.7, 19.7, 13.0, 7.9, 1.9],\n", 49 | " [2.3, 6.5, 8.7, 9.2, 12.3, 15.4, 17.3, 20.0, 14.8, 10.8, 8.7, 6.4],\n", 50 | " [11.5, 13.9, 14.3, 15.7, 16.3, 17.4, 17.2, 17.7, 18.2, 17.4, 14.6, 10.4],\n", 51 | " [9.7, 10.3, 12.7, 15.5, 21.2, 22.1, 24.1, 25.3, 23.5, 20.1, 15.7, 11.8],\n", 52 | " [14.0, 15.6, 17.5, 20.3, 20.6, 18.1, 17.6, 18.2, 17.8, 16.8, 14.9, 16.0],\n", 53 | " [23.1, 23.3, 21.4, 19.0, 17.1, 15.5, 15.4, 15.6, 15.4, 18.6, 20.9, 21.3],\n", 54 | " [23.8, 24.6, 23.4, 20.8, 18.1, 15.1, 14.4, 14.5, 17.3, 19.0, 21.8, 24.3]\n", 55 | "])" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "df = pd.DataFrame(avg_temp, index=cities, columns=months)\n", 65 | "sns.heatmap(df, annot=True, fmt='.0f')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Exercise\n", 73 | "\n", 74 | "Using PyTorch, perform a matrix decomposition, i.e. 
$M = A B$.\n", 75 | "\n", 76 | "Hints:\n", 77 | "\n", 78 | "* NumPy to PyTorch: `torch.from_numpy(x)`\n", 79 | "* PyTorch to NumPy: `x.numpy()` or `x.detach().numpy()`\n", 80 | "* make sure your floats are `float32` (for Torch tensors use: `x = x.float()`)\n", 81 | "* view results and the training curve" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "avg_temp_tensor = torch.from_numpy(avg_temp).float()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "def show_loss(losses, logy=False):\n", 100 | " print(\"Minimal loss: {:.3f}\".format(losses[-1]))\n", 101 | " if logy:\n", 102 | " plt.semilogy(range(len(losses)), losses)\n", 103 | " else:\n", 104 | " plt.plot(range(len(losses)), losses);\n", 105 | " plt.xlabel(\"Step\")\n", 106 | " plt.ylabel(\"Loss\")" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "class Factorize(nn.Module):\n", 116 | " \n", 117 | " def __init__(self, factors=2):\n", 118 | " super().__init__()\n", 119 | " self.A = Parameter(torch.randn(9, factors))\n", 120 | " self.B = Parameter(torch.randn(factors, 12))\n", 121 | " \n", 122 | " def forward(self):\n", 123 | " output = self.A.matmul(self.B)\n", 124 | " return output\n", 125 | " " 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "class FactorizeBiasA(nn.Module):\n", 135 | " \n", 136 | " def __init__(self, factors=2):\n", 137 | " super().__init__()\n", 138 | " self.A = Parameter(torch.randn(9, factors))\n", 139 | " self.B = Parameter(torch.randn(factors, 12))\n", 140 | " self.bias_A = Parameter(torch.randn(9, 1))\n", 141 | " \n", 142 | " def forward(self):\n", 143 | " output = self.A.matmul(self.B) + self.bias_A\n", 
144 | " return output" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "model = Factorize(factors=2)\n", 154 | "criterion = torch.nn.MSELoss()\n", 155 | "optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "losses = []\n", 165 | "for i in range(10000):\n", 166 | " \n", 167 | " output = model()\n", 168 | " loss = criterion(output, avg_temp_tensor)\n", 169 | " losses.append(loss.item())\n", 170 | " \n", 171 | " optimizer.zero_grad()\n", 172 | " loss.backward() \n", 173 | " optimizer.step()\n", 174 | " \n", 175 | "show_loss(losses, logy=True)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "df_pred = pd.DataFrame(model().detach().numpy(), index=cities, columns=months)\n", 185 | "sns.heatmap(df_pred, annot=True, fmt='.0f')" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "sns.heatmap(df_pred - df, annot=True, fmt='.0f')" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "torch.randint_like(avg_temp_tensor, 0, 2)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "def train_cv(model, optimizer, epochs=10000):\n", 213 | " losses = []\n", 214 | " losses_val = []\n", 215 | " mask = torch.randint_like(avg_temp_tensor, 0, 2)\n", 216 | " for i in range(epochs):\n", 217 | "\n", 218 | " output = model()\n", 219 | " loss = (output - avg_temp_tensor).mul(mask).pow(2).sum() / mask.sum()\n", 220 | " losses.append(loss.item())\n", 
221 | " \n", 222 | " loss_val = (output - avg_temp_tensor).mul(1 - mask).pow(2).sum() / (1 - mask).sum()\n", 223 | " losses_val.append(loss_val.item())\n", 224 | "\n", 225 | " optimizer.zero_grad()\n", 226 | " loss.backward() \n", 227 | " optimizer.step()\n", 228 | " return losses, losses_val" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "model = Factorize(factors=2)\n", 238 | "optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)\n", 239 | "losses, losses_val = train_cv(model, optimizer, epochs=10000)\n", 240 | "print(losses[-1], losses_val[-1])" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "dims = [1, 2, 3, 4]\n", 250 | "res = []\n", 251 | "\n", 252 | "for d in dims:\n", 253 | " model = Factorize(factors=d)\n", 254 | " optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)\n", 255 | " losses, losses_val = train_cv(model, optimizer, epochs=10000)\n", 256 | " res.append({\n", 257 | " 'd': d,\n", 258 | " 'loss': losses[-1],\n", 259 | " 'losses_val': losses_val[-1]\n", 260 | " })\n", 261 | " \n", 262 | "pd.DataFrame(res).set_index('d').plot.bar(logy=True)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "dims = [1, 2, 3, 4]\n", 272 | "res = []\n", 273 | "\n", 274 | "for d in dims:\n", 275 | " model = FactorizeBiasA(factors=d)\n", 276 | " optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)\n", 277 | " losses, losses_val = train_cv(model, optimizer, epochs=10000)\n", 278 | " res.append({\n", 279 | " 'd': d,\n", 280 | " 'loss': losses[-1],\n", 281 | " 'losses_val': losses_val[-1]\n", 282 | " })\n", 283 | " \n", 284 | "pd.DataFrame(res).set_index('d').plot.bar(logy=True)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 
290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [] 293 | } 294 | ], 295 | "metadata": { 296 | "kernelspec": { 297 | "display_name": "pytorch-macos-m1-gpu", 298 | "language": "python", 299 | "name": "python3" 300 | }, 301 | "language_info": { 302 | "codemirror_mode": { 303 | "name": "ipython", 304 | "version": 3 305 | }, 306 | "file_extension": ".py", 307 | "mimetype": "text/x-python", 308 | "name": "python", 309 | "nbconvert_exporter": "python", 310 | "pygments_lexer": "ipython3", 311 | "version": "3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:01:00) \n[Clang 13.0.1 ]" 312 | }, 313 | "varInspector": { 314 | "cols": { 315 | "lenName": 16, 316 | "lenType": 16, 317 | "lenVar": 40 318 | }, 319 | "kernels_config": { 320 | "python": { 321 | "delete_cmd_postfix": "", 322 | "delete_cmd_prefix": "del ", 323 | "library": "var_list.py", 324 | "varRefreshCmd": "print(var_dic_list())" 325 | }, 326 | "r": { 327 | "delete_cmd_postfix": ") ", 328 | "delete_cmd_prefix": "rm(", 329 | "library": "var_list.r", 330 | "varRefreshCmd": "cat(var_dic_list()) " 331 | } 332 | }, 333 | "types_to_exclude": [ 334 | "module", 335 | "function", 336 | "builtin_function_or_method", 337 | "instance", 338 | "_Feature" 339 | ], 340 | "window_display": false 341 | }, 342 | "vscode": { 343 | "interpreter": { 344 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 345 | } 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 2 350 | } 351 | -------------------------------------------------------------------------------- /extra/Operation playground.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors\n", 9 | "\n", 10 | "A hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019-2022)\n", 11 | "\n", 12 | "\n", 13 | "## Extra notebook: Operation playground \n", 14 | "\n", 
15 | "Open in Colab: https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/extra/Operation%20playground.ipynb" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "\n", 27 | "import torch\n", 28 | "from torch import nn\n", 29 | "from torch.nn import functional as F" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## What is a neuron" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "x_in = torch.tensor([0.5, -1., 3.])\n", 46 | "A_single = torch.tensor([[1.], [1.], [0.]])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "x_in.matmul(A_single)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "A_layer = torch.tensor([[1., 0.2], [1., 0.5], [0., -.1]])\n", 65 | "x_in.matmul(A_layer)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "A_layer_2 = torch.tensor([[1.], [-1.]])" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "x_in.matmul(A_layer).matmul(A_layer_2)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "x_in.matmul(A_layer).sigmoid().matmul(A_layer_2).sigmoid()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "z = torch.tensor([0.5, -2., 1.5])" 102 | ] 103 | }, 104 | { 
105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "z.max(dim=0)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "z.exp() / z.exp().sum()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "F.softmax(z, dim=0)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "And now, with `torch.nn` module." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "x = torch.randn(2, 1, 4, 4)\n", 145 | "x" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Flatten" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "x.view(x.size(0), -1)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Activation functions\n", 169 | "\n", 170 | "* Element-wise" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "x.relu()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "F.relu(x)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "relu = nn.ReLU()\n", 198 | "relu(x)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "X = torch.arange(-3, 3, step=0.2)" 208 | ] 209 | }, 210 | { 211 
| "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "plt.plot(X.numpy(), X.relu().numpy(), label=\"ReLU\")\n", 217 | "plt.plot(X.numpy(), X.sigmoid().numpy(), label=\"Sigmoid\")\n", 218 | "plt.plot(X.numpy(), X.tanh().numpy(), label=\"Tanh\")\n", 219 | "plt.ylim([-1.5, 1.5])\n", 220 | "plt.legend()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## Pooling operation" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "x" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "maxpool = nn.MaxPool2d((2, 2))\n", 246 | "maxpool(x)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [ 255 | "avgpool = nn.AvgPool2d((2, 2))\n", 256 | "avgpool(x)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "## Convolutions" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)\n", 273 | "conv(x)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "conv = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3, padding=1)\n", 283 | "conv(x)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=1, padding=1)\n", 293 | "conv(x)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 
300 | "## Dropout\n", 301 | "\n", 302 | "During the training phase it randomly \"switches off\" a fraction of neurons. This prevents the network from relying on only a few neurons.\n", 303 | "\n", 304 | "See:\n", 305 | "* [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://jmlr.org/papers/volume15/srivastava14a.old/srivastava14a.pdf)\n", 306 | "* [torch.nn.Dropout](https://pytorch.org/docs/stable/nn.html#dropout-layers)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "dropout = nn.Dropout(p=0.5)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "dropout(x)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "dropout.eval()" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "dropout(x)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "## Batch norm" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "bn = nn.BatchNorm2d(num_features=1)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "bn(x)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "bn(x[:1])" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "bn(x[:1]).mean(dim=[2, 3])" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": 
{}, 391 | "source": [ 392 | "## Softmax" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "x = torch.tensor([0.5, -1., 3.])" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "x.softmax(0)" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [] 419 | } 420 | ], 421 | "metadata": { 422 | "kernelspec": { 423 | "display_name": "pytorch-macos-m1-gpu", 424 | "language": "python", 425 | "name": "python3" 426 | }, 427 | "language_info": { 428 | "codemirror_mode": { 429 | "name": "ipython", 430 | "version": 3 431 | }, 432 | "file_extension": ".py", 433 | "mimetype": "text/x-python", 434 | "name": "python", 435 | "nbconvert_exporter": "python", 436 | "pygments_lexer": "ipython3", 437 | "version": "3.9.13" 438 | }, 439 | "varInspector": { 440 | "cols": { 441 | "lenName": 16, 442 | "lenType": 16, 443 | "lenVar": 40 444 | }, 445 | "kernels_config": { 446 | "python": { 447 | "delete_cmd_postfix": "", 448 | "delete_cmd_prefix": "del ", 449 | "library": "var_list.py", 450 | "varRefreshCmd": "print(var_dic_list())" 451 | }, 452 | "r": { 453 | "delete_cmd_postfix": ") ", 454 | "delete_cmd_prefix": "rm(", 455 | "library": "var_list.r", 456 | "varRefreshCmd": "cat(var_dic_list()) " 457 | } 458 | }, 459 | "types_to_exclude": [ 460 | "module", 461 | "function", 462 | "builtin_function_or_method", 463 | "instance", 464 | "_Feature" 465 | ], 466 | "window_display": false 467 | }, 468 | "vscode": { 469 | "interpreter": { 470 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 471 | } 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 2 476 | } 477 | -------------------------------------------------------------------------------- /rnns/Transformer example.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). \n", 10 | "\n", 11 | "\n", 12 | "## Transformer models\n", 13 | "\n", 14 | "\n", 15 | "* [GPT-2 - better language models and their implications](https://openai.com/blog/better-language-models/) by Open AI\n", 16 | "\n", 17 | "\n", 18 | "PROMPT: \n", 19 | "\n", 20 | "> **Cities & Lights**\n", 21 | "> \n", 22 | "> When you enter the city of Singapore during the night, you see lights: colorful and ubiquitous. Lights on every building, on every fountain, and in every park.\n", 23 | "\n", 24 | "GENERATED:\n", 25 | "\n", 26 | "> Lights shining in a city in which the majority of people are now using mobile phones. Singapore has a bright future as a technology hub, and it 's not too late to make it happen.\n", 27 | "\n", 28 | "Inspired by [Invisible Cities by Italo Calvino](https://en.wikipedia.org/wiki/Invisible_Cities).\n", 29 | "\n", 30 | "### Interactive demos\n", 31 | "\n", 32 | "* [Write With Transformer by Hugging Face](https://transformer.huggingface.co/)\n", 33 | " * [GPT-2 large](https://transformer.huggingface.co/doc/gpt2-large)\n", 34 | "* [Gwern's AI-generated poetry](https://slatestarcodex.com/2019/03/14/gwerns-ai-generated-poetry/) and [GPT-2 Neural Network Poetry](https://www.gwern.net/GPT-2)\n", 35 | "* [AI Dungeon](https://www.aidungeon.io/) - a text-based adventure game powered by GPT-2\n", 36 | "\n", 37 | "### This example\n", 38 | "\n", 39 | "https://github.com/huggingface/pytorch-transformers\n", 40 | "\n", 41 | "https://huggingface.co/pytorch-transformers/index.html\n", 42 | "\n", 43 | "Heavily based on https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_generation.py\n", 44 | "\n", 45 | "Note: models are BIG." 
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "!pip install pytorch_transformers" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import argparse\n", 64 | "import logging\n", 65 | "from tqdm import trange\n", 66 | "\n", 67 | "import torch\n", 68 | "import torch.nn.functional as F\n", 69 | "import numpy as np\n", 70 | "\n", 71 | "from pytorch_transformers import GPT2Config\n", 72 | "from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',\n", 82 | " datefmt = '%m/%d/%Y %H:%M:%S',\n", 83 | " level = logging.INFO)\n", 84 | "logger = logging.getLogger(__name__)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "MAX_LENGTH = int(10000) # Hardcoded max length to avoid infinite loop\n", 94 | "\n", 95 | "ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (GPT2Config,)), ())\n", 96 | "\n", 97 | "MODEL_CLASSES = {\n", 98 | " 'gpt2': (GPT2LMHeadModel, GPT2Tokenizer)\n", 99 | "}\n", 100 | "\n", 101 | "print(ALL_MODELS)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "### Sampling functions" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):\n", 118 | " \"\"\" Filter a distribution of logits using top-k and/or nucleus (top-p) filtering\n", 119 | " Args:\n", 120 | " logits: logits distribution shape (vocabulary 
size)\n", 121 | " top_k > 0: keep only top k tokens with highest probability (top-k filtering).\n", 122 | " top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).\n", 123 | " Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)\n", 124 | " From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317\n", 125 | " \"\"\"\n", 126 | " assert logits.dim() == 1 # batch size 1 for now - could be updated for more but the code would be less clear\n", 127 | " top_k = min(top_k, logits.size(-1)) # Safety check\n", 128 | " if top_k > 0:\n", 129 | " # Remove all tokens with a probability less than the last token of the top-k\n", 130 | " indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]\n", 131 | " logits[indices_to_remove] = filter_value\n", 132 | "\n", 133 | " if top_p > 0.0:\n", 134 | " sorted_logits, sorted_indices = torch.sort(logits, descending=True)\n", 135 | " cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)\n", 136 | "\n", 137 | " # Remove tokens with cumulative probability above the threshold\n", 138 | " sorted_indices_to_remove = cumulative_probs > top_p\n", 139 | " # Shift the indices to the right to keep also the first token above the threshold\n", 140 | " sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()\n", 141 | " sorted_indices_to_remove[..., 0] = 0\n", 142 | "\n", 143 | " indices_to_remove = sorted_indices[sorted_indices_to_remove]\n", 144 | " logits[indices_to_remove] = filter_value\n", 145 | " return logits" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "def sample_sequence(model, length, context, num_samples=1, temperature=1, top_k=0, top_p=0.0, device='cpu'):\n", 155 | " context = torch.tensor(context, dtype=torch.long, device=device)\n", 156 | " context = context.unsqueeze(0).repeat(num_samples, 1)\n", 
157 | " generated = context\n", 158 | " with torch.no_grad():\n", 159 | " for _ in trange(length):\n", 160 | "\n", 161 | " inputs = {'input_ids': generated}\n", 162 | "\n", 163 | " outputs = model(**inputs) # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet (cached hidden-states)\n", 164 | " next_token_logits = outputs[0][0, -1, :] / temperature\n", 165 | " filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)\n", 166 | " next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)\n", 167 | " generated = torch.cat((generated, next_token.unsqueeze(0)), dim=1)\n", 168 | " return generated" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "### Loading model" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "model_type = 'gpt2' #@param [\"gpt2\"]\n", 185 | "model_name_or_path = 'gpt2-medium' #@param [\"gpt2\", \"gpt2-medium\", \"gpt2-large\"]\n", 186 | "device = 'cuda' #@param [\"cuda\", \"cpu\"]\n", 187 | "# device auto?" 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# instead of args\n", 197 | "model_class, tokenizer_class = MODEL_CLASSES[model_type]" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# this line downloads things\n", 207 | "tokenizer = tokenizer_class.from_pretrained(model_name_or_path)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "# this even more\n", 217 | "# and loading itself takes ~20 sec\n", 218 | "model = model_class.from_pretrained(model_name_or_path)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "model.to(device)\n", 228 | "model.eval()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "generate_length = 64 #@param {type:\"integer\"}\n", 238 | "temperature = 1. #@param {type:\"slider\", min:0.1, max:5.0, step:0.1}\n", 239 | "top_k = 50 #@param {type:\"integer\"}\n", 240 | "top_p = 0. 
#@param {type:\"slider\", min:0.0, max:1.0, step:0.05}\n", 241 | "text_prompt = 'Before going into the wilderedness, make sure that' #@param {type:\"string\"}\n", 242 | "\n", 243 | "context_tokens = tokenizer.encode(text_prompt)\n", 244 | "out = sample_sequence(\n", 245 | " model=model,\n", 246 | " context=context_tokens,\n", 247 | " length=generate_length,\n", 248 | " temperature=temperature,\n", 249 | " top_k=top_k,\n", 250 | " top_p=top_p,\n", 251 | " device=device\n", 252 | ")\n", 253 | "out = out[0, len(context_tokens):].tolist()\n", 254 | "text = tokenizer.decode(out, clean_up_tokenization_spaces=True)\n", 255 | "\n", 256 | "print(text_prompt)\n", 257 | "print(text)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [] 266 | } 267 | ], 268 | "metadata": { 269 | "kernelspec": { 270 | "display_name": "Python [default]", 271 | "language": "python", 272 | "name": "python3" 273 | }, 274 | "language_info": { 275 | "codemirror_mode": { 276 | "name": "ipython", 277 | "version": 3 278 | }, 279 | "file_extension": ".py", 280 | "mimetype": "text/x-python", 281 | "name": "python", 282 | "nbconvert_exporter": "python", 283 | "pygments_lexer": "ipython3", 284 | "version": "3.5.4" 285 | }, 286 | "varInspector": { 287 | "cols": { 288 | "lenName": 16, 289 | "lenType": 16, 290 | "lenVar": 40 291 | }, 292 | "kernels_config": { 293 | "python": { 294 | "delete_cmd_postfix": "", 295 | "delete_cmd_prefix": "del ", 296 | "library": "var_list.py", 297 | "varRefreshCmd": "print(var_dic_list())" 298 | }, 299 | "r": { 300 | "delete_cmd_postfix": ") ", 301 | "delete_cmd_prefix": "rm(", 302 | "library": "var_list.r", 303 | "varRefreshCmd": "cat(var_dic_list()) " 304 | } 305 | }, 306 | "types_to_exclude": [ 307 | "module", 308 | "function", 309 | "builtin_function_or_method", 310 | "instance", 311 | "_Feature" 312 | ], 313 | "window_display": false 314 | } 315 | }, 316 | "nbformat": 4, 317 | 
"nbformat_minor": 2 318 | } 319 | -------------------------------------------------------------------------------- /convnets/Transfer learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). Version 0.4 for Uniwersytet Śląski.\n", 10 | "\n", 11 | "\n", 12 | "## Extra: Transfer learning\n", 13 | "\n", 14 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/convnets/Transfer%20learning.ipynb)\n", 15 | "\n", 16 | "See:\n", 17 | "\n", 18 | "* [Keras vs. PyTorch: Alien vs. Predator recognition with transfer learning\t](https://deepsense.ai/keras-vs-pytorch-avp-transfer-learning/) by Piotr Migdał and Patryk Miziuła\n", 19 | "* Dataset: https://www.kaggle.com/pmigdal/alien-vs-predator-images\n", 20 | "* Notebook influenced by https://github.com/deepsense-ai/Keras-PyTorch-AvP-transfer-learning\n", 21 | "\n", 22 | "\n", 23 | "You want to use a GPU otherwise it will be very slow.\n", 24 | "\n", 25 | "In Colab:\n", 26 | "\n", 27 | "* set the runtime to GPU (runtime > change runtime type)\n", 28 | "* download data: `!wget https://www.dropbox.com/s/naggs4je33owsc9/AvP_dataset.zip?dl=1`\n", 29 | "* unzip it: `!unzip AvP_dataset.zip?dl=1 -d data/`\n", 30 | "\n", 31 | "The data structure is:\n", 32 | "\n", 33 | "* train\n", 34 | " * alien\n", 35 | " * predator\n", 36 | "* validation\n", 37 | " * alien\n", 38 | " * predator\n", 39 | " \n", 40 | "Take a note that we can have a different folder structure. 
" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "!pip install livelossplot --quiet" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "%matplotlib inline\n", 59 | "import numpy as np\n", 60 | "import matplotlib.pyplot as plt\n", 61 | "from PIL import Image\n", 62 | "\n", 63 | "import torch\n", 64 | "from torchvision import datasets, models, transforms\n", 65 | "import torch.nn as nn\n", 66 | "import torch.optim as optim\n", 67 | "from livelossplot import PlotLosses" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Download the data" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "!wget https://www.dropbox.com/s/naggs4je33owsc9/AvP_dataset.zip?dl=1\n", 84 | "!unzip AvP_dataset.zip?dl=1 -d data/" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n", 94 | " std=[0.229, 0.224, 0.225])\n", 95 | "\n", 96 | "data_transforms = {\n", 97 | " 'train':\n", 98 | " transforms.Compose([\n", 99 | " transforms.Resize((224,224)),\n", 100 | " transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),\n", 101 | " transforms.RandomHorizontalFlip(),\n", 102 | " transforms.ToTensor(),\n", 103 | " normalize\n", 104 | " ]),\n", 105 | " 'validation':\n", 106 | " transforms.Compose([\n", 107 | " transforms.Resize((224,224)),\n", 108 | " transforms.ToTensor(),\n", 109 | " normalize\n", 110 | " ]),\n", 111 | "}\n", 112 | "\n", 113 | "image_datasets = {\n", 114 | " 'train': \n", 115 | " datasets.ImageFolder('data/train', data_transforms['train']),\n", 116 | " 'validation': \n", 117 | " datasets.ImageFolder('data/validation', 
data_transforms['validation'])\n", 118 | "}\n", 119 | "\n", 120 | "dataloaders = {\n", 121 | " 'train':\n", 122 | " torch.utils.data.DataLoader(image_datasets['train'],\n", 123 | " batch_size=32,\n", 124 | " shuffle=True, num_workers=4),\n", 125 | " 'validation':\n", 126 | " torch.utils.data.DataLoader(image_datasets['validation'],\n", 127 | " batch_size=32,\n", 128 | " shuffle=False, num_workers=4)\n", 129 | "}" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 139 | "device" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# if on CPU, uncomment some lines... unless you are VERY patient\n", 149 | "\n", 150 | "def train_model(model, criterion, optimizer, num_epochs=10):\n", 151 | " liveloss = PlotLosses()\n", 152 | " model = model.to(device)\n", 153 | " \n", 154 | " for epoch in range(num_epochs):\n", 155 | " logs = {}\n", 156 | " for phase in ['train', 'validation']:\n", 157 | " if phase == 'train':\n", 158 | " model.train()\n", 159 | " else:\n", 160 | " model.eval()\n", 161 | "\n", 162 | " # current_step = 0\n", 163 | " running_loss = 0.0\n", 164 | " running_corrects = 0\n", 165 | "\n", 166 | " for inputs, labels in dataloaders[phase]:\n", 167 | " inputs = inputs.to(device)\n", 168 | " labels = labels.to(device)\n", 169 | " # print(current_step, end=\" \")\n", 170 | "\n", 171 | " outputs = model(inputs)\n", 172 | " loss = criterion(outputs, labels)\n", 173 | "\n", 174 | " if phase == 'train':\n", 175 | " optimizer.zero_grad()\n", 176 | " loss.backward()\n", 177 | " optimizer.step()\n", 178 | "\n", 179 | " _, preds = torch.max(outputs, 1)\n", 180 | " running_loss += loss.item() * inputs.size(0)\n", 181 | " running_corrects += (preds == labels.data).sum().item()\n", 182 | " # 
current_step += inputs.size(0)\n", 183 | "\n", 184 | " epoch_loss = running_loss / len(dataloaders[phase].dataset)\n", 185 | " epoch_acc = running_corrects / len(dataloaders[phase].dataset)\n", 186 | " \n", 187 | " prefix = ''\n", 188 | " if phase == 'validation':\n", 189 | " prefix = 'val_'\n", 190 | "\n", 191 | " logs[prefix + 'log loss'] = epoch_loss\n", 192 | " logs[prefix + 'accuracy'] = epoch_acc\n", 193 | " \n", 194 | " liveloss.update(logs)\n", 195 | " liveloss.draw()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "model = models.resnet50(pretrained=True).to(device)\n", 205 | " \n", 206 | "for param in model.parameters():\n", 207 | " param.requires_grad = False \n", 208 | " \n", 209 | "model.fc = nn.Sequential(\n", 210 | " nn.Linear(2048, 128),\n", 211 | " nn.ReLU(inplace=True),\n", 212 | " nn.Linear(128, 2)).to(device)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "criterion = nn.CrossEntropyLoss()\n", 222 | "optimizer = optim.Adam(model.fc.parameters())" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "train_model(model, criterion, optimizer, num_epochs=3)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "validation_img_paths = [\"data/validation/alien/11.jpg\",\n", 241 | " \"data/validation/alien/22.jpg\",\n", 242 | " \"data/validation/predator/33.jpg\"]\n", 243 | "img_list = [Image.open(img_path) for img_path in validation_img_paths]" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "validation_batch = 
torch.stack([data_transforms['validation'](img).to(device)\n", 253 | " for img in img_list])" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "pred_logits_tensor = model(validation_batch)\n", 263 | "pred_probs = pred_logits_tensor.softmax(dim=1).cpu().data.numpy()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# make it more generig with classes\n", 273 | "fig, axs = plt.subplots(1, len(img_list), figsize=(20, 5))\n", 274 | "for i, img in enumerate(img_list):\n", 275 | " ax = axs[i]\n", 276 | " ax.axis('off')\n", 277 | " ax.set_title(\"{:.0f}% Alien, {:.0f}% Predator\".format(100*pred_probs[i,0],\n", 278 | " 100*pred_probs[i,1]))\n", 279 | " ax.imshow(img)" 280 | ] 281 | } 282 | ], 283 | "metadata": { 284 | "kernelspec": { 285 | "display_name": "Python 3", 286 | "language": "python", 287 | "name": "python3" 288 | }, 289 | "language_info": { 290 | "codemirror_mode": { 291 | "name": "ipython", 292 | "version": 3 293 | }, 294 | "file_extension": ".py", 295 | "mimetype": "text/x-python", 296 | "name": "python", 297 | "nbconvert_exporter": "python", 298 | "pygments_lexer": "ipython3", 299 | "version": "3.7.3" 300 | }, 301 | "toc": { 302 | "base_numbering": 1, 303 | "nav_menu": {}, 304 | "number_sections": true, 305 | "sideBar": true, 306 | "skip_h1_title": false, 307 | "title_cell": "Table of Contents", 308 | "title_sidebar": "Contents", 309 | "toc_cell": false, 310 | "toc_position": {}, 311 | "toc_section_display": true, 312 | "toc_window_display": false 313 | }, 314 | "varInspector": { 315 | "cols": { 316 | "lenName": 16, 317 | "lenType": 16, 318 | "lenVar": 40 319 | }, 320 | "kernels_config": { 321 | "python": { 322 | "delete_cmd_postfix": "", 323 | "delete_cmd_prefix": "del ", 324 | "library": "var_list.py", 325 | "varRefreshCmd": "print(var_dic_list())" 326 | }, 
327 | "r": { 328 | "delete_cmd_postfix": ") ", 329 | "delete_cmd_prefix": "rm(", 330 | "library": "var_list.r", 331 | "varRefreshCmd": "cat(var_dic_list()) " 332 | } 333 | }, 334 | "types_to_exclude": [ 335 | "module", 336 | "function", 337 | "builtin_function_or_method", 338 | "instance", 339 | "_Feature" 340 | ], 341 | "window_display": false 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 2 346 | } 347 | -------------------------------------------------------------------------------- /rnns/Names gender 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors in PyTorch\n", 9 | "\n", 10 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019-2022).\n", 11 | "\n", 12 | "## RNNs: LSTMs for one-hot encoded data\n", 13 | "\n", 14 | "We use recurrent networks. For wonderful introductions:\n", 15 | "\n", 16 | "* [Understanding LSTM Networks](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) by Chris Olah\n", 17 | "* [Exploring LSTMs](http://blog.echen.me/2017/05/30/exploring-lstms/) by Edwin Chen\n", 18 | "\n", 19 | "See also:\n", 20 | "\n", 21 | "* [Simple diagrams of convoluted neural networks](https://medium.com/inbrowserai/simple-diagrams-of-convoluted-neural-networks-39c097d2925b) by Piotr Migdał\n", 22 | "* [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) by Andrej Karpathy\n", 23 | "* [Repository to track the progress in Natural Language Processing](https://github.com/sebastianruder/NLP-progress) by Sebastian Ruder\n", 24 | "\n", 25 | "And a few technical remarks:\n", 26 | "\n", 27 | "* [Inconsistent dimension ordering for 1D networks - NCL vs NLC vs LNC](https://discuss.pytorch.org/t/inconsistent-dimension-ordering-for-1d-networks-ncl-vs-nlc-vs-lnc/14807)\n", 28 | "* [Contiguous() and 
permute()](https://discuss.pytorch.org/t/contiguous-and-permute/20673)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%matplotlib inline\n", 38 | "import h5py\n", 39 | "import pandas as pd\n", 40 | "import numpy as np\n", 41 | "\n", 42 | "import torch\n", 43 | "from torch import nn\n", 44 | "from torch import optim\n", 45 | "from torch.utils.data import TensorDataset, DataLoader\n", 46 | "\n", 47 | "from livelossplot import PlotLosses # !pip install livelossplot --quiet" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "with h5py.File(\"./data/names_dense.h5\", 'r') as data:\n", 57 | " X_train = data['X_train'][()]\n", 58 | " y_train = data['y_train'][()]\n", 59 | " X_test = data['X_test'][()]\n", 60 | " y_test = data['y_test'][()]\n", 61 | " categories = [x.decode('utf-8') for x in data['categories'][()]]\n", 62 | " characters = [x.decode('utf-8') for x in data['characters'][()]]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "#X_train = X_train.transpose((0, 2, 1)).copy()\n", 72 | "#X_test = X_test.transpose((0, 2, 1)).copy()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "print(X_train.shape) # should be: (24783, 26, 16)\n", 82 | "print(y_train.shape) # should be: (24783)\n", 83 | "print(X_test.shape) # should be: (8261, 26, 16)\n", 84 | "print(y_test.shape) # should be: (8261)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## Loading " 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "trainloader = DataLoader(TensorDataset(torch.from_numpy(X_train), 
torch.LongTensor(y_train)),\n", 101 | " batch_size=32, shuffle=True)\n", 102 | "testloader = DataLoader(TensorDataset(torch.from_numpy(X_test), torch.LongTensor(y_test)),\n", 103 | " batch_size=32, shuffle=False)\n", 104 | "\n", 105 | "dataloaders = {\n", 106 | " \"train\": trainloader,\n", 107 | " \"validation\": testloader\n", 108 | "}" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# train on cuda if available\n", 118 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", 119 | "\n", 120 | "def train_model(model, criterion, optimizer, num_epochs=10):\n", 121 | " liveloss = PlotLosses()\n", 122 | " model = model.to(device)\n", 123 | " \n", 124 | " for epoch in range(num_epochs):\n", 125 | " logs = {}\n", 126 | " for phase in ['train', 'validation']:\n", 127 | " if phase == 'train':\n", 128 | " model.train()\n", 129 | " else:\n", 130 | " model.eval()\n", 131 | "\n", 132 | " running_loss = 0.0\n", 133 | " running_corrects = 0\n", 134 | "\n", 135 | " for inputs, labels in dataloaders[phase]:\n", 136 | " inputs = inputs.to(device)\n", 137 | " labels = labels.to(device)\n", 138 | "\n", 139 | " outputs = model(inputs)\n", 140 | " loss = criterion(outputs, labels)\n", 141 | "\n", 142 | " if phase == 'train':\n", 143 | " optimizer.zero_grad()\n", 144 | " loss.backward()\n", 145 | " optimizer.step()\n", 146 | "\n", 147 | " _, preds = torch.max(outputs, 1)\n", 148 | " running_loss += loss.detach() * inputs.size(0)\n", 149 | " running_corrects += torch.sum(preds == labels.data)\n", 150 | "\n", 151 | " epoch_loss = running_loss / len(dataloaders[phase].dataset)\n", 152 | " epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)\n", 153 | " \n", 154 | " prefix = ''\n", 155 | " if phase == 'validation':\n", 156 | " prefix = 'val_'\n", 157 | "\n", 158 | " logs[prefix + 'log loss'] = epoch_loss.item()\n", 159 | " logs[prefix + 'accuracy'] 
= epoch_acc.item()\n", 160 | " \n", 161 | " liveloss.update(logs)\n", 162 | " liveloss.draw()\n", 163 | " return model" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## Example of networks in PyTorch" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "class Linear(nn.Module):\n", 180 | " def __init__(self):\n", 181 | " super().__init__()\n", 182 | " self.fc = nn.Linear(16 * 26, 2)\n", 183 | " \n", 184 | " def forward(self, x):\n", 185 | " x = self.fc(x.view(x.size(0), -1))\n", 186 | " return x" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "class Convolutional(nn.Module):\n", 196 | " def __init__(self):\n", 197 | " super().__init__()\n", 198 | " self.convs = nn.Sequential(\n", 199 | " nn.Conv1d(26, 32, 3, padding=1),\n", 200 | " nn.ReLU(),\n", 201 | " nn.MaxPool1d(2)\n", 202 | " )\n", 203 | " self.fc = nn.Linear(8 * 32, 2)\n", 204 | " \n", 205 | " def forward(self, x):\n", 206 | " x = self.convs(x)\n", 207 | " x = x.view(x.size(0), -1) # flatten\n", 208 | " x = self.fc(x)\n", 209 | " return x" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "class FullyConvolutional(nn.Module):\n", 219 | " def __init__(self):\n", 220 | " super().__init__()\n", 221 | " self.convs = nn.Sequential(\n", 222 | " nn.Conv1d(26, 128, 3, padding=1),\n", 223 | " nn.ReLU(),\n", 224 | " nn.MaxPool1d(2),\n", 225 | " nn.Conv1d(128, 128, 3, padding=1),\n", 226 | " nn.ReLU(),\n", 227 | " nn.MaxPool1d(2)\n", 228 | " ) \n", 229 | " \n", 230 | " self.fc = nn.Linear(128, 2)\n", 231 | " \n", 232 | " def forward(self, x):\n", 233 | " x = self.convs(x)\n", 234 | " x = x.max(dim=2)[0] # global max pool\n", 235 | " x = self.fc(x)\n", 236 | " return 
x" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "class Recurrent(nn.Module):\n", 246 | " def __init__(self, rnn_size):\n", 247 | " super().__init__()\n", 248 | " self.lstm = nn.LSTM(input_size=26, hidden_size=rnn_size)\n", 249 | " self.fc = nn.Linear(rnn_size, 2)\n", 250 | "\n", 251 | " def forward(self, x):\n", 252 | " x = x.permute(2, 0, 1)\n", 253 | " output, (hidden, cell) = self.lstm(x)\n", 254 | " res = self.fc(cell).squeeze(0)\n", 255 | " return res" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## Network creation and cost function" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "model = Linear()\n", 272 | "criterion = nn.CrossEntropyLoss()\n", 273 | "optimizer = optim.SGD(model.parameters(), lr=0.01)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "model" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "model(torch.from_numpy(X_train[:4])).size()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "train_model(model, criterion, optimizer, num_epochs=10)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "pytorch-macos-m1-gpu", 314 | "language": "python", 315 | "name": "python3" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | 
"mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:01:00) \n[Clang 13.0.1 ]" 328 | }, 329 | "varInspector": { 330 | "cols": { 331 | "lenName": 16, 332 | "lenType": 16, 333 | "lenVar": 40 334 | }, 335 | "kernels_config": { 336 | "python": { 337 | "delete_cmd_postfix": "", 338 | "delete_cmd_prefix": "del ", 339 | "library": "var_list.py", 340 | "varRefreshCmd": "print(var_dic_list())" 341 | }, 342 | "r": { 343 | "delete_cmd_postfix": ") ", 344 | "delete_cmd_prefix": "rm(", 345 | "library": "var_list.r", 346 | "varRefreshCmd": "cat(var_dic_list()) " 347 | } 348 | }, 349 | "types_to_exclude": [ 350 | "module", 351 | "function", 352 | "builtin_function_or_method", 353 | "instance", 354 | "_Feature" 355 | ], 356 | "window_display": false 357 | }, 358 | "vscode": { 359 | "interpreter": { 360 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 361 | } 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 2 366 | } 367 | -------------------------------------------------------------------------------- /rnns/1 RNN architecture overview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors in PyTorch\n", 8 | "\n", 9 | "Hands-on training by [Piotr Migdał](https://p.migdal.pl) (2019). \n", 10 | "\n", 11 | "Version for [AI & NLP Workshop Day](https://nlpday.pl/), 31 May 2019, Warsaw, Poland: **Understanding LSTM and GRU networks in PyTorch**.\n", 12 | "\n", 13 | "> Long short-term memory (LSTM) and gated recurrent unit (GRU) network are popular network architectures for text processing. During this workshop (held in PyTorch) we will work with them in a low-level way, getting access to memory cells and intermediate states. 
Targeted at people using LSTMs/GRUs as black boxes OR have a background in other network architectures and would like to understand natural language processing with deep learning.\n", 14 | "\n", 15 | "\n", 16 | "## NLP & AI: 1. RNN architecture overview\n", 17 | "\n", 18 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/rnns/1%20RNN%20architecture%20overview.ipynb)\n", 19 | "\n", 20 | "We use recurrent networks. For wonderful introductions:\n", 21 | "\n", 22 | "* [Understanding LSTM Networks](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) by Chris Olah\n", 23 | "* [Exploring LSTMs](http://blog.echen.me/2017/05/30/exploring-lstms/) by Edwin Chen\t\n", 24 | "\n", 25 | "See also:\n", 26 | "\n", 27 | "* [Simple diagrams of convoluted neural networks](https://medium.com/inbrowserai/simple-diagrams-of-convoluted-neural-networks-39c097d2925b) by Piotr Migdał\n", 28 | "* [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) by Andrej Karpathy\n", 29 | "* [Repository to track the progress in Natural Language Processing](https://github.com/sebastianruder/NLP-progress) by Sebastian Ruder\n", 30 | "* [Memorization in RNNs](https://distill.pub/2019/memorization-in-rnns/)\n", 31 | "\n", 32 | "And a few technical remarks:\n", 33 | "\n", 34 | "* [Inconsistent dimension ordering for 1D networks - NCL vs NLC vs LNC](https://discuss.pytorch.org/t/inconsistent-dimension-ordering-for-1d-networks-ncl-vs-nlc-vs-lnc/14807)\n", 35 | "* [Contiguous() and permute()](https://discuss.pytorch.org/t/contiguous-and-permute/20673)\n", 36 | "\n", 37 | "How to think about tensors:\n", 38 | "\n", 39 | "* [Named tensors](http://nlp.seas.harvard.edu/NamedTensor) and [Named tensors (part 2)](http://nlp.seas.harvard.edu/NamedTensor2) by Alexander Rush \n", 40 | "* [Matrices as Tensor Network
Diagrams](https://www.math3ma.com/blog/matrices-as-tensor-network-diagrams) by Tai-Danae Bradley\n", 41 | "* There are Named Tensors with [PyTorch 1.3.0 release](https://github.com/pytorch/pytorch/releases/tag/v1.3.0)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "### Let's do a few examples in... Keras\n", 49 | "\n", 50 | "* [Keras or PyTorch as your first deep learning framework](https://deepsense.ai/keras-or-pytorch/) by Rafał Jakubanis and Piotr Migdał\n", 51 | "\n", 52 | "At least for the first approach, Keras may offer an easier start:\n", 53 | "\n", 54 | "* [Recurrent Layers - Keras](https://keras.io/layers/recurrent/)\n", 55 | "* [François Chollet, \"Deep Learning with Python\"](https://www.manning.com/books/deep-learning-with-python), Chapter 6. Deep learning for text and sequences\n", 56 | "\n", 57 | "Here we use the [keras-sequential-ascii](https://github.com/stared/keras-sequential-ascii) package.\n", 58 | "\n" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 1, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "\u001b[33mYou are using pip version 19.0.3, however version 19.1.1 is available.\r\n", 71 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\r\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# if you need to install that\n", 77 | "!pip install -q keras_sequential_ascii" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 2, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stderr", 87 | "output_type": "stream", 88 | "text": [ 89 | "Using TensorFlow backend.\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "from keras.models import Sequential\n", 95 | "from keras.layers import Flatten, SimpleRNN, Dense, Dropout, LSTM, GRU, Bidirectional\n", 96 | "\n", 97 | "from keras_sequential_ascii import sequential_model_to_ascii_printout" 98 | ] 99 | }, 100 | {
101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Simple Recurrent Neural Networks\n", 105 | "\n", 106 | "![](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-SimpleRNN.png)\n", 107 | "\n", 108 | "from [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 3, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | " OPERATION DATA DIMENSIONS WEIGHTS(N) WEIGHTS(%)\n", 121 | "\n", 122 | " Input ##### 10 26\n", 123 | " SimpleRNN ????? ------------------- 1888 92.0%\n", 124 | " tanh ##### 32\n", 125 | " Dense XXXXX ------------------- 165 8.0%\n", 126 | " softmax ##### 5\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "model = Sequential([\n", 132 | " SimpleRNN(32, return_sequences=False, input_shape=(10, 26)),\n", 133 | " Dense(5, activation='softmax')\n", 134 | "])\n", 135 | "\n", 136 | "sequential_model_to_ascii_printout(model)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "### Long short-term memory (LSTM)\n", 144 | "\n", 145 | "![](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-chain.png)\n", 146 | "\n", 147 | "from [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " OPERATION DATA DIMENSIONS WEIGHTS(N) WEIGHTS(%)\n", 160 | "\n", 161 | " Input ##### 10 26\n", 162 | " LSTM LLLLL ------------------- 7552 97.9%\n", 163 | " tanh ##### 32\n", 164 | " Dense XXXXX ------------------- 165 2.1%\n", 165 | " softmax ##### 5\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "model = Sequential([\n", 171 | " LSTM(32, 
input_shape=(10, 26)),\n", 172 | " Dense(5, activation='softmax')\n", 173 | "])\n", 174 | "\n", 175 | "sequential_model_to_ascii_printout(model)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "model = Sequential([\n", 185 | " Bidirectional(LSTM(32, input_shape=(10, 26), return_sequences=True)),\n", 186 | " Bidirectional(LSTM(32)),\n", 187 | " Dense(5, activation='softmax')\n", 188 | "])\n", 189 | "\n", 190 | "# sequential_model_to_ascii_printout(model)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 6, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | " OPERATION DATA DIMENSIONS WEIGHTS(N) WEIGHTS(%)\n", 203 | "\n", 204 | " Input ##### 10 26\n", 205 | " LSTM LLLLL ------------------- 7552 47.1%\n", 206 | " tanh ##### 10 32\n", 207 | " LSTM LLLLL ------------------- 8320 51.9%\n", 208 | " tanh ##### 32\n", 209 | " Dense XXXXX ------------------- 165 1.0%\n", 210 | " softmax ##### 5\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "model = Sequential([\n", 216 | " LSTM(32, input_shape=(10, 26), return_sequences=True),\n", 217 | " LSTM(32),\n", 218 | " Dense(5, activation='softmax')\n", 219 | "])\n", 220 | "\n", 221 | "sequential_model_to_ascii_printout(model)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "### Gated Recurrent Unit (GRU)\n", 229 | "\n", 230 | "To some extent- \n", 231 | "\n", 232 | "![](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-var-GRU.png)\n", 233 | "\n", 234 | "from [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 7, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stdout", 244 | "output_type": "stream", 245 | "text": [ 246 | " 
OPERATION DATA DIMENSIONS WEIGHTS(N) WEIGHTS(%)\n", 247 | "\n", 248 | " Input ##### 10 26\n", 249 | " GRU LLLLL ------------------- 5664 46.9%\n", 250 | " tanh ##### 10 32\n", 251 | " GRU LLLLL ------------------- 6240 51.7%\n", 252 | " tanh ##### 32\n", 253 | " Dense XXXXX ------------------- 165 1.4%\n", 254 | " softmax ##### 5\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "# GRU is a drop-in replacement\n", 260 | "model = Sequential([\n", 261 | " GRU(32, input_shape=(10, 26), return_sequences=True),\n", 262 | " GRU(32),\n", 263 | " Dense(5, activation='softmax')\n", 264 | "])\n", 265 | "\n", 266 | "sequential_model_to_ascii_printout(model)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "If in Keras it is simpler, why bother?" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python [conda root]", 280 | "language": "python", 281 | "name": "conda-root-py" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.5.4" 294 | }, 295 | "varInspector": { 296 | "cols": { 297 | "lenName": 16, 298 | "lenType": 16, 299 | "lenVar": 40 300 | }, 301 | "kernels_config": { 302 | "python": { 303 | "delete_cmd_postfix": "", 304 | "delete_cmd_prefix": "del ", 305 | "library": "var_list.py", 306 | "varRefreshCmd": "print(var_dic_list())" 307 | }, 308 | "r": { 309 | "delete_cmd_postfix": ") ", 310 | "delete_cmd_prefix": "rm(", 311 | "library": "var_list.r", 312 | "varRefreshCmd": "cat(var_dic_list()) " 313 | } 314 | }, 315 | "types_to_exclude": [ 316 | "module", 317 | "function", 318 | "builtin_function_or_method", 319 | "instance", 320 | "_Feature" 321 | ], 322 | "window_display": false 323 | } 324 | }, 325 | "nbformat": 4, 326 |
"nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /5 Nonlinear regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in tensors, writing in PyTorch\n", 8 | "\n", 9 | "A hands-on course by [Piotr Migdał](https://p.migdal.pl) (2019).\n", 10 | "\n", 11 | "\n", 12 | " \n", 13 | "\n", 14 | "\n", 15 | "## Notebook 5: Non-linear regression\n", 16 | "\n", 17 | "Very **Work in Progress**\n", 18 | "\n", 19 | "![](https://upload.wikimedia.org/wikipedia/commons/thumb/d/d4/Correlation_examples2.svg/400px-Correlation_examples2.svg.png)\n", 20 | "\n", 21 | "\n", 22 | "### Exercise\n", 23 | "\n", 24 | "Which of the following can be described by linear regression:\n", 25 | "\n", 26 | "* without any modifications,\n", 27 | "* by after rescaling *x* or *y*,\n", 28 | "* cannot be described by linear regression?\n", 29 | "\n", 30 | "**TODO**\n", 31 | "\n", 32 | "* Prepare examples\n", 33 | "* 1d function with nonlinearities (by hand and automatically)\n", 34 | "* More advanced\n", 35 | "\n", 36 | "**Datasets to consider**\n", 37 | "\n", 38 | "* https://en.wikipedia.org/wiki/Flight_airspeed_record\n", 39 | "\n", 40 | "**TODO later**\n", 41 | "\n", 42 | "* livelossplot `plot_extrema` error\n", 43 | "* drawing a plot \n", 44 | "* consider using [hiddenlayer](https://github.com/waleedka/hiddenlayer)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "%matplotlib inline\n", 54 | "\n", 55 | "from matplotlib import pyplot as plt\n", 56 | "\n", 57 | "import torch\n", 58 | "from torch import nn\n", 59 | "from torch import tensor\n", 60 | "from livelossplot import PlotLosses" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 
68 | "source": [ 69 | "X = torch.linspace(-2., 2., 30).unsqueeze(1)\n", 70 | "Y = torch.cat([torch.zeros(10), torch.linspace(0., 1., 10), 1. + torch.zeros(10)], dim=0)\n", 71 | "plt.plot(X.squeeze().numpy(), Y.numpy(), 'r.')" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "linear_model = nn.Linear(in_features=1, out_features=1)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "def train(X, Y, model, loss_function, optim, num_epochs):\n", 90 | " loss_history = []\n", 91 | " \n", 92 | " def extra_plot(*args):\n", 93 | " plt.plot(X.squeeze(1).numpy(), Y.numpy(), 'r.', label=\"Ground truth\")\n", 94 | " plt.plot(X.squeeze(1).numpy(), model(X).detach().numpy(), '-', label=\"Model\")\n", 95 | " plt.title(\"Prediction\")\n", 96 | " plt.legend(loc='lower right')\n", 97 | " \n", 98 | " liveloss = PlotLosses(extra_plots=[extra_plot], plot_extrema=False)\n", 99 | "\n", 100 | " for epoch in range(num_epochs):\n", 101 | " \n", 102 | " epoch_loss = 0.0\n", 103 | " \n", 104 | " Y_pred = model(X)\n", 105 | " loss = loss_function(Y_pred, Y)\n", 106 | " \n", 107 | " loss.backward()\n", 108 | " optim.step()\n", 109 | " optim.zero_grad()\n", 110 | " \n", 111 | " liveloss.update({\n", 112 | " 'loss': loss.data.item(),\n", 113 | " })\n", 114 | " liveloss.draw()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Linear model\n", 122 | "\n", 123 | "$$y = a x + b$$" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "class Linear(nn.Module):\n", 133 | " def __init__(self):\n", 134 | " super().__init__()\n", 135 | " \n", 136 | " self.layer_weights = nn.Parameter(torch.randn(1, 1))\n", 137 | " self.layer_bias = nn.Parameter(torch.randn(1))\n", 138 | " \n", 
139 | " def forward(self, x):\n", 140 | " return x.matmul(self.layer_weights).add(self.layer_bias).squeeze()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "linear_model = Linear()\n", 150 | "optim = torch.optim.SGD(linear_model.parameters(), lr=0.03)\n", 151 | "loss_function = nn.MSELoss()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "list(linear_model.parameters())" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "linear_model(X)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "train(X, Y, linear_model, loss_function, optim, num_epochs=50)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Nonlinear\n", 186 | "\n", 187 | "$$ x \\mapsto h \\mapsto y$$" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "class Nonlinear(nn.Module):\n", 197 | " def __init__(self, hidden_size=2):\n", 198 | " super().__init__()\n", 199 | " \n", 200 | " self.layer_1_weights = nn.Parameter(torch.randn(1, hidden_size))\n", 201 | " self.layer_1_bias = nn.Parameter(torch.randn(hidden_size)) \n", 202 | " \n", 203 | " self.layer_2_weights = nn.Parameter(torch.randn(hidden_size, 1) ) \n", 204 | " self.layer_2_bias = nn.Parameter(torch.randn(1))\n", 205 | " \n", 206 | " def forward(self, x):\n", 207 | " x = x.matmul(self.layer_1_weights).add(self.layer_1_bias)\n", 208 | " x = x.relu()\n", 209 | " x = x.matmul(self.layer_2_weights).add(self.layer_2_bias)\n", 210 | " return x.squeeze()\n", 211 | " \n", 212 | " def 
nonrandom_init(self):\n", 213 | " self.layer_1_weights.data = tensor([[1.1, 0.8]])\n", 214 | " self.layer_1_bias.data = tensor([0.5 , -0.7]) \n", 215 | " self.layer_2_weights.data = tensor([[0.3], [-0.7]])\n", 216 | " self.layer_2_bias.data = tensor([0.2])" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "nonlinear_model = Nonlinear(hidden_size=2)\n", 226 | "nonlinear_model.nonrandom_init()\n", 227 | "\n", 228 | "optim = torch.optim.SGD(nonlinear_model.parameters(), lr=0.2)\n", 229 | "# optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.1)\n", 230 | "loss_function = nn.MSELoss()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "scrolled": true 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "train(X, Y, nonlinear_model, loss_function, optim, num_epochs=200)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## Other shapes and activations" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "Y_sin = (2 * X).sin()\n", 258 | "plt.plot(X.squeeze().numpy(), Y_sin.numpy(), 'r.')" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# warning: \n", 268 | "# for 1-d problems it rarely works (often gets stuck in some local minimum)\n", 269 | "nonlinear_model = Nonlinear(hidden_size=10)\n", 270 | "\n", 271 | "optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.01)\n", 272 | "loss_function = nn.MSELoss()\n", 273 | "train(X, Y_sin, nonlinear_model, loss_function, optim, num_epochs=100)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "class 
NonlinearSigmoid2(nn.Module):\n", 283 | " def __init__(self, hidden_size=2):\n", 284 | " super().__init__()\n", 285 | " \n", 286 | " self.layer_1_weights = nn.Parameter(torch.randn(1, hidden_size))\n", 287 | " self.layer_1_bias = nn.Parameter(torch.randn(hidden_size))\n", 288 | " \n", 289 | " self.layer_2_weights = nn.Parameter(torch.randn(hidden_size, 1))\n", 290 | " self.layer_2_bias = nn.Parameter(torch.randn(1))\n", 291 | " \n", 292 | " def forward(self, x):\n", 293 | " x = x.matmul(self.layer_1_weights).add(self.layer_1_bias)\n", 294 | " x = x.sigmoid()\n", 295 | " x = x.matmul(self.layer_2_weights).add(self.layer_2_bias)\n", 296 | " x = x.sigmoid()\n", 297 | " return x.squeeze()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "X1 = torch.linspace(-2., 2., 30).unsqueeze(1)\n", 307 | "Y1 = torch.cat([torch.zeros(10), 1. + torch.zeros(10), torch.zeros(10)], dim=0)\n", 308 | "plt.plot(X1.squeeze().numpy(), Y1.numpy(), 'r.')" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "nonlinear_model = NonlinearSigmoid2(hidden_size=2)\n", 318 | "# optim = torch.optim.SGD(nonlinear_model.parameters(), lr=0.1)\n", 319 | "optim = torch.optim.Adam(nonlinear_model.parameters(), lr=0.1)\n", 320 | "loss_function = nn.MSELoss()\n", 321 | "train(X1, Y1, nonlinear_model, loss_function, optim, num_epochs=100)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## Nonlinear model - by hand" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "my_nonlinear_model = Nonlinear(hidden_size=2)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | 
"my_nonlinear_model.layer_1_weights.data = tensor([[1. , 1.]])\n", 347 | "my_nonlinear_model.layer_1_bias.data = tensor([1. , -1.])" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "X.matmul(my_nonlinear_model.layer_1_weights).add(my_nonlinear_model.layer_1_bias).relu()" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "my_nonlinear_model.layer_2_weights.data = tensor([[0.5], [-0.5]])\n", 366 | "my_nonlinear_model.layer_2_bias.data = tensor([0.])" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "my_nonlinear_model(X)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "plt.plot(X.squeeze(1).numpy(), Y.numpy(), 'r.')\n", 385 | "plt.plot(X.squeeze(1).numpy(), my_nonlinear_model(X).detach().numpy(), '-')" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "Python [default]", 399 | "language": "python", 400 | "name": "python3" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.5.4" 413 | }, 414 | "varInspector": { 415 | "cols": { 416 | "lenName": 16, 417 | "lenType": 16, 418 | "lenVar": 40 419 | }, 420 | "kernels_config": { 421 | "python": { 422 | "delete_cmd_postfix": "", 423 | "delete_cmd_prefix": "del ", 424 | "library": "var_list.py", 425 | 
"varRefreshCmd": "print(var_dic_list())" 426 | }, 427 | "r": { 428 | "delete_cmd_postfix": ") ", 429 | "delete_cmd_prefix": "rm(", 430 | "library": "var_list.r", 431 | "varRefreshCmd": "cat(var_dic_list()) " 432 | } 433 | }, 434 | "types_to_exclude": [ 435 | "module", 436 | "function", 437 | "builtin_function_or_method", 438 | "instance", 439 | "_Feature" 440 | ], 441 | "window_display": false 442 | } 443 | }, 444 | "nbformat": 4, 445 | "nbformat_minor": 2 446 | } 447 | -------------------------------------------------------------------------------- /extra/CPU vs GPU.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## To do\n", 8 | "\n", 9 | "* text from https://www.reddit.com/r/MachineLearning/comments/ecazk2/d_gpu_benchmarks_for_deep_learning_tasks/\n", 10 | "* some comments there https://www.reddit.com/r/MachineLearning/comments/c6jxog/p_ai_benchmark_a_new_standard_for_ml_performance/\n", 11 | "* also numpy\n", 12 | "* some plots" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 9, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import torch\n", 22 | "from timeit import timeit" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "True" 34 | ] 35 | }, 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "torch.cuda.is_available()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 13, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "1" 54 | ] 55 | }, 56 | "execution_count": 13, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "torch.cuda.device_count()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | 
"execution_count": 21, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "'10.1'" 74 | ] 75 | }, 76 | "execution_count": 21, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "torch.version.cuda" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 16, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "'GeForce RTX 2080 with Max-Q Design'" 94 | ] 95 | }, 96 | "execution_count": 16, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "torch.cuda.get_device_name()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 19, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "_CudaDeviceProperties(name='GeForce RTX 2080 with Max-Q Design', major=7, minor=5, total_memory=8192MB, multi_processor_count=46)" 114 | ] 115 | }, 116 | "execution_count": 19, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "torch.cuda.get_device_properties('cuda')" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 3, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "n = " 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 4, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "0.09138809999998898" 152 | ] 153 | }, 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "timeit(func)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 29, 166 | "metadata": {}, 167 | "outputs": 
[], 168 | "source": [ 169 | "A = torch.randn((10000, 10000))\n", 170 | "v = torch.randn((10000))" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 31, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "#timeit(lambda: A.matmul(v))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 32, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "13.7 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "%%timeit\n", 197 | "A.matmul(v)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 33, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "A = A.to(device)\n", 207 | "v = v.to(device)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 34, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "1.24 ms ± 9.69 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "%%timeit\n", 225 | "A.matmul(v)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 35, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "A = torch.randn((1000, 1000))\n", 235 | "B = torch.randn((1000, 1000))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 36, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "8.17 ms ± 96.4 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "%%timeit\n", 253 | "A.matmul(B)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 37, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "A = A.to(device)\n", 263 | "B = B.to(device)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 38, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "456 µs ± 5.98 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "%%timeit\n", 281 | "A.matmul(B)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 3, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "A = torch.randn((10000, 10000))\n", 291 | "B = torch.randn((10000, 10000))" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 4, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "Wall time: 5.74 s\n" 304 | ] 305 | }, 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "tensor([[-1.9520e+01, 6.9182e+01, 1.1564e+02, ..., 1.2619e+02,\n", 310 | " -6.4699e+01, 1.6668e+02],\n", 311 | " [-1.2492e+02, -1.0849e+02, -2.6037e+01, ..., -7.8305e+01,\n", 312 | " -1.9603e+02, 6.1230e+01],\n", 313 | " [-1.0486e+00, -6.5757e+01, 2.1775e+02, ..., 9.2899e+01,\n", 314 | " -3.7656e+01, -1.0037e+02],\n", 315 | " ...,\n", 316 | " [-1.5888e+02, 7.1087e+01, 4.1497e+01, ..., -5.4155e+01,\n", 317 | " -9.1897e+01, -5.1101e+01],\n", 318 | " [ 2.7407e+01, 8.0948e+01, 2.1446e+02, ..., -3.9251e-02,\n", 319 | " 2.5917e+02, 1.5489e+01],\n", 320 | " [ 6.1963e+01, 2.8205e+01, 1.2452e+02, ..., -2.2053e+01,\n", 321 | " 1.3920e+02, -2.7770e+01]])" 322 | ] 323 | }, 324 | "execution_count": 4, 325 | "metadata": {}, 326 | "output_type": "execute_result" 327 | } 328 | ], 329 | "source": [ 330 | 
"%%time\n", 331 | "A.matmul(B)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 45, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "# timeit says:\n", 341 | "# 5.7 s ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 7, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "A = A.to(device)\n", 351 | "B = B.to(device)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 8, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "Wall time: 416 ms\n" 364 | ] 365 | }, 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "tensor([[-1.9520e+01, 6.9182e+01, 1.1564e+02, ..., 1.2619e+02,\n", 370 | " -6.4699e+01, 1.6668e+02],\n", 371 | " [-1.2492e+02, -1.0849e+02, -2.6037e+01, ..., -7.8305e+01,\n", 372 | " -1.9603e+02, 6.1230e+01],\n", 373 | " [-1.0487e+00, -6.5757e+01, 2.1775e+02, ..., 9.2899e+01,\n", 374 | " -3.7656e+01, -1.0037e+02],\n", 375 | " ...,\n", 376 | " [-1.5888e+02, 7.1087e+01, 4.1497e+01, ..., -5.4155e+01,\n", 377 | " -9.1897e+01, -5.1101e+01],\n", 378 | " [ 2.7407e+01, 8.0948e+01, 2.1446e+02, ..., -3.9189e-02,\n", 379 | " 2.5917e+02, 1.5489e+01],\n", 380 | " [ 6.1963e+01, 2.8205e+01, 1.2452e+02, ..., -2.2053e+01,\n", 381 | " 1.3920e+02, -2.7770e+01]], device='cuda:0')" 382 | ] 383 | }, 384 | "execution_count": 8, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "%%time\n", 391 | "A.matmul(B)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "metadata": {}, 397 | "source": [ 398 | "## And Float16" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 14, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "A = A.cpu().half()\n", 408 | "B = B.cpu().half()" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | 
"execution_count": 15, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "tensor([[ 2.3301, 1.4033, -0.1700, ..., -0.4141, 1.4805, 0.3557],\n", 420 | " [ 0.0626, 1.5762, -0.7759, ..., 0.9121, 0.8638, 0.1989],\n", 421 | " [ 1.0459, 0.0194, 0.1581, ..., 1.5264, 1.1943, -1.3916],\n", 422 | " ...,\n", 423 | " [-0.3701, 0.7959, -0.1727, ..., -0.9199, 0.1326, -0.6104],\n", 424 | " [-0.0676, -0.4153, 2.1602, ..., -0.9980, -0.1550, 0.7407],\n", 425 | " [-1.8232, -0.6748, 1.4727, ..., 0.6724, 0.3904, 0.4854]],\n", 426 | " dtype=torch.float16)" 427 | ] 428 | }, 429 | "execution_count": 15, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "A" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 16, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "ename": "RuntimeError", 445 | "evalue": "_th_mm not supported on CPUType for Half", 446 | "output_type": "error", 447 | "traceback": [ 448 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 449 | "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", 450 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n", 451 | "\u001b[1;31mRuntimeError\u001b[0m: _th_mm not supported on CPUType for Half" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "%%time\n", 457 | "A.matmul(B)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 17, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "A = A.to(device)\n", 467 | "B = B.to(device)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 18, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "Wall time: 9.01 ms\n" 480 | ] 481 | }, 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "tensor([[-1.9500e+01, 6.9188e+01, 1.1562e+02, ..., 1.2625e+02,\n", 486 | " 
-6.4688e+01, 1.6675e+02],\n", 487 | " [-1.2488e+02, -1.0850e+02, -2.6016e+01, ..., -7.8312e+01,\n", 488 | " -1.9600e+02, 6.1250e+01],\n", 489 | " [-1.0352e+00, -6.5812e+01, 2.1775e+02, ..., 9.2875e+01,\n", 490 | " -3.7656e+01, -1.0038e+02],\n", 491 | " ...,\n", 492 | " [-1.5888e+02, 7.1125e+01, 4.1500e+01, ..., -5.4094e+01,\n", 493 | " -9.1875e+01, -5.1125e+01],\n", 494 | " [ 2.7391e+01, 8.0938e+01, 2.1450e+02, ..., 7.1289e-02,\n", 495 | " 2.5925e+02, 1.5484e+01],\n", 496 | " [ 6.1969e+01, 2.8188e+01, 1.2456e+02, ..., -2.2047e+01,\n", 497 | " 1.3925e+02, -2.7750e+01]], device='cuda:0', dtype=torch.float16)" 498 | ] 499 | }, 500 | "execution_count": 18, 501 | "metadata": {}, 502 | "output_type": "execute_result" 503 | } 504 | ], 505 | "source": [ 506 | "%%time\n", 507 | "A.matmul(B)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "##" 517 | ] 518 | } 519 | ], 520 | "metadata": { 521 | "kernelspec": { 522 | "display_name": "p37-pytorch", 523 | "language": "python", 524 | "name": "p37-pytorch" 525 | }, 526 | "language_info": { 527 | "codemirror_mode": { 528 | "name": "ipython", 529 | "version": 3 530 | }, 531 | "file_extension": ".py", 532 | "mimetype": "text/x-python", 533 | "name": "python", 534 | "nbconvert_exporter": "python", 535 | "pygments_lexer": "ipython3", 536 | "version": "3.7.5" 537 | } 538 | }, 539 | "nbformat": 4, 540 | "nbformat_minor": 2 541 | } 542 | -------------------------------------------------------------------------------- /1 Vectors, matrices and tensors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors, writing in PyTorch\n", 9 | "\n", 10 | "A hands-on course by [Piotr Migdał](https://p.migdal.pl) (2019-2022).\n", 11 | "\n", 12 | "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/1%20Vectors%2C%20matrices%20and%20tensors.ipynb)\n", 13 | "\n", 14 | "\n", 15 | "## Notebook 1: Vectors, matrices and tensors" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import torch\n", 25 | "from torch import tensor" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "\n", 33 | "\n", 34 | "Linear algebra is the language of deep learning... and quantum mechanics.\n", 35 | "\n", 36 | "Note: in physics and engineering, a tensor is not just any array. There is a one-two-many rule: \n", 37 | "\n", 38 | "* 0: scalar\n", 39 | "* 1: vector\n", 40 | "* 2: matrix\n", 41 | "* 3 and above: n-dimensional tensor\n", 42 | "\n", 43 | "In theory, tensors can be of an arbitrarily high dimension. In deep learning, they rarely exceed 5." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Scalar\n", 51 | "\n", 52 | "Scalar is \"just a number\". Real-world examples of a scalar are: temperature, pressure, price of an apple in a given shop, etc." 
53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "x = tensor(42.)\n", 62 | "x" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "x.dim()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "2 * x" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "x.item()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "### Food for thought\n", 97 | "\n", 98 | "> The scalar fallacy is the false but pervasive assumption that real-world things (hotels, sandwiches, people, mutual funds, chemo drugs, whatever) have some single-dimension ordering of \"goodness\".\n", 99 | "\n", 100 | "> When you project a multi-dimensional space down to one dimension, you are involving a lot of context and preferences in the act of projecting. - [rlucas on HN](https://news.ycombinator.com/item?id=8132525)\n", 101 | "\n", 102 | "See also: [Scalar fallacy](http://observationalepidemiology.blogspot.com/2011/01/scalar-fallacy.html).\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Vector\n", 110 | "\n", 111 | "Vector is an ordered list of numbers, such as `[-5., 2., 0.]`.\n", 112 | "\n", 113 | "In physics and mechanical engineering, not everything is a vector:\n", 114 | "\n", 115 | "> it is not generally true that any three numbers form a vector. It is true only if, when we rotate the coordinate system, the components of the vector transform among themselves in the correct way. 
- [II 02: Differential Calculus of Vector Fields](http://www.feynmanlectures.caltech.edu/II_02.html) from [The Feynman Lectures on Physics](http://www.feynmanlectures.caltech.edu/)\n", 116 | "\n", 117 | "* position\n", 118 | "* velocity\n", 119 | "* electric field\n", 120 | "* spatial gradient of a scalar field ($\\nabla T$)\n", 121 | "\n", 122 | "\n", 123 | "In deep learning we are more... relaxed. Usually vectors are abstract, for example:\n", 124 | "\n", 125 | "\n", 126 | "* a feature vector produced by an ImageNet-trained network\n", 127 | "* a word representation in a word embedding (see: [king - man + woman is queen; but why?](https://p.migdal.pl/2017/01/06/king-man-woman-queen-why.html))\n", 128 | "* user and product vectors in [Factorization Machines](https://www.reddit.com/r/MachineLearning/comments/65d3lt/r_factorization_machines_2010_a_classic_paper_in/) and related recommendation systems\n", 129 | "\n", 130 | "\n", 131 | "$$\\vec{v} = \\left[ v_1, v_2, \\ldots, v_n \\right]$$" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "v = tensor([1.5, -0.5, 3.0])\n", 141 | "v" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "v.dim()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "v.size()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### Vector arithmetic\n", 167 | "\n", 168 | "We can multiply vectors by a scalar: \n", 169 | "\n", 170 | "$$a \\vec{v} = \\left[a v_1, a v_2, \\ldots, a v_n \\right]$$\n", 171 | "\n", 172 | "Or, provided that two vectors have the same length, add them to or subtract them from each other:\n", 173 | "\n", 174 | "$$\\vec{v} + \\vec{u}$$" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 
180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "2 * v" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "u = tensor([1., 0., 1.])" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "v + u" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### Vector length\n", 209 | "\n", 210 | "\n", 211 | "$$|\\vec{v}| = \\sqrt{v_1^2 + v_2^2 + \\ldots + v_n^2} = \\sqrt{\\sum_{i=1}^n v_i^2}$$" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "v.pow(2).sum().sqrt()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "v**2" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "torch.pow(v, 2)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "v.pow(2).sum()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# or to normalize a vector\n", 257 | "v / v.norm()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "## Matrix\n", 265 | "\n", 266 | "[![Matrix transform - xkcd](https://imgs.xkcd.com/comics/matrix_transform.png)](https://xkcd.com/184/)\n", 267 | "\n", 268 | "Typical operations:\n", 269 | "\n", 270 | "* rotations\n", 271 | "* next step in a stochastic process\n", 272 | "* unitary operations and projections in quantum mechanics (these use complex numbers)\n", 273 | "* 
scalar products\n", 274 | "* [Hessian matrix](https://en.wikipedia.org/wiki/Hessian_matrix) of a scalar (i.e. second order derivatives of a scalar with respect to a vector)\n", 275 | "* channel mixing (e.g. `RGB` to gray-scale and R-G)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "M = tensor([[1., 2.], [3., 4.]])\n", 285 | "M" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "M.matmul(M)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "tensor([1., 0.]).matmul(M)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "# for Python 3.5+\n", 313 | "M @ M" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "M * M" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "tensor([1., 2.]).matmul(M)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "M.det()" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "# or we can use Singular Value Decomposition, the key step of Principal Component Analysis\n", 350 | "M.svd()" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "[Matrix factorization visualized](https://p.migdal.pl/matrix-decomposition-viz/) by Piotr Migdał (work in progress):\n", 358 | "\n", 359 | 
"![](imgs/matrix_factorization_city_temperature.png)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "## Tensor\n", 367 | "\n", 368 | "\n", 369 | "A tensor is a generalization of vectors and matrices to more dimensions.\n", 370 | "\n", 371 | "In physics and engineering they have more properties, as in:\n", 372 | "\n", 373 | "\n", 374 | "![](https://upload.wikimedia.org/wikipedia/commons/thumb/f/fe/StressEnergyTensor_contravariant.svg/250px-StressEnergyTensor_contravariant.svg.png)\n", 375 | "\n", 376 | "[Electromagnetic tensor](https://en.wikipedia.org/wiki/Electromagnetic_tensor) from [Introduction to the mathematics of general relativity - Wikipedia](https://en.wikipedia.org/wiki/Introduction_to_the_mathematics_of_general_relativity), see also: [Tensor](https://en.wikipedia.org/wiki/Tensor).\n", 377 | "\n", 378 | "In deep learning, tensors are just arbitrary multi-dimensional arrays.\n" 379 | ] 380 | }, 381 | { 382 | "attachments": {}, 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "# Further reading\n", 387 | "\n", 388 | "Deep learning is just a lot of array operations, which [can be done even in Excel](http://www.deepexcel.net/). While the network was an April Fool's Day joke, it was a very educational one.\n", 389 | "\n", 390 | "For an introduction to linear algebra, I recommend [immersive linear algebra](http://immersivemath.com/ila/index.html) by J. Ström, K. Åström, and T. 
Akenine-Möller (from my [Interactive Machine Learning, Deep Learning and Statistics websites\n", 391 | "](http://p.migdal.pl/interactive-machine-learning-list/) collection).\n", 392 | "\n", 393 | "On tensor notation and diagrams in particular, I recommend:\n", 394 | "\n", 395 | "* [Tensor diagrams](https://medium.com/@pmigdal/in-the-topic-of-diagrams-i-did-write-a-review-simple-diagrams-of-convoluted-neural-networks-6418a63f9281).\n", 396 | "* [Einsum is All you Need - Einstein Summation in Deep Learning](https://rockt.github.io/2018/04/30/einsum) by Tim Rocktäschel.\n", 397 | "* [Matrices as Tensor Network Diagrams](https://www.math3ma.com/blog/matrices-as-tensor-network-diagrams) by [Tai-Danae Bradley](https://twitter.com/math3ma):\n", 398 | "\n", 399 | "![Scalar, vector, matrix, tensor - a drawing by Tai-Danae Bradley](https://uploads-ssl.webflow.com/5b1d427ae0c922e912eda447/5cd99a73f8ce4494ad86852e_arraychart.jpg)\n", 400 | "\n", 401 | "Beware that PyTorch can be tricky with the tensor dimension order:\n", 402 | "\n", 403 | "* Inconsistent dimension ordering for 1D networks - NCL vs NLC vs LNC (link missing)\n", 404 | "* [Named tensors](http://nlp.seas.harvard.edu/NamedTensor) and [Named tensors (part 2)](http://nlp.seas.harvard.edu/NamedTensor2) by Alexander Rush - a proposal of type-checking tensor dimensions\n", 405 | "\n", 406 | "> Is it only me, or does \"Theano tensor dimension order\" sound like some secret convent? 
- [Piotr Migdał's tweet](https://twitter.com/pmigdal/status/961344490500952070)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [] 415 | } 416 | ], 417 | "metadata": { 418 | "kernelspec": { 419 | "display_name": "pytorch-macos-m1-gpu", 420 | "language": "python", 421 | "name": "python3" 422 | }, 423 | "language_info": { 424 | "codemirror_mode": { 425 | "name": "ipython", 426 | "version": 3 427 | }, 428 | "file_extension": ".py", 429 | "mimetype": "text/x-python", 430 | "name": "python", 431 | "nbconvert_exporter": "python", 432 | "pygments_lexer": "ipython3", 433 | "version": "3.9.13" 434 | }, 435 | "varInspector": { 436 | "cols": { 437 | "lenName": 16, 438 | "lenType": 16, 439 | "lenVar": 40 440 | }, 441 | "kernels_config": { 442 | "python": { 443 | "delete_cmd_postfix": "", 444 | "delete_cmd_prefix": "del ", 445 | "library": "var_list.py", 446 | "varRefreshCmd": "print(var_dic_list())" 447 | }, 448 | "r": { 449 | "delete_cmd_postfix": ") ", 450 | "delete_cmd_prefix": "rm(", 451 | "library": "var_list.r", 452 | "varRefreshCmd": "cat(var_dic_list()) " 453 | } 454 | }, 455 | "types_to_exclude": [ 456 | "module", 457 | "function", 458 | "builtin_function_or_method", 459 | "instance", 460 | "_Feature" 461 | ], 462 | "window_display": false 463 | }, 464 | "vscode": { 465 | "interpreter": { 466 | "hash": "032715a8d60757555a2623145b534a689e8b5634c5398e8bbec1014c4a8ede12" 467 | } 468 | } 469 | }, 470 | "nbformat": 4, 471 | "nbformat_minor": 2 472 | } 473 | -------------------------------------------------------------------------------- /rnns/Word vectors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Thinking in tensors, writing in PyTorch\n", 9 | "\n", 10 | "A hands-on course by [Piotr Migdał](https://p.migdal.pl) 
(2019-2022).\n", 11 | "\n", 12 | "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]( https://colab.research.google.com/github/stared/thinking-in-tensors-writing-in-pytorch/blob/master/rnns/Word%20vectors.ipynb)\n", 13 | "\n", 14 | "\n", 15 | "## Word vectors\n", 16 | "\n", 17 | "### Reading\n", 18 | "\n", 19 | "For a general reading, see:\n", 20 | "\n", 21 | "* [king - man + woman is queen; but why?](https://p.migdal.pl/2017/01/06/king-man-woman-queen-why.html)\n", 22 | "* [Word2vec in PyTorch](https://adoni.github.io/2017/11/08/word2vec-pytorch/)\n", 23 | "* [Embedding Projector - TensorFlow](https://projector.tensorflow.org/)\n", 24 | "\n", 25 | "### Notes\n", 26 | "\n", 27 | "We use the smallest, 50-dimensional, uncased GloVe word embedding:\n", 28 | "\n", 29 | "* [GloVe: Global Vectors for Word Representation by Stanford](https://nlp.stanford.edu/projects/glove/)\n", 30 | "\n", 31 | "Other popular pre-trained word embeddings:\n", 32 | "\n", 33 | "* [word2vec by Google](https://code.google.com/archive/p/word2vec/)\n", 34 | "* [fastText by Facebook](https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md) (multilingual)\n", 35 | "\n", 36 | "See also:\n", 37 | "\n", 38 | "* [Aligning the fastText vectors of 78 languages](https://github.com/Babylonpartners/fastText_multilingual)\n", 39 | "* [gensim-data](https://github.com/RaRe-Technologies/gensim-data) - data repository for pretrained NLP models and NLP corpora." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import pandas as pd\n", 49 | "import numpy as np\n", 50 | "import seaborn as sns\n", 51 | "\n", 52 | "from matplotlib import pyplot as plt\n", 53 | "\n", 54 | "from sklearn.decomposition import PCA\n", 55 | "from sklearn.manifold import TSNE" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Loading data" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# download the data\n", 72 | "!wget https://nlp.stanford.edu/data/glove.6B.zip\n", 73 | "!unzip glove.6B.zip -d data" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "wv = pd.read_csv(\"./data/glove.6B.50d.txt\",\n", 83 | " delimiter=\" \", header=None, index_col=0, quoting=3)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "wv.head()" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "wv.loc[\"julia\"].values" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# let's make it nicer!\n", 111 | "def latex_vector(series, first=3, last=1):\n", 112 | " from IPython.display import Math\n", 113 | " \n", 114 | " if len(series) < first + last:\n", 115 | " raise Exception(\"len(series) < first + last\")\n", 116 | " \n", 117 | " s = r\"\\vec{v}_{\\text{\" + series.name + r\"}} = [\"\n", 118 | " \n", 119 | " vs_fmtd = [\"{:.2f}\".format(v) for v in series.values[:first]]\n", 120 | " if len(series) > first + last:\n", 121 | " vs_fmtd.append(r\"\\ldots\")\n", 122 | " vs_fmtd += 
[\"{:.2f}\".format(v) for v in series.values[-last:]]\n", 123 | " \n", 124 | " s += \", \".join(vs_fmtd)\n", 125 | " s += \"]\"\n", 126 | "\n", 127 | " return Math(s) " 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "latex_vector(wv.loc[\"julia\"])" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Words close to each other" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "words = set(wv.index)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "\"daniel\" in words" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "correlations = wv.loc[[\"cat\", \"dog\", \"bar\", \"pub\", \"beer\", \"tea\", \"coffee\", \"talked\", \"nicely\"]].transpose().corr()\n", 171 | "sns.clustermap(correlations, vmin=-1., vmax=1., cmap=\"coolwarm\")" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "correlations = wv.loc[[\"hotel\", \"motel\", \"guesthouse\", \"bar\", \"pub\", \"party\"]].transpose().corr()\n", 181 | "sns.clustermap(correlations, vmin=-1., vmax=1., cmap=\"coolwarm\")" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "## Projections on word differences" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "np.dot(wv.loc[\"kate\"], wv.loc[\"he\"] - wv.loc[\"she\"])" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | 
"outputs": [], 205 | "source": [ 206 | "np.dot(wv.loc[\"john\"], wv.loc[\"he\"] - wv.loc[\"she\"])" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "names = [\"kate\", \"catherine\", \"john\", \"mark\", \"peter\", \"anna\", \"julia\", \"jacob\", \"jake\",\n", 216 | " \"richard\", \"ted\", \"theodore\", \"sue\", \"susanne\", \"suzanne\", \"susan\", \"mary\",\n", 217 | " \"leo\", \"leonard\", \"alexander\", \"alexandra\", \"alex\", \"sasha\"]\n", 218 | "all([name in words for name in names])" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "gender = wv.loc[\"he\"] - wv.loc[\"she\"]" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "wv.loc[names].dot(gender).sort_values()" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "wv.loc[names].dot(gender).sort_values().plot.barh()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "diminutive = wv.loc[\"kate\"] - wv.loc[\"catherine\"]" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "proj = pd.DataFrame([gender, diminutive], index=[\"gender\", \"diminutive\"]).transpose()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "df_plot = wv.loc[names].dot(proj).sort_values(by=\"diminutive\")\n", 273 | "df_plot" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 
281 | "source": [ 282 | "# let's normalize data\n", 283 | "lens = (wv**2).sum(axis=1)\n", 284 | "wvn = wv.div(np.sqrt(lens), axis='index')" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "some_words = [\"good\", \"bad\", \"ok\", \"not\", \"ugly\", \"beautiful\", \"awesome\", \"!\", \"?\"]\n", 294 | "assert(all([word in words for word in some_words]))\n", 295 | "\n", 296 | "awesomeness = wvn.loc[\"awesome\"] - wvn.loc[\"awful\"]\n", 297 | "wvn.loc[some_words].dot(awesomeness).sort_values()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "## Plots\n", 305 | "\n", 306 | "To reduce dimensions, we use:\n", 307 | "\n", 308 | "* [PCA](http://setosa.io/ev/principal-component-analysis/) - Principal Component Analysis\n", 309 | "* [t-SNE](https://lvdmaaten.github.io/tsne/) - t-Distributed Stochastic Neighbor Embedding\n", 310 | "\n", 311 | "See also [How to Use t-SNE Effectively](https://distill.pub/2016/misread-tsne/) at Distill." 
312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "pca = PCA(n_components=2)\n", 321 | "X_pca = pca.fit_transform(wv.loc[names])" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "plt.plot(X_pca[:, 0], X_pca[:, 1], '.')\n", 331 | "for i, name in enumerate(names):\n", 332 | " plt.annotate(name, X_pca[i])" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "tsne = TSNE(n_components=2, perplexity=3.)\n", 342 | "X_tsne = tsne.fit_transform(wv.loc[names])" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "plt.plot(X_tsne[:, 0], X_tsne[:, 1], '.')\n", 352 | "for i, name in enumerate(names):\n", 353 | " plt.annotate(name, X_tsne[i])" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## Analogies" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "# normalize your data\n", 370 | "wv.dot(wv.loc[\"dog\"]).sort_values(ascending=False).head(10)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": {}, 377 | "outputs": [], 378 | "source": [ 379 | "wvn.dot(wvn.loc[\"dog\"]).sort_values(ascending=False).head(20)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "wvn.dot(wvn.loc[\"dog\"]).sort_values(ascending=False).tail(20)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | 
"wvn.dot(wvn.loc[\"king\"] - wvn.loc[\"man\"] + wvn.loc[\"woman\"]).sort_values(ascending=False).head(20)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "wvn.dot(wvn.loc[\"kissed\"] - wvn.loc[\"kiss\"] + wvn.loc[\"eat\"]).sort_values(ascending=False).head(20)" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "source": [ 415 | "## Extremes" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "temp_diff = wvn.loc[\"hot\"] - wvn.loc[\"cold\"]\n", 425 | "temp_avg = (wvn.loc[\"hot\"] + wvn.loc[\"cold\"]) / 2.\n", 426 | "proj = pd.DataFrame([temp_diff, temp_avg], index=[\"temp_diff\", \"temp_avg\"]).transpose()" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "temp_all = wvn.dot(proj).sort_values(by=\"temp_avg\", ascending=False)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "temp_all.head(20)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "temp_all.head(20).sort_values(by=\"temp_diff\", ascending=False)" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "## Other notes\n", 461 | "\n", 462 | "* ['unk' in GloVe is not for UNKNOWN](https://stackoverflow.com/questions/49239941/what-is-unk-in-the-pretrained-glove-vector-files-e-g-glove-6b-50d-txt)" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [] 471 | } 472 | ], 473 | "metadata": { 474 | "anaconda-cloud": {}, 
475 | "kernelspec": { 476 | "display_name": "thinking-in-tensors", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.11.0 | packaged by conda-forge | (main, Oct 25 2022, 06:21:25) [Clang 14.0.4 ]" 491 | }, 492 | "varInspector": { 493 | "cols": { 494 | "lenName": 16, 495 | "lenType": 16, 496 | "lenVar": 40 497 | }, 498 | "kernels_config": { 499 | "python": { 500 | "delete_cmd_postfix": "", 501 | "delete_cmd_prefix": "del ", 502 | "library": "var_list.py", 503 | "varRefreshCmd": "print(var_dic_list())" 504 | }, 505 | "r": { 506 | "delete_cmd_postfix": ") ", 507 | "delete_cmd_prefix": "rm(", 508 | "library": "var_list.r", 509 | "varRefreshCmd": "cat(var_dic_list()) " 510 | } 511 | }, 512 | "types_to_exclude": [ 513 | "module", 514 | "function", 515 | "builtin_function_or_method", 516 | "instance", 517 | "_Feature" 518 | ], 519 | "window_display": false 520 | }, 521 | "vscode": { 522 | "interpreter": { 523 | "hash": "437241f1648476fc7478db27d2d3bcbd0bd9c15f86357ad6b2124a6956f5c349" 524 | } 525 | } 526 | }, 527 | "nbformat": 4, 528 | "nbformat_minor": 1 529 | } 530 | -------------------------------------------------------------------------------- /4 Multiple Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Thinking in Tensors, writing in PyTorch\n", 8 | "\n", 9 | "A hands-on course by [Piotr Migdał](https://p.migdal.pl) (2019).\n", 10 | "This notebook prepared by [Weronika Ormaniec](https://github.com/werkaaa).\n", 11 | "\n", 12 | "## Notebook 4: Multiple Linear Regression\n", 13 | "\n", 14 | "\n", 15 | "\n", 16 | " \n", 
17 | "\n", 18 | "\n", 19 | "Simple linear regression is a useful tool when it comes to predicting an output given a single predictor input. However, in practice we often come across problems which are described by more than one predictor. In this case we use Multiple Linear Regression.\n", 20 | "\n", 21 | "Instead of fitting a separate linear equation for each predictor, we will create one equation that will take the form:\n", 22 | "$$ Y = \\alpha_0 + \\alpha_1 \\cdot X_1 + \\alpha_2\\cdot X_2 + ... + \\alpha_n\\cdot X_n$$\n", 23 | "where $X_i$ is one of the predictors, $\\alpha_i$ is its coefficient (the value we want to learn), $\\alpha_0$ is the intercept and $n$ is the number of predictors.\n", 24 | "\n", 25 | "The learning process in Multiple Linear Regression is the same as the one in Simple Linear Regression. " 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import numpy as np\n", 35 | "import pandas as pd\n", 36 | "from sklearn.datasets import load_boston\n", 37 | "import matplotlib.pyplot as plt\n", 38 | "from livelossplot import PlotLosses\n", 39 | "\n", 40 | "from sklearn.linear_model import LinearRegression\n", 41 | "from sklearn.metrics import mean_squared_error, r2_score\n", 42 | "\n", 43 | "import torch\n", 44 | "import torch.nn as nn\n", 45 | "import torch.nn.functional as F\n", 46 | "from torchvision import transforms" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Data" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "In this notebook we will analyze The Boston Housing Dataset. It contains information about 506 houses in Boston. There are 13 features of the houses, which have great or little impact on the price of the house. 
Using PyTorch we will implement a model that will predict the price of the house and then we will try to answer the question, which parameters have the biggest impact on the price of the houses.\n", 61 | "\n", 62 | "We will take the dataset from scikit-learn datasets." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "boston = load_boston()\n", 72 | "boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)\n", 73 | "boston_df[:5]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Parameter descriptions:\n", 81 | " \n", 82 | "* CRIM: Per capita crime rate by town\n", 83 | "* ZN: Proportion of residential land zoned for lots over 25,000 sq. ft\n", 84 | "* INDUS: Proportion of non-retail business acres per town\n", 85 | "* CHAS: Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n", 86 | "* NOX: Nitric oxide concentration (parts per 10 million)\n", 87 | "* RM: Average number of rooms per dwelling\n", 88 | "* AGE: Proportion of owner-occupied units built prior to 1940\n", 89 | "* DIS: Weighted distances to five Boston employment centers\n", 90 | "* RAD: Index of accessibility to radial highways\n", 91 | "* TAX: Full-value property tax rate per $10,000\n", 92 | "* PTRATIO: Pupil-teacher ratio by town\n", 93 | "* B: 1000(Bk — 0.63)², where Bk is the proportion of [people of African American descent] by town\n", 94 | "* LSTAT: Percentage of lower status of the population\n", 95 | "\n", 96 | "Target: Median value of owner-occupied homes in $1000s" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "First of all, let's check which parameters have the strongest linear correlation with the price of the houses." 
104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "fig, (ax1, ax2, ax3, ax4) = plt.subplots(1,4,figsize=(15,7))\n", 113 | "ax1.scatter(boston_df['RM'], boston.target)\n", 114 | "ax1.set_xlabel('RM')\n", 115 | "ax1.set_ylabel('Target')\n", 116 | "\n", 117 | "ax2.scatter(boston_df['CRIM'], boston.target)\n", 118 | "ax2.set_xlabel('CRIM')\n", 119 | "ax2.set_ylabel('Target')\n", 120 | "\n", 121 | "ax3.scatter(boston_df['PTRATIO'], boston.target)\n", 122 | "ax3.set_xlabel('PTRATIO')\n", 123 | "ax3.set_ylabel('Target')\n", 124 | "\n", 125 | "ax4.scatter(boston_df['LSTAT'], boston.target)\n", 126 | "ax4.set_xlabel('LSTAT')\n", 127 | "ax4.set_ylabel('Target')\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "We can say that the relationship between RM (number of rooms per dwelling) and the price may be linear. The same holds for the relationship between LSTAT (percentage of lower status of the population) and the price. What about CRIM (per capita crime rate by town) and PTRATIO (pupil-teacher ratio by town)? Those relationships are clearly not linear. Let's check how a linear model will cope with it!" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Looking at the data, we can see that some predictors have different orders of magnitude. That can be an obstacle during model training. That is why we will normalize the data, so that each feature lies within the range $[-1,1]$." 
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "X = torch.tensor(boston.data, dtype=torch.float32)\n", 151 | "Y = torch.tensor(boston.target, dtype=torch.float32)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def Normalize(data):\n", 161 | " data_mean = torch.mean(data, dim=0)\n", 162 | " data_max = torch.max(data, dim=0)[0]\n", 163 | " data_min = torch.min(data, dim=0)[0]\n", 164 | " data = (data-data_mean)/(data_max-data_min)\n", 165 | " return data" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "X_normalized = Normalize(X)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "boston_df = pd.DataFrame(np.array(X_normalized), columns=boston.feature_names)\n", 184 | "boston_df[:5]" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "This time we will divide the data into training and test sets because we will be able to measure how well the model is doing in general, on the examples it has not seen during training process." 
192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "X_train = X_normalized[:400]\n", 201 | "Y_train = Y[:400]\n", 202 | "X_test = X_normalized[400:]\n", 203 | "Y_test = Y[400:]" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### Model" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "class Linear(nn.Module):\n", 220 | " def __init__(self):\n", 221 | " super().__init__()\n", 222 | " self.linear = nn.Linear(in_features=13, out_features=1)\n", 223 | " \n", 224 | " def forward(self, x):\n", 225 | " return self.linear(x).squeeze(1)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "linear_model = Linear()\n", 235 | "print(linear_model.linear.weight)\n", 236 | "print(linear_model.linear.bias)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "y_predict_train = linear_model(X_train)\n", 246 | "rmse_train = torch.sqrt(F.mse_loss(Y_train, y_predict_train))\n", 247 | "\n", 248 | "y_predict_test = linear_model(X_test)\n", 249 | "rmse_test = torch.sqrt(F.mse_loss(Y_test, y_predict_test))\n", 250 | "\n", 251 | "print(\"The PyTorch model performance:\")\n", 252 | "print('RMSE_train is {}'.format(rmse_train))\n", 253 | "print('RMSE_test is {}'.format(rmse_test))" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "#optim = torch.optim.SGD(linear_model.parameters(), lr=0.1)\n", 263 | "optim = torch.optim.Adam(linear_model.parameters(), lr=1.)\n", 264 | "loss_function = F.mse_loss\n", 265 | "loss = loss_function(linear_model(X), 
Y)\n", 266 | "print(loss) " 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "def train(X, Y, model, loss_function, optim, num_epochs):\n", 276 | " loss_history = []\n", 277 | " liveloss = PlotLosses()\n", 278 | "\n", 279 | "\n", 280 | " for epoch in range(num_epochs):\n", 281 | " \n", 282 | " epoch_loss = 0.0\n", 283 | " \n", 284 | " Y_pred = model(X)\n", 285 | " loss = loss_function(Y_pred, Y)\n", 286 | " \n", 287 | " loss.backward()\n", 288 | " optim.step()\n", 289 | " optim.zero_grad()\n", 290 | " \n", 291 | "\n", 292 | " epoch_loss = loss.data.item()\n", 293 | " \n", 294 | " avg_loss = epoch_loss / len(X)\n", 295 | "\n", 296 | " liveloss.update({\n", 297 | " 'loss': avg_loss,\n", 298 | " })\n", 299 | " liveloss.draw()\n", 300 | "\n", 301 | "train(X_train, Y_train, linear_model, loss_function, optim, num_epochs=80)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": {}, 308 | "outputs": [], 309 | "source": [ 310 | "y_predict_train = linear_model(X_train)\n", 311 | "rmse_train = torch.sqrt(F.mse_loss(Y_train, y_predict_train))\n", 312 | "\n", 313 | "y_predict_test = linear_model(X_test)\n", 314 | "rmse_test = torch.sqrt(F.mse_loss(Y_test, y_predict_test))\n", 315 | "\n", 316 | "print(\"The PyTorch model performance:\")\n", 317 | "print('RMSE train is {:.3f}'.format(rmse_train))\n", 318 | "print('RMSE test is {:.3f}'.format(rmse_test))" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "As we can see, our model fits the data better after training. \n", 326 | "\n", 327 | "We can now compare it with the scikit-learn linear regression model." 
328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "lin_model = LinearRegression()\n", 337 | "lin_model.fit(X_train.numpy(), Y_train.numpy())" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "y_ptrain = lin_model.predict(X_train)\n", 347 | "rmse_train = np.sqrt(mean_squared_error(Y_train, y_ptrain))\n", 348 | "\n", 349 | "y_ptest = lin_model.predict(X_test)\n", 350 | "rmse_test = np.sqrt(mean_squared_error(Y_test, y_ptest))\n", 351 | "\n", 352 | "print(\"The model performance for training set\")\n", 353 | "print('RMSE train is {:.3f}'.format(rmse_train))\n", 354 | "print('RMSE test is {:.3f}'.format(rmse_test))" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "Our model is not perfect but it has learned some intuition about the data and is able to make predictions even on the data it has not seen during learning process." 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Let's compare the coefficients of both models." 
369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "n_groups = 13\n", 378 | "\n", 379 | "fig, ax = plt.subplots(figsize=(15, 7))\n", 380 | "\n", 381 | "index = np.arange(n_groups)\n", 382 | "bar_width = 0.35\n", 383 | "opacity = 0.8\n", 384 | "\n", 385 | "rects1 = plt.bar(index, linear_model.linear.weight.detach().squeeze(), bar_width,\n", 386 | "alpha=opacity,\n", 387 | "color='b',\n", 388 | "label='our model')\n", 389 | "\n", 390 | "rects2 = plt.bar(index + bar_width, lin_model.coef_, bar_width,\n", 391 | "alpha=opacity,\n", 392 | "color='g',\n", 393 | "label='scikit learn')\n", 394 | "\n", 395 | "plt.xlabel('Variable')\n", 396 | "plt.ylabel('Value')\n", 397 | "plt.title('Coefficients')\n", 398 | "plt.xticks(index + bar_width, ('CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',\n", 399 | " 'TAX', 'PTRATIO', 'B', 'LSTAT'))\n", 400 | "plt.legend()\n", 401 | "\n", 402 | "plt.tight_layout()\n", 403 | "plt.show()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "## To do\n", 411 | "* Proper dataset slicing" 412 | ] 413 | } 414 | ], 415 | "metadata": { 416 | "kernelspec": { 417 | "display_name": "Python 3", 418 | "language": "python", 419 | "name": "python3" 420 | }, 421 | "language_info": { 422 | "codemirror_mode": { 423 | "name": "ipython", 424 | "version": 3 425 | }, 426 | "file_extension": ".py", 427 | "mimetype": "text/x-python", 428 | "name": "python", 429 | "nbconvert_exporter": "python", 430 | "pygments_lexer": "ipython3", 431 | "version": "3.6.7" 432 | }, 433 | "toc": { 434 | "base_numbering": 1, 435 | "nav_menu": {}, 436 | "number_sections": true, 437 | "sideBar": true, 438 | "skip_h1_title": false, 439 | "title_cell": "Table of Contents", 440 | "title_sidebar": "Contents", 441 | "toc_cell": false, 442 | "toc_position": {}, 443 | "toc_section_display": true, 444 | "toc_window_display": false 445 
| }, 446 | "varInspector": { 447 | "cols": { 448 | "lenName": 16, 449 | "lenType": 16, 450 | "lenVar": 40 451 | }, 452 | "kernels_config": { 453 | "python": { 454 | "delete_cmd_postfix": "", 455 | "delete_cmd_prefix": "del ", 456 | "library": "var_list.py", 457 | "varRefreshCmd": "print(var_dic_list())" 458 | }, 459 | "r": { 460 | "delete_cmd_postfix": ") ", 461 | "delete_cmd_prefix": "rm(", 462 | "library": "var_list.r", 463 | "varRefreshCmd": "cat(var_dic_list()) " 464 | } 465 | }, 466 | "types_to_exclude": [ 467 | "module", 468 | "function", 469 | "builtin_function_or_method", 470 | "instance", 471 | "_Feature" 472 | ], 473 | "window_display": false 474 | } 475 | }, 476 | "nbformat": 4, 477 | "nbformat_minor": 2 478 | } 479 | --------------------------------------------------------------------------------