├── README.md ├── cbow.ipynb ├── collaborative-filtering-nn.ipynb ├── environment.yml ├── image-caption-tutorial.ipynb ├── images ├── model.png ├── tiny_training2.csv └── tiny_val2.csv └── intro-to-pytoch.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # pytorch-tutorials 2 | 3 | Here are a few basic deep learning tutorials using PyTorch. 4 | -------------------------------------------------------------------------------- /cbow.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# import pytorch libraries\n", 20 | "%matplotlib inline\n", 21 | "import torch \n", 22 | "import torch.autograd as autograd \n", 23 | "import torch.nn as nn \n", 24 | "import torch.nn.functional as F\n", 25 | "import torch.optim as optim\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Text Classification\n", 34 | "In this part of the tutorial we develop a continuous bag of words (CBOW) model for a text classification task described [here]( https://people.cs.umass.edu/~miyyer/pubs/2015_acl_dan.pdf). The CBOW model was first described [here](https://arxiv.org/pdf/1301.3781.pdf)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Subjectivity Dataset\n", 42 | "The subjectivity dataset has 5000 subjective and 5000 objective processed sentences. To get the data:\n", 43 | "```\n", 44 | "wget http://www.cs.cornell.edu/people/pabo/movie-review-data/rotten_imdb.tar.gz\n", 45 | "```" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "def unpack_dataset():\n", 55 | " ! wget http://www.cs.cornell.edu/people/pabo/movie-review-data/rotten_imdb.tar.gz\n", 56 | " ! mkdir data\n", 57 | " ! 
tar -xvf rotten_imdb.tar.gz -C data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "#unpack_dataset()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "plot.tok.gt9.5000 quote.tok.gt9.5000 subjdata.README.1.0\r\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "!ls data" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 5, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "the movie begins in the past where a young boy named sam attempts to save celebi from a hunter . \r\n", 96 | "emerging from the human psyche and showing characteristics of abstract expressionism , minimalism and russian constructivism , graffiti removal has secured its place in the history of modern art while being created by artists who are unconscious of their artistic achievements . \r\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "! 
def read_file(path, encoding="ISO-8859-1"):
    """Read a text file and return its lines.

    Args:
        path: location of the file (str or path-like).
        encoding: text encoding used to decode the file. Defaults to
            "ISO-8859-1", the value this notebook uses for the dataset files.

    Returns:
        list of str, one entry per line, line terminators included
        (exactly what ``readlines`` produces).
    """
    with open(path, encoding=encoding) as f:
        return f.readlines()
\\n'" 178 | ] 179 | }, 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "obj_lines[0]" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "array(['the', 'movie', 'begins', 'in', 'the', 'past', 'where', 'a',\n", 198 | " 'young', 'boy', 'named', 'sam', 'attempts', 'to', 'save', 'celebi',\n", 199 | " 'from', 'a', 'hunter', '.'], dtype='\":0, \"UNK\":1} # init with padding and unknown\n", 502 | "words = [\"\", \"UNK\"]\n", 503 | "for word in word_count:\n", 504 | " vocab2index[word] = len(words)\n", 505 | " words.append(word)" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 24, 511 | "metadata": {}, 512 | "outputs": [], 513 | "source": [ 514 | "#vocab2index" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "## Sentence encoding\n", 522 | "Here we encode each sentence as a sequence of indices corresponding to each word." 
523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 25, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "x_train_len = np.array([len(x.split()) for x in X_train])\n", 532 | "x_val_len = np.array([len(x.split()) for x in X_val])" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 26, 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/plain": [ 543 | "43.0" 544 | ] 545 | }, 546 | "execution_count": 26, 547 | "metadata": {}, 548 | "output_type": "execute_result" 549 | } 550 | ], 551 | "source": [ 552 | "np.percentile(x_train_len, 95) # let set the max sequence len to N=40" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 27, 558 | "metadata": {}, 559 | "outputs": [ 560 | { 561 | "data": { 562 | "text/plain": [ 563 | "'will god let her fall or give her a new path ?'" 564 | ] 565 | }, 566 | "execution_count": 27, 567 | "metadata": {}, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "X_train[0]" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 28, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "8" 584 | ] 585 | }, 586 | "execution_count": 28, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "# returns the index of the word or the index of \"UNK\" otherwise\n", 593 | "vocab2index.get(\"?\", vocab2index[\"UNK\"])" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 29, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/plain": [ 604 | "array([11, 3, 6, 7, 2, 12, 9, 7, 10, 4, 5, 8])" 605 | ] 606 | }, 607 | "execution_count": 29, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "np.array([vocab2index.get(w, vocab2index[\"UNK\"]) for w in X_train[0].split()])" 614 | ] 615 | }, 616 | { 617 | 
def encode_sentence(s, N=40, vocab=None):
    """Encode a whitespace-tokenized sentence as a fixed-length array of word ids.

    Args:
        s: sentence string; tokens are obtained with ``s.split()``.
        N: length of the returned array. Longer sentences are truncated to
            their first N tokens, shorter ones are right-padded with 0.
        vocab: mapping word -> index that must contain the key "UNK".
            When omitted, the notebook-level ``vocab2index`` is used.

    Returns:
        np.ndarray of shape (N,) and dtype int32.
    """
    if vocab is None:
        vocab = vocab2index  # notebook global built in the vocabulary cells
    unk = vocab["UNK"]  # looked up once instead of once per token
    ids = [vocab.get(w, unk) for w in s.split()][:N]
    enc = np.zeros(N, dtype=np.int32)
    enc[:len(ids)] = ids
    return enc
| "cell_type": "markdown", 706 | "metadata": {}, 707 | "source": [ 708 | "## Embedding layer\n", 709 | "Most deep learning models use dense vectors of real numbers to represent words (word embeddings), as opposed to one-hot encoded representations. The module torch.nn.Embedding is used to represent word embeddings. It takes two arguments: the vocabulary size, and the dimensionality of the embeddings. The embeddings are initialized with random vectors. " 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 35, 715 | "metadata": {}, 716 | "outputs": [ 717 | { 718 | "data": { 719 | "text/plain": [ 720 | "Parameter containing:\n", 721 | "tensor([[ 0.0000, 0.0000, 0.0000, 0.0000],\n", 722 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n", 723 | " [-0.0091, -0.4712, 1.2977, -1.2585],\n", 724 | " [ 0.1368, 1.4354, -0.0935, 0.1110],\n", 725 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n", 726 | " [-0.4534, 0.8292, -0.2036, 0.3768],\n", 727 | " [ 1.1694, 1.6533, 1.1898, 1.0617],\n", 728 | " [-1.1252, -0.2761, -0.1112, 0.7598],\n", 729 | " [ 0.9987, 1.0012, -0.3599, 0.5257],\n", 730 | " [ 1.2248, -0.2419, 0.2870, -1.5904]])" 731 | ] 732 | }, 733 | "execution_count": 35, 734 | "metadata": {}, 735 | "output_type": "execute_result" 736 | } 737 | ], 738 | "source": [ 739 | "# an Embedding module containing 10 words with embedding size 4\n", 740 | "# embedding will be initialized at random\n", 741 | "embed = nn.Embedding(10, 4, padding_idx=0)\n", 742 | "embed.weight" 743 | ] 744 | }, 745 | { 746 | "cell_type": "markdown", 747 | "metadata": {}, 748 | "source": [ 749 | "Note that the `padding_idx` has embedding vector 0." 
750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 36, 755 | "metadata": {}, 756 | "outputs": [ 757 | { 758 | "data": { 759 | "text/plain": [ 760 | "tensor([[[-0.9722, 0.9138, 0.0743, -0.1021],\n", 761 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n", 762 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n", 763 | " [-0.4534, 0.8292, -0.2036, 0.3768],\n", 764 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n", 765 | " [ 0.0000, 0.0000, 0.0000, 0.0000]]])" 766 | ] 767 | }, 768 | "execution_count": 36, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "# given a list of ids we can \"look up\" the embedding corresponding to each id\n", 775 | "# can you see that some vectors are the same?\n", 776 | "a = torch.LongTensor([[1,4,1,5,1,0]])\n", 777 | "embed(a)" 778 | ] 779 | }, 780 | { 781 | "cell_type": "markdown", 782 | "metadata": {}, 783 | "source": [ 784 | "This would be the representation of a sentence with words with indices [1,4,1,5,1] and a padding at the end. Below we have an example in which we have two sentences. The first sentence has length 3 and the second sentence has length 2. In order to use a tensor we use padding at the end of the second sentence. " 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 37, 790 | "metadata": {}, 791 | "outputs": [], 792 | "source": [ 793 | "a = torch.LongTensor([[1,4,1], [1,3,0]])" 794 | ] 795 | }, 796 | { 797 | "cell_type": "markdown", 798 | "metadata": {}, 799 | "source": [ 800 | "Our model takes an average of the word embedding of each word. Here is how we do it." 
801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": 38, 806 | "metadata": {}, 807 | "outputs": [], 808 | "source": [ 809 | "s = torch.FloatTensor([3, 2]) # here is the size of the vector" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 39, 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/plain": [ 820 | "tensor([[[-0.9722, 0.9138, 0.0743, -0.1021],\n", 821 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n", 822 | " [-0.9722, 0.9138, 0.0743, -0.1021]],\n", 823 | "\n", 824 | " [[-0.9722, 0.9138, 0.0743, -0.1021],\n", 825 | " [ 0.1368, 1.4354, -0.0935, 0.1110],\n", 826 | " [ 0.0000, 0.0000, 0.0000, 0.0000]]])" 827 | ] 828 | }, 829 | "execution_count": 39, 830 | "metadata": {}, 831 | "output_type": "execute_result" 832 | } 833 | ], 834 | "source": [ 835 | "embed(a)" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": 40, 841 | "metadata": {}, 842 | "outputs": [ 843 | { 844 | "data": { 845 | "text/plain": [ 846 | "tensor([[-1.2213, 0.9080, 1.1367, 1.0548],\n", 847 | " [-0.8354, 2.3491, -0.0192, 0.0089]])" 848 | ] 849 | }, 850 | "execution_count": 40, 851 | "metadata": {}, 852 | "output_type": "execute_result" 853 | } 854 | ], 855 | "source": [ 856 | "embed(a).sum(dim=1)" 857 | ] 858 | }, 859 | { 860 | "cell_type": "code", 861 | "execution_count": 41, 862 | "metadata": {}, 863 | "outputs": [ 864 | { 865 | "data": { 866 | "text/plain": [ 867 | "tensor([[-0.4071, 0.3027, 0.3789, 0.3516],\n", 868 | " [-0.4177, 1.1746, -0.0096, 0.0044]])" 869 | ] 870 | }, 871 | "execution_count": 41, 872 | "metadata": {}, 873 | "output_type": "execute_result" 874 | } 875 | ], 876 | "source": [ 877 | "sum_embs = embed(a).sum(dim=1) \n", 878 | "sum_embs/ s.view(s.shape[0], 1)" 879 | ] 880 | }, 881 | { 882 | "cell_type": "markdown", 883 | "metadata": {}, 884 | "source": [ 885 | "## Continuous Bag of Words Model" 886 | ] 887 | }, 888 | { 889 | "cell_type": "code", 890 | "execution_count": 42, 891 | 
class CBOW(nn.Module):
    """Continuous bag-of-words classifier.

    Looks up an embedding for every word id, averages the embeddings of each
    sentence, and maps the average to a single logit with a linear layer.
    Index 0 is the padding index, so padded positions contribute a zero
    vector to the sum.

    Args:
        vocab_size: number of rows in the embedding table.
        emb_size: dimensionality of each word embedding.
    """

    def __init__(self, vocab_size, emb_size=100):
        super().__init__()
        self.word_emb = nn.Embedding(vocab_size, emb_size, padding_idx=0)
        self.linear = nn.Linear(emb_size, 1)

    def forward(self, x, s):
        """Compute one logit per sentence.

        Args:
            x: integer tensor of word ids, one row per sentence.
            s: number of real (non-padding) words in each sentence; accepted
                either as a column of shape (batch, 1) or flat as (batch,).

        Returns:
            Tensor of shape (batch, 1) with the raw (pre-sigmoid) scores.
        """
        emb = self.word_emb(x)
        # Column-shape the lengths so the division broadcasts over the
        # embedding dimension, and clamp to 1 so an empty sentence
        # (length 0, all padding) yields a zero average instead of 0/0 = NaN.
        lengths = s.reshape(-1, 1).to(emb.dtype).clamp(min=1)
        return self.linear(emb.sum(dim=1) / lengths)
def val_metrics(model):
    """Evaluate ``model`` on the validation split held in notebook globals.

    Reads ``x_val`` (encoded sentences), ``y_val`` (0/1 labels) and
    ``x_val_len`` (sentence lengths), runs a single forward pass over the
    whole split and scores it.

    Returns:
        (loss, accuracy) as Python floats: binary cross-entropy on the logits
        and the fraction of examples whose thresholded prediction (logit > 0)
        equals the label.
    """
    model.eval()
    x = torch.tensor(x_val, dtype=torch.long)
    y = torch.tensor(y_val, dtype=torch.float).unsqueeze(1)
    s = torch.tensor(x_val_len, dtype=torch.float).view(-1, 1)
    # Evaluation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        y_hat = model(x, s)
        loss = F.binary_cross_entropy_with_logits(y_hat, y)
    y_pred = y_hat > 0  # logit > 0  <=>  sigmoid(logit) > 0.5
    accuracy = (y_pred.float() == y).float().mean()
    return loss.item(), accuracy.item()


def train_epocs(model, epochs=10, lr=0.01):
    """Train ``model`` with full-batch gradient steps using Adam.

    Every epoch is one forward/backward pass over the entire training split
    held in the notebook globals ``x_train``, ``y_train`` and ``x_train_len``,
    followed by a validation pass. One progress line is printed per epoch.

    Args:
        model: module called as ``model(x, s)`` that returns one logit per row.
        epochs: number of full-batch updates to perform.
        lr: Adam learning rate.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The training tensors are identical for every epoch: build them once.
    x = torch.tensor(x_train, dtype=torch.long)
    y = torch.tensor(y_train, dtype=torch.float).unsqueeze(1)
    s = torch.tensor(x_train_len, dtype=torch.float).view(-1, 1)
    for _ in range(epochs):
        model.train()  # val_metrics() switches the model to eval mode
        y_hat = model(x, s)
        loss = F.binary_cross_entropy_with_logits(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        val_loss, val_accuracy = val_metrics(model)
        print("train_loss %.3f val_loss %.3f val_accuracy %.3f" % (loss.item(), val_loss, val_accuracy))
"train_epocs(model, epochs=10, lr=0.01)" 1101 | ] 1102 | }, 1103 | { 1104 | "cell_type": "markdown", 1105 | "metadata": {}, 1106 | "source": [ 1107 | "# Data loaders for SGD" 1108 | ] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "metadata": {}, 1113 | "source": [ 1114 | "Nearly all of deep learning is powered by one very important algorithm: **stochastic gradient descent (SGD)**. SGD can be seen as an approximation of **gradient descent** (GD). In GD you have to run through *all* the samples in your training set to do a single iteration. In SGD you use *only one* or *a subset* of training samples to do the update for a parameter in a particular iteration. The subset used in every iteration is called a **batch** or **minibatch**." 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "code", 1119 | "execution_count": 57, 1120 | "metadata": {}, 1121 | "outputs": [], 1122 | "source": [ 1123 | "from torch.utils.data import Dataset, DataLoader" 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "markdown", 1128 | "metadata": {}, 1129 | "source": [ 1130 | "Next we are going to create a data loader. The data loader provides the following features:\n", 1131 | "* Batching the data\n", 1132 | "* Shuffling the data\n", 1133 | "* Loading the data in parallel using multiprocessing workers." 
def encode_sentence2(s, N=40, vocab=None):
    """Encode a sentence as a fixed-length id array and report its used length.

    Args:
        s: sentence string; tokens are obtained with ``s.split()``.
        N: length of the returned array. Longer sentences are truncated to
            their first N tokens, shorter ones are right-padded with 0.
        vocab: mapping word -> index that must contain the key "UNK".
            When omitted, the notebook-level ``vocab2index`` is used.

    Returns:
        (enc, l): ``enc`` is an np.ndarray of shape (N,) and dtype int32;
        ``l`` is the number of leading positions of ``enc`` filled with real
        tokens, i.e. ``min(N, number of tokens)``.
    """
    if vocab is None:
        vocab = vocab2index  # notebook global built in the vocabulary cells
    unk = vocab["UNK"]  # looked up once instead of once per token
    ids = [vocab.get(w, unk) for w in s.split()][:N]
    enc = np.zeros(N, dtype=np.int32)
    l = len(ids)
    enc[:l] = ids
    return enc, l


class SubjectivityDataset(Dataset):
    """Map-style dataset that encodes raw sentences on access.

    Args:
        X: indexable collection of sentence strings.
        y: indexable collection of labels, aligned with ``X``.
        N: fixed encoded length passed on to ``encode_sentence2``.
        vocab: optional word -> index mapping passed on to
            ``encode_sentence2``; ``None`` selects the notebook-level vocabulary.
    """

    def __init__(self, X, y, N=40, vocab=None):
        self.x = X
        self.y = y
        self.N = N
        self.vocab = vocab

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        """Return (encoded sentence, label, unpadded length) for item ``idx``."""
        x, s = encode_sentence2(self.x[idx], N=self.N, vocab=self.vocab)
        return x, self.y[idx], s
1210 | "data": { 1211 | "text/plain": [ 1212 | "(tensor([[ 243, 2146, 1, 384, 57, 1, 57, 1, 1,\n", 1213 | " 37, 559, 1, 1, 2632, 1, 42, 24, 15,\n", 1214 | " 645, 3014, 2936, 88, 1, 37, 1, 2632, 2029,\n", 1215 | " 1, 80, 1, 23, 0, 0, 0, 0, 0,\n", 1216 | " 0, 0, 0, 0],\n", 1217 | " [ 147, 15, 1075, 1910, 362, 42, 3155, 125, 588,\n", 1218 | " 32, 588, 63, 40, 41, 1479, 57, 2537, 24,\n", 1219 | " 15, 1, 57, 1725, 152, 40, 233, 23, 0,\n", 1220 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 1221 | " 0, 0, 0, 0],\n", 1222 | " [ 15, 1873, 24, 119, 29, 2288, 477, 1575, 24,\n", 1223 | " 1645, 42, 24, 15, 127, 635, 436, 147, 173,\n", 1224 | " 128, 1551, 129, 436, 72, 704, 1, 42, 977,\n", 1225 | " 24, 2144, 42, 29, 738, 1, 434, 104, 23,\n", 1226 | " 0, 0, 0, 0],\n", 1227 | " [ 1, 476, 29, 661, 91, 3319, 42, 24, 1,\n", 1228 | " 476, 29, 661, 91, 3320, 23, 0, 0, 0,\n", 1229 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 1230 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 1231 | " 0, 0, 0, 0],\n", 1232 | " [ 261, 15, 3115, 354, 1298, 42, 243, 148, 199,\n", 1233 | " 1013, 60, 24, 60, 24, 60, 24, 60, 0,\n", 1234 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 1235 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 1236 | " 0, 0, 0, 0]], dtype=torch.int32),\n", 1237 | " tensor([ 1., 1., 0., 0., 0.], dtype=torch.float64),\n", 1238 | " tensor([ 31, 26, 36, 15, 17]))" 1239 | ] 1240 | }, 1241 | "execution_count": 62, 1242 | "metadata": {}, 1243 | "output_type": "execute_result" 1244 | } 1245 | ], 1246 | "source": [ 1247 | "x, y, s" 1248 | ] 1249 | }, 1250 | { 1251 | "cell_type": "code", 1252 | "execution_count": 63, 1253 | "metadata": {}, 1254 | "outputs": [], 1255 | "source": [ 1256 | "model = CBOW(vocab_size=V, emb_size=50)" 1257 | ] 1258 | }, 1259 | { 1260 | "cell_type": "code", 1261 | "execution_count": 64, 1262 | "metadata": {}, 1263 | "outputs": [], 1264 | "source": [ 1265 | "train_loader = DataLoader(sub_dataset_train, batch_size=500, shuffle=True)" 1266 | ] 1267 | }, 1268 | { 1269 | "cell_type": "code", 1270 | "execution_count": null, 1271 | 
def train_epocs(model, epochs=10, lr=0.01):
    """Train ``model`` with mini-batch updates using Adam.

    Iterates over the notebook-level ``train_loader``, which yields
    ``(x, y, s)`` batches: encoded sentences, labels and sentence lengths.
    After each epoch the model is scored with ``val_metrics`` and one
    progress line is printed.

    Args:
        model: module called as ``model(x, s)`` that returns one logit per row.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for _ in range(epochs):
        model.train()  # val_metrics() switches the model to eval mode
        total_loss = 0.0
        total = 0
        for x, y, s in train_loader:
            # The default collate keeps the numpy dtypes (int32 ids, float64
            # labels, int64 lengths); cast to what the model and loss expect.
            x = x.long()
            y = y.float().unsqueeze(1)
            s = s.float().view(-1, 1)
            y_hat = model(x, s)
            loss = F.binary_cross_entropy_with_logits(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch average.
            batch_size = x.size(0)
            total_loss += batch_size * loss.item()
            total += batch_size
        # Report the average over the epoch, not the last batch's loss.
        train_loss = total_loss / total if total else float("nan")
        val_loss, val_accuracy = val_metrics(model)
        print("train_loss %.3f val_loss %.3f val_accuracy %.3f" % (train_loss, val_loss, val_accuracy))
"skip_h1_title": false, 1341 | "toc_cell": true, 1342 | "toc_position": {}, 1343 | "toc_section_display": "block", 1344 | "toc_window_display": false 1345 | }, 1346 | "widgets": { 1347 | "state": {}, 1348 | "version": "1.1.2" 1349 | } 1350 | }, 1351 | "nbformat": 4, 1352 | "nbformat_minor": 1 1353 | } 1354 | -------------------------------------------------------------------------------- /collaborative-filtering-nn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Collaborative Filtering with Neural Networks" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this notebook we will write a matrix factorization model in pytorch to solve a recommendation problem. Then we will write a more general neural model for the same problem." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "The MovieLens dataset (ml-latest-small) describes 5-star rating and free-text tagging activity from MovieLens, a movie recommendation service. It contains 100004 ratings and 1296 tag applications across 9125 movies. https://grouplens.org/datasets/movielens/. 
To get the data:\n", 22 | "\n", 23 | "`wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip`" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## MovieLens dataset" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from pathlib import Path\n", 40 | "import pandas as pd\n", 41 | "import numpy as np" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "[PosixPath('/data2/yinterian/ml-latest-small/ratings.csv'),\n", 53 | " PosixPath('/data2/yinterian/ml-latest-small/tags.csv'),\n", 54 | " PosixPath('/data2/yinterian/ml-latest-small/tiny_training2.csv'),\n", 55 | " PosixPath('/data2/yinterian/ml-latest-small/links.csv'),\n", 56 | " PosixPath('/data2/yinterian/ml-latest-small/tiny_val2.csv'),\n", 57 | " PosixPath('/data2/yinterian/ml-latest-small/README.txt'),\n", 58 | " PosixPath('/data2/yinterian/ml-latest-small/movies.csv')]" 59 | ] 60 | }, 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "PATH = Path(\"/Users/yinterian/teaching/deeplearning/data/ml-latest-small/\")\n", 68 | "PATH = Path(\"/data2/yinterian/ml-latest-small/\")\n", 69 | "list(PATH.iterdir())" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "userId,movieId,rating,timestamp\r", 82 | "\r\n", 83 | "1,31,2.5,1260759144\r", 84 | "\r\n", 85 | "1,1029,3.0,1260759179\r", 86 | "\r\n", 87 | "1,1061,3.0,1260759182\r", 88 | "\r\n", 89 | "1,1129,2.0,1260759185\r", 90 | "\r\n", 91 | "1,1172,4.0,1260759205\r", 92 | "\r\n", 93 | "1,1263,2.0,1260759151\r", 94 | "\r\n", 95 | "1,1287,2.0,1260759187\r", 96 | "\r\n", 97 | "1,1293,2.0,1260759148\r", 
98 | "\r\n", 99 | "1,1339,3.5,1260759125\r", 100 | "\r\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "! head $PATH/ratings.csv" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 5, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "data = pd.read_csv(PATH/\"ratings.csv\")" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 6, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/html": [ 125 | "
\n", 126 | "\n", 139 | "\n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | "
userIdmovieIdratingtimestamp
01312.51260759144
1110293.01260759179
2110613.01260759182
3111292.01260759185
4111724.01260759205
\n", 186 | "
" 188 | ], 189 | "text/plain": [ 190 | " userId movieId rating timestamp\n", 191 | "0 1 31 2.5 1260759144\n", 192 | "1 1 1029 3.0 1260759179\n", 193 | "2 1 1061 3.0 1260759182\n", 194 | "3 1 1129 2.0 1260759185\n", 195 | "4 1 1172 4.0 1260759205" 196 | ] 197 | }, 198 | "execution_count": 6, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "data.head()" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "### Encoding data\n", 212 | "We encode the data to have contiguous ids for users and movies. You can think about this as a categorical encoding of our two categorical variables userId and movieId." 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 7, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# split train and validation before encoding\n", 222 | "np.random.seed(3)\n", 223 | "msk = np.random.rand(len(data)) < 0.8\n", 224 | "train = data[msk].copy()\n", 225 | "val = data[~msk].copy()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 8, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# here is a handy function modified from fast.ai\n", 235 | "def proc_col(col, train_col=None):\n", 236 | " \"\"\"Encodes a pandas column with contiguous ids. \n", 237 | " \"\"\"\n", 238 | " if train_col is not None:\n", 239 | " uniq = train_col.unique()\n", 240 | " else:\n", 241 | " uniq = col.unique()\n", 242 | " name2idx = {o:i for i,o in enumerate(uniq)}\n", 243 | " return name2idx, np.array([name2idx.get(x, -1) for x in col]), len(uniq)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 9, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "def encode_data(df, train=None):\n", 253 | " \"\"\" Encodes rating data with contiguous user and movie ids. 
\n", 254 | " If train is provided, encodes df with the same encoding as train.\n", 255 | " \"\"\"\n", 256 | " df = df.copy()\n", 257 | " for col_name in [\"userId\", \"movieId\"]:\n", 258 | " train_col = None\n", 259 | " if train is not None:\n", 260 | " train_col = train[col_name]\n", 261 | " _,col,_ = proc_col(df[col_name], train_col)\n", 262 | " df[col_name] = col\n", 263 | " df = df[df[col_name] >= 0]\n", 264 | " return df" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 10, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | " userId movieId rating\n", 277 | "0 11 1 4\n", 278 | "1 11 23 5\n", 279 | "2 2 23 5\n", 280 | "3 2 4 3\n", 281 | "4 31 1 4\n", 282 | "5 31 23 4\n", 283 | "6 4 1 5\n", 284 | "7 4 3 2\n", 285 | "8 52 1 1\n", 286 | "9 52 3 4\n", 287 | "10 61 3 5\n", 288 | "11 7 23 1\n", 289 | "12 7 3 3\n", 290 | " userId movieId rating\n", 291 | "0 0 0 4\n", 292 | "1 0 1 5\n", 293 | "2 1 1 5\n", 294 | "3 1 2 3\n", 295 | "4 2 0 4\n", 296 | "5 2 1 4\n", 297 | "6 3 0 5\n", 298 | "7 3 3 2\n", 299 | "8 4 0 1\n", 300 | "9 4 3 4\n", 301 | "10 5 3 5\n", 302 | "11 6 1 1\n", 303 | "12 6 3 3\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "# to check my new implementation\n", 309 | "LOCAL_PATH = Path(\"images/\")\n", 310 | "df_t = pd.read_csv(LOCAL_PATH/\"tiny_training2.csv\")\n", 311 | "df_v = pd.read_csv(LOCAL_PATH/\"tiny_val2.csv\")\n", 312 | "print(df_t)\n", 313 | "df_t_e = encode_data(df_t)\n", 314 | "df_v_e = encode_data(df_v, df_t)\n", 315 | "df_v_e\n", 316 | "print(df_t_e)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 11, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "# encoding the train and validation data\n", 326 | "df_train = encode_data(train)\n", 327 | "df_val = encode_data(val, train)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "## 
Embedding layer" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 12, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "import torch\n", 344 | "import torch.nn as nn\n", 345 | "import torch.nn.functional as F" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 13, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# an Embedding module containing 10 user or item embeddings of size 3\n", 355 | "# embedding will be initialized at random\n", 356 | "embed = nn.Embedding(10, 3)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 14, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "tensor([[[-0.1301, 0.0691, -1.1678],\n", 368 | " [-0.9865, 0.4514, -1.4770],\n", 369 | " [-1.7121, 0.0701, 0.0481],\n", 370 | " [ 1.4485, 0.1340, 0.0099],\n", 371 | " [-1.4074, -0.8650, -0.1255],\n", 372 | " [-0.1301, 0.0691, -1.1678]]])" 373 | ] 374 | }, 375 | "execution_count": 14, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "# given a list of ids we can \"look up\" the embedding corresponding to each id\n", 382 | "a = torch.LongTensor([[1,2,0,4,5,1]])\n", 383 | "embed(a)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "## Matrix factorization model" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 15, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "class MF(nn.Module):\n", 400 | " def __init__(self, num_users, num_items, emb_size=100):\n", 401 | " super(MF, self).__init__()\n", 402 | " self.user_emb = nn.Embedding(num_users, emb_size)\n", 403 | " self.item_emb = nn.Embedding(num_items, emb_size)\n", 404 | " self.user_emb.weight.data.uniform_(0, 0.05)\n", 405 | " self.item_emb.weight.data.uniform_(0, 0.05)\n", 406 | " \n", 407 | " def forward(self, u, v):\n", 408 | " u = 
self.user_emb(u)\n", 409 | " v = self.item_emb(v)\n", 410 | " return (u*v).sum(1) " 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": {}, 416 | "source": [ 417 | "## Debugging MF model" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 16, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "data": { 427 | "text/html": [ 428 | "
\n", 429 | "\n", 442 | "\n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | "
userIdmovieIdrating
0004
1015
2115
3123
4204
5214
6305
7332
8401
9434
10535
11611
12633
\n", 532 | "
" 533 | ], 534 | "text/plain": [ 535 | " userId movieId rating\n", 536 | "0 0 0 4\n", 537 | "1 0 1 5\n", 538 | "2 1 1 5\n", 539 | "3 1 2 3\n", 540 | "4 2 0 4\n", 541 | "5 2 1 4\n", 542 | "6 3 0 5\n", 543 | "7 3 3 2\n", 544 | "8 4 0 1\n", 545 | "9 4 3 4\n", 546 | "10 5 3 5\n", 547 | "11 6 1 1\n", 548 | "12 6 3 3" 549 | ] 550 | }, 551 | "execution_count": 16, 552 | "metadata": {}, 553 | "output_type": "execute_result" 554 | } 555 | ], 556 | "source": [ 557 | "df_t_e" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 16, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "num_users = 7\n", 567 | "num_items = 4\n", 568 | "emb_size = 3\n", 569 | "\n", 570 | "user_emb = nn.Embedding(num_users, emb_size)\n", 571 | "item_emb = nn.Embedding(num_items, emb_size)\n", 572 | "users = torch.LongTensor(df_t_e.userId.values)\n", 573 | "items = torch.LongTensor(df_t_e.movieId.values)" 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 17, 579 | "metadata": {}, 580 | "outputs": [], 581 | "source": [ 582 | "U = user_emb(users)\n", 583 | "V = item_emb(items)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 18, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/plain": [ 594 | "tensor([[ 0.1547, 0.2277, 0.2442],\n", 595 | " [ 0.1547, 0.2277, 0.2442],\n", 596 | " [ 0.6601, 0.8225, -1.2139],\n", 597 | " [ 0.6601, 0.8225, -1.2139],\n", 598 | " [ 0.1672, -1.2177, 0.1403],\n", 599 | " [ 0.1672, -1.2177, 0.1403],\n", 600 | " [-1.1907, -1.2933, -0.5506],\n", 601 | " [-1.1907, -1.2933, -0.5506],\n", 602 | " [ 0.1938, -0.0683, -0.8493],\n", 603 | " [ 0.1938, -0.0683, -0.8493],\n", 604 | " [ 0.8506, -1.1564, 1.1165],\n", 605 | " [ 0.8639, -2.5148, -0.8391],\n", 606 | " [ 0.8639, -2.5148, -0.8391]])" 607 | ] 608 | }, 609 | "execution_count": 18, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "U" 616 | ] 617 | }, 618 | { 
619 | "cell_type": "code", 620 | "execution_count": 19, 621 | "metadata": {}, 622 | "outputs": [ 623 | { 624 | "data": { 625 | "text/plain": [ 626 | "tensor([[-0.1766, 0.2957, 0.4409],\n", 627 | " [ 0.1205, 0.1733, 0.1165],\n", 628 | " [ 0.5143, 0.6258, -0.5793],\n", 629 | " [-0.5603, 0.3582, -0.5370],\n", 630 | " [-0.1909, -1.5812, 0.2533],\n", 631 | " [ 0.1303, -0.9266, 0.0670],\n", 632 | " [ 1.3594, -1.6793, -0.9940],\n", 633 | " [-0.2324, 1.4822, 0.5151],\n", 634 | " [-0.2212, -0.0887, -1.5335],\n", 635 | " [ 0.0378, 0.0783, 0.7947],\n", 636 | " [ 0.1660, 1.3253, -1.0447],\n", 637 | " [ 0.6730, -1.9135, -0.4004],\n", 638 | " [ 0.1686, 2.8820, 0.7851]])" 639 | ] 640 | }, 641 | "execution_count": 19, 642 | "metadata": {}, 643 | "output_type": "execute_result" 644 | } 645 | ], 646 | "source": [ 647 | "# element wise multiplication\n", 648 | "U*V " 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": 20, 654 | "metadata": {}, 655 | "outputs": [ 656 | { 657 | "data": { 658 | "text/plain": [ 659 | "tensor([ 0.5600, 0.4103, 0.5608, -0.7391, -1.5187, -0.7294, -1.3139,\n", 660 | " 1.7649, -1.8434, 0.9108, 0.4466, -1.6409, 3.8357])" 661 | ] 662 | }, 663 | "execution_count": 20, 664 | "metadata": {}, 665 | "output_type": "execute_result" 666 | } 667 | ], 668 | "source": [ 669 | "# what we want is a dot product per row\n", 670 | "(U*V).sum(1) " 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": {}, 676 | "source": [ 677 | "## Training MF model" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 21, 683 | "metadata": {}, 684 | "outputs": [ 685 | { 686 | "name": "stdout", 687 | "output_type": "stream", 688 | "text": [ 689 | "671 8442\n" 690 | ] 691 | } 692 | ], 693 | "source": [ 694 | "num_users = len(df_train.userId.unique())\n", 695 | "num_items = len(df_train.movieId.unique())\n", 696 | "print(num_users, num_items) " 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 22, 702 
| "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "model = MF(num_users, num_items, emb_size=100) # .cuda() if you have a GPU" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 23, 711 | "metadata": {}, 712 | "outputs": [], 713 | "source": [ 714 | "def train_epocs(model, epochs=10, lr=0.01, wd=0.0, unsqueeze=False):\n", 715 | " optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)\n", 716 | " model.train()\n", 717 | " for i in range(epochs):\n", 718 | " users = torch.LongTensor(df_train.userId.values) # .cuda()\n", 719 | " items = torch.LongTensor(df_train.movieId.values) #.cuda()\n", 720 | " ratings = torch.FloatTensor(df_train.rating.values) #.cuda()\n", 721 | " if unsqueeze:\n", 722 | " ratings = ratings.unsqueeze(1)\n", 723 | " y_hat = model(users, items)\n", 724 | " loss = F.mse_loss(y_hat, ratings)\n", 725 | " optimizer.zero_grad()\n", 726 | " loss.backward()\n", 727 | " optimizer.step()\n", 728 | " print(loss.item()) \n", 729 | " test_loss(model, unsqueeze)" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 24, 735 | "metadata": {}, 736 | "outputs": [ 737 | { 738 | "name": "stdout", 739 | "output_type": "stream", 740 | "text": [ 741 | "torch.Size([79799])\n", 742 | "torch.Size([79799, 1])\n" 743 | ] 744 | } 745 | ], 746 | "source": [ 747 | "# Here is what unsqueeze does\n", 748 | "ratings = torch.FloatTensor(df_train.rating.values)\n", 749 | "print(ratings.shape)\n", 750 | "ratings = ratings.unsqueeze(1) # .cuda()\n", 751 | "print(ratings.shape)" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 25, 757 | "metadata": {}, 758 | "outputs": [], 759 | "source": [ 760 | "def test_loss(model, unsqueeze=False):\n", 761 | " model.eval()\n", 762 | " users = torch.LongTensor(df_val.userId.values) #.cuda()\n", 763 | " items = torch.LongTensor(df_val.movieId.values) #.cuda()\n", 764 | " ratings = torch.FloatTensor(df_val.rating.values) #.cuda()\n", 765 | " if 
unsqueeze:\n", 766 | " ratings = ratings.unsqueeze(1)\n", 767 | " y_hat = model(users, items)\n", 768 | " loss = F.mse_loss(y_hat, ratings)\n", 769 | " print(\"test loss %.3f \" % loss.item())" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 26, 775 | "metadata": {}, 776 | "outputs": [ 777 | { 778 | "name": "stdout", 779 | "output_type": "stream", 780 | "text": [ 781 | "13.23068904876709\n", 782 | "5.119534015655518\n", 783 | "2.3902299404144287\n", 784 | "3.441521406173706\n", 785 | "0.9096018671989441\n", 786 | "1.8109439611434937\n", 787 | "2.749631643295288\n", 788 | "2.278921604156494\n", 789 | "1.1593214273452759\n", 790 | "0.925656795501709\n", 791 | "test loss 1.947 \n" 792 | ] 793 | } 794 | ], 795 | "source": [ 796 | "train_epocs(model, epochs=10, lr=0.1)" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "execution_count": 27, 802 | "metadata": {}, 803 | "outputs": [ 804 | { 805 | "name": "stdout", 806 | "output_type": "stream", 807 | "text": [ 808 | "1.7027523517608643\n", 809 | "1.0512956380844116\n", 810 | "0.7498359680175781\n", 811 | "0.6950282454490662\n", 812 | "0.7596880197525024\n", 813 | "0.8397833108901978\n", 814 | "0.8818210363388062\n", 815 | "0.8753886818885803\n", 816 | "0.8334189653396606\n", 817 | "0.7767009735107422\n", 818 | "0.7246581315994263\n", 819 | "0.6901594400405884\n", 820 | "0.6771144866943359\n", 821 | "0.6810137033462524\n", 822 | "0.69219970703125\n", 823 | "test loss 0.894 \n" 824 | ] 825 | } 826 | ], 827 | "source": [ 828 | "train_epocs(model, epochs=15, lr=0.01)" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 28, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "name": "stdout", 838 | "output_type": "stream", 839 | "text": [ 840 | "0.7007282376289368\n", 841 | "0.6625022888183594\n", 842 | "0.6684340834617615\n", 843 | "0.6455244421958923\n", 844 | "0.6380830407142639\n", 845 | "0.6450700759887695\n", 846 | "0.6408411264419556\n", 847 | 
"0.6256920099258423\n", 848 | "0.6144804358482361\n", 849 | "0.6132143139839172\n", 850 | "0.6140048503875732\n", 851 | "0.6083489060401917\n", 852 | "0.5969548225402832\n", 853 | "0.5860226154327393\n", 854 | "0.5791704058647156\n", 855 | "test loss 0.822 \n" 856 | ] 857 | } 858 | ], 859 | "source": [ 860 | "train_epocs(model, epochs=15, lr=0.01)" 861 | ] 862 | }, 863 | { 864 | "cell_type": "markdown", 865 | "metadata": {}, 866 | "source": [ 867 | "## MF with bias" 868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": 29, 873 | "metadata": {}, 874 | "outputs": [], 875 | "source": [ 876 | "class MF_bias(nn.Module):\n", 877 | " def __init__(self, num_users, num_items, emb_size=100):\n", 878 | " super(MF_bias, self).__init__()\n", 879 | " self.user_emb = nn.Embedding(num_users, emb_size)\n", 880 | " self.user_bias = nn.Embedding(num_users, 1)\n", 881 | " self.item_emb = nn.Embedding(num_items, emb_size)\n", 882 | " self.item_bias = nn.Embedding(num_items, 1)\n", 883 | " self.user_emb.weight.data.uniform_(0,0.05)\n", 884 | " self.item_emb.weight.data.uniform_(0,0.05)\n", 885 | " self.user_bias.weight.data.uniform_(-0.01,0.01)\n", 886 | " self.item_bias.weight.data.uniform_(-0.01,0.01)\n", 887 | " \n", 888 | " def forward(self, u, v):\n", 889 | " U = self.user_emb(u)\n", 890 | " V = self.item_emb(v)\n", 891 | " b_u = self.user_bias(u).squeeze()\n", 892 | " b_v = self.item_bias(v).squeeze()\n", 893 | " return (U*V).sum(1) + b_u + b_v" 894 | ] 895 | }, 896 | { 897 | "cell_type": "code", 898 | "execution_count": 32, 899 | "metadata": {}, 900 | "outputs": [], 901 | "source": [ 902 | "model = MF_bias(num_users, num_items, emb_size=100) #.cuda()" 903 | ] 904 | }, 905 | { 906 | "cell_type": "code", 907 | "execution_count": 33, 908 | "metadata": {}, 909 | "outputs": [ 910 | { 911 | "name": "stdout", 912 | "output_type": "stream", 913 | "text": [ 914 | "13.233644485473633\n", 915 | "9.459980964660645\n", 916 | "4.618295669555664\n", 917 | 
"1.2266862392425537\n", 918 | "2.4537320137023926\n", 919 | "3.888521432876587\n", 920 | "2.6157896518707275\n", 921 | "1.1573508977890015\n", 922 | "0.8204843997955322\n", 923 | "1.3100122213363647\n", 924 | "test loss 2.126 \n" 925 | ] 926 | } 927 | ], 928 | "source": [ 929 | "train_epocs(model, epochs=10, lr=0.05, wd=1e-5)" 930 | ] 931 | }, 932 | { 933 | "cell_type": "code", 934 | "execution_count": 34, 935 | "metadata": {}, 936 | "outputs": [ 937 | { 938 | "name": "stdout", 939 | "output_type": "stream", 940 | "text": [ 941 | "1.9130752086639404\n", 942 | "1.3447301387786865\n", 943 | "0.9572998285293579\n", 944 | "0.7714419364929199\n", 945 | "0.752704381942749\n", 946 | "0.8091325759887695\n", 947 | "0.8543495535850525\n", 948 | "0.8524782657623291\n", 949 | "0.8114585876464844\n", 950 | "0.7577651739120483\n", 951 | "test loss 0.851 \n" 952 | ] 953 | } 954 | ], 955 | "source": [ 956 | "train_epocs(model, epochs=10, lr=0.01, wd=1e-5)" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": 35, 962 | "metadata": {}, 963 | "outputs": [ 964 | { 965 | "name": "stdout", 966 | "output_type": "stream", 967 | "text": [ 968 | "0.7163214087486267\n", 969 | "0.7023102045059204\n", 970 | "0.6904919147491455\n", 971 | "0.6807348728179932\n", 972 | "0.6728458404541016\n", 973 | "0.6666097044944763\n", 974 | "0.6618107557296753\n", 975 | "0.6582220792770386\n", 976 | "0.6556380391120911\n", 977 | "0.6538312435150146\n", 978 | "test loss 0.805 \n" 979 | ] 980 | } 981 | ], 982 | "source": [ 983 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-5)" 984 | ] 985 | }, 986 | { 987 | "cell_type": "markdown", 988 | "metadata": {}, 989 | "source": [ 990 | "Note that these models are susceptible to weight initialization, optimization algorithm and regularization." 
991 | ] 992 | }, 993 | { 994 | "cell_type": "markdown", 995 | "metadata": {}, 996 | "source": [ 997 | "## Neural Network Model" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "code", 1002 | "execution_count": 76, 1003 | "metadata": {}, 1004 | "outputs": [], 1005 | "source": [ 1006 | "# Note here there is no matrix multiplication, we could potentially make the embeddings of different sizes.\n", 1007 | "# Here we could get better results by keep playing with regularization.\n", 1008 | " \n", 1009 | "class CollabFNet(nn.Module):\n", 1010 | " def __init__(self, num_users, num_items, emb_size=100, n_hidden=10):\n", 1011 | " super(CollabFNet, self).__init__()\n", 1012 | " self.user_emb = nn.Embedding(num_users, emb_size)\n", 1013 | " self.item_emb = nn.Embedding(num_items, emb_size)\n", 1014 | " self.lin1 = nn.Linear(emb_size*2, n_hidden)\n", 1015 | " self.lin2 = nn.Linear(n_hidden, 1)\n", 1016 | " self.drop1 = nn.Dropout(0.1)\n", 1017 | " \n", 1018 | " def forward(self, u, v):\n", 1019 | " U = self.user_emb(u)\n", 1020 | " V = self.item_emb(v)\n", 1021 | " x = F.relu(torch.cat([U, V], dim=1))\n", 1022 | " x = self.drop1(x)\n", 1023 | " x = F.relu(self.lin1(x))\n", 1024 | " x = self.lin2(x)\n", 1025 | " return x" 1026 | ] 1027 | }, 1028 | { 1029 | "cell_type": "code", 1030 | "execution_count": 77, 1031 | "metadata": {}, 1032 | "outputs": [], 1033 | "source": [ 1034 | "model = CollabFNet(num_users, num_items, emb_size=100) #.cuda()" 1035 | ] 1036 | }, 1037 | { 1038 | "cell_type": "code", 1039 | "execution_count": 78, 1040 | "metadata": {}, 1041 | "outputs": [ 1042 | { 1043 | "name": "stdout", 1044 | "output_type": "stream", 1045 | "text": [ 1046 | "13.101761817932129\n", 1047 | "1.957230806350708\n", 1048 | "1.2605514526367188\n", 1049 | "1.3381402492523193\n", 1050 | "1.061022162437439\n", 1051 | "1.1385098695755005\n", 1052 | "0.9165319800376892\n", 1053 | "0.9622549414634705\n", 1054 | "0.8723138570785522\n", 1055 | "0.8084518909454346\n", 1056 | "0.8500765562057495\n", 
1057 | "0.7535637617111206\n", 1058 | "0.791947603225708\n", 1059 | "0.7653028964996338\n", 1060 | "0.7301635146141052\n", 1061 | "test loss 0.869 \n" 1062 | ] 1063 | } 1064 | ], 1065 | "source": [ 1066 | "train_epocs(model, epochs=15, lr=0.05, wd=1e-6, unsqueeze=True) " 1067 | ] 1068 | }, 1069 | { 1070 | "cell_type": "code", 1071 | "execution_count": 79, 1072 | "metadata": {}, 1073 | "outputs": [ 1074 | { 1075 | "name": "stdout", 1076 | "output_type": "stream", 1077 | "text": [ 1078 | "0.7691234350204468\n", 1079 | "0.9072751402854919\n", 1080 | "0.7757670879364014\n", 1081 | "0.7180655598640442\n", 1082 | "0.7918605208396912\n", 1083 | "0.7724899053573608\n", 1084 | "0.7119362950325012\n", 1085 | "0.7106000185012817\n", 1086 | "0.7403213977813721\n", 1087 | "0.7438958883285522\n", 1088 | "test loss 0.816 \n" 1089 | ] 1090 | } 1091 | ], 1092 | "source": [ 1093 | "train_epocs(model, epochs=10, lr=0.01, wd=1e-6, unsqueeze=True)" 1094 | ] 1095 | }, 1096 | { 1097 | "cell_type": "code", 1098 | "execution_count": 80, 1099 | "metadata": {}, 1100 | "outputs": [ 1101 | { 1102 | "name": "stdout", 1103 | "output_type": "stream", 1104 | "text": [ 1105 | "0.7163267731666565\n", 1106 | "0.7032808065414429\n", 1107 | "0.695513904094696\n", 1108 | "0.6967512369155884\n", 1109 | "0.6998187303543091\n", 1110 | "0.700666606426239\n", 1111 | "0.7004959583282471\n", 1112 | "0.6982167959213257\n", 1113 | "0.6955875158309937\n", 1114 | "0.694402813911438\n", 1115 | "test loss 0.796 \n" 1116 | ] 1117 | } 1118 | ], 1119 | "source": [ 1120 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)" 1121 | ] 1122 | }, 1123 | { 1124 | "cell_type": "code", 1125 | "execution_count": 81, 1126 | "metadata": {}, 1127 | "outputs": [ 1128 | { 1129 | "name": "stdout", 1130 | "output_type": "stream", 1131 | "text": [ 1132 | "0.6919353008270264\n", 1133 | "0.6934647560119629\n", 1134 | "0.6922585368156433\n", 1135 | "0.6942275762557983\n", 1136 | "0.6926798224449158\n", 1137 | 
"0.6916202902793884\n", 1138 | "0.6911264061927795\n", 1139 | "0.6923496127128601\n", 1140 | "0.6922929286956787\n", 1141 | "0.6904215812683105\n", 1142 | "test loss 0.795 \n" 1143 | ] 1144 | } 1145 | ], 1146 | "source": [ 1147 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)" 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "markdown", 1152 | "metadata": {}, 1153 | "source": [ 1154 | "# References\n", 1155 | "* This notebook is based on [lesson 5 of Jeremy Howard's Deep Learning Course](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson5-movielens.ipynb)" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": null, 1161 | "metadata": {}, 1162 | "outputs": [], 1163 | "source": [] 1164 | } 1165 | ], 1166 | "metadata": { 1167 | "kernelspec": { 1168 | "display_name": "Python 3", 1169 | "language": "python", 1170 | "name": "python3" 1171 | }, 1172 | "language_info": { 1173 | "codemirror_mode": { 1174 | "name": "ipython", 1175 | "version": 3 1176 | }, 1177 | "file_extension": ".py", 1178 | "mimetype": "text/x-python", 1179 | "name": "python", 1180 | "nbconvert_exporter": "python", 1181 | "pygments_lexer": "ipython3", 1182 | "version": "3.6.5" 1183 | }, 1184 | "toc": { 1185 | "nav_menu": {}, 1186 | "number_sections": true, 1187 | "sideBar": true, 1188 | "skip_h1_title": false, 1189 | "toc_cell": false, 1190 | "toc_position": {}, 1191 | "toc_section_display": "block", 1192 | "toc_window_display": false 1193 | } 1194 | }, 1195 | "nbformat": 4, 1196 | "nbformat_minor": 2 1197 | } 1198 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pytorch 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | dependencies: 6 | - python==3.6 7 | - ipython 8 | - jupyter 9 | - pytorch 10 | - torchvision 11 | - numpy 12 | - matplotlib 13 | - pandas 14 | - opencv 15 | - spacy 
-------------------------------------------------------------------------------- /images/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanneta/pytorch-tutorials/53c1dc84fc11e4c50877fa141f9b84ad18265047/images/model.png -------------------------------------------------------------------------------- /images/tiny_training2.csv: -------------------------------------------------------------------------------- 1 | userId,movieId,rating 2 | 11,1,4 3 | 11,23,5 4 | 2,23,5 5 | 2,4,3 6 | 31,1,4 7 | 31,23,4 8 | 4,1,5 9 | 4,3,2 10 | 52,1,1 11 | 52,3,4 12 | 61,3,5 13 | 7,23,1 14 | 7,3,3 15 | -------------------------------------------------------------------------------- /images/tiny_val2.csv: -------------------------------------------------------------------------------- 1 | userId,movieId,rating 2 | 2,1,5 3 | 4,23,5 4 | 4,2,3 5 | -------------------------------------------------------------------------------- /intro-to-pytoch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# import pytorch libraries\n", 20 | "%matplotlib inline\n", 21 | "import torch \n", 22 | "import torch.autograd as autograd \n", 23 | "import torch.nn as nn \n", 24 | "import torch.nn.functional as F\n", 25 | "import torch.optim as optim\n", 26 | "import numpy as np" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Intro to Pytorch" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "PyTorch consists of 4 main packages:\n", 41 | "* torch: a general purpose array library similar to Numpy that can do computations on GPU\n", 42 | "* torch.autograd: a package for automatically obtaining gradients\n", 43 | "* torch.nn: a neural net library with common layers and cost functions\n", 44 | "* torch.optim: an optimization package with common optimization algorithms like SGD, Adam, etc" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Pytorch tensors\n", 52 | "Like Numpy tensors but can utilize GPUs to accelerate its numerical computations. 
" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# Create random tensor\n", 62 | "N = 5\n", 63 | "x = torch.randn(N, 10).type(torch.FloatTensor)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "tensor([[-1.8239, 0.7380, 0.0890, -1.1650, 0.3185, 1.8577, 0.1110,\n", 75 | " -0.8694, 1.1761, 1.0106],\n", 76 | " [ 1.8847, -0.3493, -0.3044, 1.3749, 0.4396, 1.5092, -0.7950,\n", 77 | " 0.5705, 0.9309, -0.3835],\n", 78 | " [ 0.4320, 0.1081, -0.8353, 0.5639, 0.1228, 1.4746, -0.5602,\n", 79 | " -1.2526, 0.0964, -0.1116],\n", 80 | " [ 1.8627, -1.1173, 2.0276, 0.6197, -1.0586, 0.6214, -0.1054,\n", 81 | " -0.3784, 0.9780, -1.6672],\n", 82 | " [-0.1745, -1.0696, -0.1319, -0.5890, 0.7507, -0.3775, -1.7948,\n", 83 | " 1.2520, -0.8993, 1.2639]])" 84 | ] 85 | }, 86 | "execution_count": 3, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "x" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": [ 103 | "tensor([[-1.8239, 0.7380, 0.0890, -1.1650, 0.3185, 1.8577, 0.1110,\n", 104 | " -0.8694, 1.1761, 1.0106, 1.8847, -0.3493, -0.3044, 1.3749,\n", 105 | " 0.4396, 1.5092, -0.7950, 0.5705, 0.9309, -0.3835, 0.4320,\n", 106 | " 0.1081, -0.8353, 0.5639, 0.1228, 1.4746, -0.5602, -1.2526,\n", 107 | " 0.0964, -0.1116, 1.8627, -1.1173, 2.0276, 0.6197, -1.0586,\n", 108 | " 0.6214, -0.1054, -0.3784, 0.9780, -1.6672, -0.1745, -1.0696,\n", 109 | " -0.1319, -0.5890, 0.7507, -0.3775, -1.7948, 1.2520, -0.8993,\n", 110 | " 1.2639]])" 111 | ] 112 | }, 113 | "execution_count": 4, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "# reshaping of tensors using .view()\n", 120 | "x.view(1,-1) #-1 makes torch 
infer the second dim" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Pytorch Autograd\n", 128 | "The autograd package in PyTorch provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. For example, the gradient of the error with respect to all parameters.\n", 129 | "\n", 130 | "In order for this to happen we need to declare our parameters as Tensors with the requires_grad=True keyword. Here is an example:" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 5, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "x = torch.tensor([1., 2., 3., 4., 5., 6.], requires_grad=True)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "x.grad" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "tensor(48.)" 160 | ] 161 | }, 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "L = (2*x+1).sum()\n", 169 | "L" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "L.backward() # computes the grad of L with respect to x" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 9, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | "tensor([ 2., 2., 2., 2., 2., 2.])" 190 | ] 191 | }, 192 | "execution_count": 9, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "x.grad" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "## torch.nn module\n", 206 | "A neural net library with common layers and 
cost functions" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 10, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# linear transformation of a Nx5 matrix into a Nx3 matrix, where N can be anything \n", 216 | "# (number of observations)\n", 217 | "D = 5 # number of input features\n", 218 | "M = 3 # neurons in the first hidden layer\n", 219 | "linear_map = nn.Linear(D, M)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 11, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": [ 230 | "[Parameter containing:\n", 231 | " tensor([[-0.3345, 0.1780, 0.1944, 0.3522, -0.2162],\n", 232 | " [ 0.1899, -0.1076, 0.3387, 0.3439, 0.4197],\n", 233 | " [-0.3837, -0.2800, 0.1663, 0.1904, 0.0215]]), Parameter containing:\n", 234 | " tensor([-0.1277, 0.4425, 0.4374])]" 235 | ] 236 | }, 237 | "execution_count": 11, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "# parameters are initialized randomly\n", 244 | "[p for p in linear_map.parameters()]" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "# Linear Regression with Pytorch" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "The goal of linear regression is to fit a line to a set of points." 
259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 12, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# Here we generate some fake data\n", 268 | "def lin(a,b,x): return a*x+b\n", 269 | "\n", 270 | "def gen_fake_data(n, a, b):\n", 271 | " x = np.random.uniform(0,1,n) \n", 272 | " y = lin(a,b,x) + 0.1 * np.random.normal(0,3,n)\n", 273 | " return x, y\n", 274 | "\n", 275 | "x, y = gen_fake_data(50, 3., 8.)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 13, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFgRJREFUeJzt3Xuw7XV53/H3hwMKBE25bAJBjgdSksGhzW3BqE1SWzS1jgPVmkJaJ9BBGK2XNPYSM+2oY5oUTTpNJmaCJ+KIbYIQktFTY4PUS0wbcc4GJQUZB4oIJ6BsPQSHAZEDT/9Yy5ydxdrnt87e+/f7rcv7NbNnr8t37/3wY+/1nO/zfL/flapCkqRDOaLvACRJs89kIUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0lSI5OFJKmRyUKS1OjItr5xkg8ArwQeqqpzRo/9DPBO4GzgvKpa3eBrXw78JrADeH9VXdn080466aTatWvX9gQvSUvilltu+UZVrTSNay1ZAB8E3gt8aN1jtwOvBt630Rcl2QH8NvAyYB+wN8meqvrSoX7Yrl27WF2dmHskSRtI8tVpxrVWhqqqzwL7xx67s6q+3PCl5wF3V9U9VfUd4MPAhS2FKUmawiz2LE4D7l93f9/oMUlST2YxWWTCYxOPxk1yRZLVJKtra2sthyVJy2sWk8U+4PR1958HPDBpYFXtrqpBVQ1WVhr7M5KkTZrFZLEXOCvJGUmeBVwM7Ok5Jklaaq0liyTXAp8DfijJviSXJXlVkn3Ai4A/TnLjaOz3J/k4QFUdAN4E3AjcCVxfVXe0FackqVkW5Z3yBoNBuXRWkg5PkluqatA0bhbLUJK01O5Ze5Tr9t7HPWuP9h3KX2tzU54kLa171h5l7737OXfXCZy5ctxhfd0rf+t/UwUJfOzNP3FYX98Wk4UkbbOtvODvvXc/VfD4k09xzFE72Hvv/plIFpahJKnB4ZaF1r/gVw3vT+vcXSeQwDFH7SAZ3p8Fziwk6RA2M0vYygv+mSvH8bE3/8SmSlhtMllI0iFspiy01Rf8M1eOm5kk8V0mC0k6hM3OEmbxBX8rTBaSdAizWhbqmslCkhos2ixhM1wNJUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0maU12eTuvSWUmaQ12fTuvMQpLm0FYOK9wMk4UkzaGuT6e1DCVJc6jrY0hMFpI0p7o8hsQylCSpkclCktTIZCFJatRaskjygSQPJbl93WMnJLkpyV2jz8dv8LVPJfni6GNPWzFKkqbT5szig8DLxx57G/DJqjoL+OTo/iSPV9WPjD4uaDFGSdIUWksWVfVZYHyXyIXANaPb1wD/pK2fL6l/XR
5HoXZ1vXT2+6rqQYCqejDJyRuMOzrJKnAAuLKqPtJZhJK2RdfHUahds9rg3llVA+CfA7+R5AcmDUpyRZLVJKtra2vdRijpkLo+jkLt6jpZfD3JqQCjzw9NGlRVD4w+3wN8BvjRDcbtrqpBVQ1WVlbaiVjSpnR9HIXa1XUZag9wCXDl6PNHxweMVkg9VlVPJDkJ+HvAezqNUtKWdX0chdrVWrJIci3wEuCkJPuAdzBMEtcnuQy4D/iZ0dgB8Pqqeh1wNvC+JE8znPlcWVVfaitOSe3p8jiKZXPP2qOdJuLWkkVV/ewGT50/Yewq8LrR7T8H/k5bcUnSvOtj8cCsNrglSRvoY/GAyUKS5kwfiwc8olyS5kwfiwdMFpI0h7pePGAZSpLUyGQh6bB43tNysgwlaWqe97S8nFlImprnPS0vk4WkqXne0/KyDCVpap73tLxMFtKC2+4zhDzvaTmZLKQFZkNa28WehbTAbEhru5gspAVmQ1rbxTKUtMBsSG9d1+8bMatMFtKCsyG9efZ8DrIMJUkbsOdzkMlCkjZgz+cgy1CStAF7PgeZLCTpEOz5DFmGkiQ1MllIM8z3jtCssAwlzSiXbWqWOLOQZpTLNjVLWksWST6Q5KEkt6977IQkNyW5a/T5+A2+9pLRmLuSXNJWjNIsc9mmZkmqqp1vnPwU8Cjwoao6Z/TYe4D9VXVlkrcBx1fVL4593QnAKjAACrgF+PGqevhQP28wGNTq6moL/yVSfzxqQm1LcktVDZrGtdazqKrPJtk19vCFwEtGt68BPgP84tiYfwTcVFX7AZLcBLwcuLalUKWZ5bJNzYquexbfV1UPAow+nzxhzGnA/evu7xs99gxJrkiymmR1bW1t24OVJA3NYoM7Ex6bWCurqt1VNaiqwcrKSsthSdLy6jpZfD3JqQCjzw9NGLMPOH3d/ecBD3QQmyRpA10niz3Ad1c3XQJ8dMKYG4GfTnL8aLXUT48ekyT1pM2ls9cCnwN+KMm+JJcBVwIvS3IX8LLRfZIMkrwfYNTY/mVg7+jjXd9tdkvzou+d133/fC2e1pbOds2ls5oVfe+87vvnb4ZLhPvT+9JZaVmt33l9zFE72Hvv/k5fAPv++YdrHpPbMprF1VDSXOt753XfP/9weazJfHBmIW2zvt8wp+nnz1rJZ96S27KyZyEtkVkt+cxaAlsm9iykJXC4L7Kz2s/wWJPZZ7KQ5tRmZgmWfLRZJgtpBmymDLOZWULf/RTNL5OF1LPN9hE2O0uw5KPNMFlIPdtsH8FZgrpkspB6tpU+grMEdcVkIfXMGYLmgclCmgHOEDTrPO5D6ognwWqeObOQOjCrO6elaTmzkFowPovwsDzNO2cW0jabNItYhp3Tnu+02EwW0jabtG/ionN3LvSKp82U2Uwu88VkIW2zjWYRi7zi6XA3FtrDmT8mC2mbLeO+icMts83q6bfamMlCasEizyImOdwEuQw9nEVjspC0LQ4nQS7j7GvemSwk9WLZZl/zzn0WkqRGvSSLJD+f5PYkdyT51xOef0mSR5J8cfTx9j7ilCQNdV6GSnIOcDlwHvAd4E+S/HFV3TU29M+q6pVdxydJeqY+ZhZnAzdX1WNVdQD4U+BVPcQhzQQPGNQ86KPBfTvwK0lOBB4HXgGsThj3oiS3AQ8A/7aq7hgfkOQK4AqAnTt3thex1BI3p2ledD6zqKo7gXcDNwF/AtwGHBgbdivw/Kr6YeC3gI9s8L12V9WgqgYrKystRi21wwMGNS96aXBX1dVV9WNV9VPAfuCusee/VVWPjm5/HDgqyUk9hCq1ys1pmhe97LNIcnJVPZRkJ/Bq4EVjz58CfL2qKsl5DJPaN3sIVWqVm9M0L/ralPeHo57Fk8Abq+rhJK8HqKqrgNcAb0hygGFf4+Kqqp5i1YxalFNL3ZymedBLsqiqn5zw2FXrbr8XeG+nQWmu2BiWuuUObs2lWW0MuwxWi6pxZpHkTcDvVdXDHcQjTWUWG8
OLOtvZrnLfopQNl9U0ZahTgL1JbgU+ANxo/0B9m8XG8CK+R8N2JcBFTaTLpLEMVVX/ETgLuBq4FLgrya8m+YGWY5MO6cyV47jo3J0z86Izi7Odrdquct+slg01vaka3KMlrF8DvsZwA93xwA1Jbqqqf99mgNK8mMXZzlZtVwJcxES6bNJUUUryFuAS4BvA+4GPVNWTSY4A7qqqmZhhDAaDWl2ddGqItP2Wqf5uz2KxJbmlqgZN46aZWZwEvLqqvrr+wap6OomnwmrpLFv9fbv2gbifZL5N07N4+3iiWPfcndsfkjTbrL9rGbnPQjpMm6m/u/9C88734JYO0+E2spetbKXFZLKQNuFw6u+LuP9Cy8cylNQyl41qETizkDawXUs9F3H/hZaPyUKaYLzP8Dv/4sf42re+vekXe5eNat6ZLKQJ1vcZnn3kEVz+oVV2HHGEDWotLXsW0gTr+wzD2UXcV6Gl5sxCmmB9n+GU5x7NG37vVhvUWmomC2kD6/sMNqi17EwW0hRsUGvZ2bOQJDUyWUiSGpksJEmNTBZqjSetSoujlwZ3kp8HLgcC/G5V/cbY8wF+E3gF8BhwaVXd2nmg2jRPWpUWS+cziyTnMEwU5wE/DLwyyVljw/4xcNbo4wrgdzoNUlvmGwRJi6WPMtTZwM1V9VhVHQD+FHjV2JgLgQ/V0M3A30pyateBavM8aVVaLH2UoW4HfiXJicDjDEtNq2NjTgPuX3d/3+ixBzuJUFvmSavSYuk8WVTVnUneDdwEPArcBhwYG5ZJXzr+QJIrGJap2Llz5zZHqu/a7FHdbmSTFkcvDe6quhq4GiDJrzKcOay3Dzh93f3nAQ9M+D67gd0Ag8HgGclEW2ejWhL0tHQ2ycmjzzuBVwPXjg3ZA/xchl4IPFJVlqB6cKhGtUtjpeXR19lQfzjqWTwJvLGqHk7yeoCqugr4OMNext0Ml87+y57iXHobNaqdcUjLpa8y1E9OeOyqdbcLeGOnQWmijRrV62ccxxy1g7337jdZSAvMU2fVaFKj2qWx0nIxWWhTXBorLReThTbNpbHS8vAgQUlSI5PFiMtAJWljlqFwGagkNXFmgSekSlITkwUuA5WkJpahcBmoJDUxWYy4DFSSNmYZSpLUyGQhSWpkslhw7h+RtB3sWSww949I2i7OLBaY+0ckbReTxQJz/4ik7WIZaoFtdf/IPWuPuvdEEmCyWHib3T9iv0PSepahNNHee/fz1NPF408+xVNPl/0OacmZLDTRKc89micOPA3AEwee5pTnHt1zRJL6ZLLQRF/71rd59pHDX49nH3kEX/vWt3uOSFKf7FloonN3ncCOI+JKKkmAyUIb8CReSev1UoZK8gtJ7khye5Jrkxw99vylSdaSfHH08bo+4lx2Z64cx0Xn7jRRSOo+WSQ5DXgLMKiqc4AdwMUThl5XVT8y+nh/p0FKkv6GvhrcRwLHJDkSOBZ4oKc4JElT6DxZVNVfAr8O3Ac8CDxSVZ+YMPSfJvmLJDckOb3TICVJf0MfZajjgQuBM4DvB74nyWvHhv0PYFdV/V3gfwHXbPC9rkiymmR1bW2tzbAlaan1UYZ6KfCVqlqrqieBPwJevH5AVX2zqp4Y3f1d4McnfaOq2l1Vg6oarKystBq0JC2zPpLFfcALkxybJMD5wJ3rByQ5dd3dC8af13LwjZuk2dH5Pouq+nySG4BbgQPAF4DdSd4FrFbVHuAtSS4YPb8fuLTrOD1xtV8eZCjNll425VXVO4B3jD389nXP/xLwS50GtY4vVP1b/8ZNxxy1g7337vf/gdQjz4aaYNI7zG1HSWTWyiqzFs96vnGTNFs87mOC8ReqU5579JZnGrM2W5m1eMZ53Ig0W0wWE4y/UG1HSWTWyiqzFs8km33jJknbz2SxgfEXqq2WRGatrDJr8UiabamqvmPYFoPBoFZXV7f0PQ61Amo7Vke1tcJqs9/XFV+SktxSVYOmcc4sRppq+NtREmmjrLKV3o
NlHknTcjXUyKQVUPNgXuOWNF9MFiPzWsOf17glzRd7FuvMaw1/XuOW1D97FpvQZQ1/O1/g7T1IapvJogezviFOksbZs+jA+LEaNqUlzRtnFlvUVE6aNIuwKS1p3pgstmCactKkYzUuOnen5x5Jmismiy2Y5nyljWYRNqUlzROTxRZMU07y9FRJi8BksQXTJgJnEZLmnclii0wEkpaBS2clSY1MFpKkRiaLBrP8PtWS1BV7FodwqH0UHt4naZmYLA5ho30Unu0kadn0UoZK8gtJ7khye5Jrkxw99vyzk1yX5O4kn0+yq484N9pH0cXZTpa/JM2SzmcWSU4D3gK8oKoeT3I9cDHwwXXDLgMerqq/neRi4N3ARV3HutE+irbPdnLmImnW9FWGOhI4JsmTwLHAA2PPXwi8c3T7BuC9SVI9vFPTpH0Ube/KnuYYEUnqUufJoqr+MsmvA/cBjwOfqKpPjA07Dbh/NP5AkkeAE4FvdBrsIbS5Gc9TaSXNmj7KUMcznDmcAfwV8AdJXltV/339sAlf+oxZRZIrgCsAdu7c2UK0/fA8KUmzpo8G90uBr1TVWlU9CfwR8OKxMfuA0wGSHAl8L/CMLnJV7a6qQVUNVlZWWg67W2euHMdF5+40UUiaCX0ki/uAFyY5NkmA84E7x8bsAS4Z3X4N8Kk++hWSpKHOk0VVfZ5h0/pW4P+OYtid5F1JLhgNuxo4McndwFuBt3UdpyTpoCzKP9gHg0Gtrq72HYYkzZUkt1TVoGmcZ0NJkhqZLCRJjUwWkqRGJosWeb6TpEXhqbMt8XwnSYvEmUVLujiZVpK6YrJoiec7SVoklqFa4vlOkhaJyaJFbZ5MK0ldsgwlSWpkspAkNTJZSJIamSwkSY1MFpKkRiYLSVIjk4UkqdHCvPlRkjXgq2MPnwR8o4dwZo3X4SCvxZDX4aBlvxbPr6qVpkELkywmSbI6zTtALTqvw0FeiyGvw0Fei+lYhpIkNTJZSJIaLXqy2N13ADPC63CQ12LI63CQ12IKC92zkCRtj0WfWUiStsFCJIskL0/y5SR3J3nbhOefneS60fOfT7Kr+yjbN8V1eGuSLyX5iySfTPL8PuLsQtO1WDfuNUkqyUKuhpnmOiT5Z6PfizuS/H7XMXZlir+PnUk+neQLo7+RV/QR58yqqrn+AHYA/w84E3gWcBvwgrEx/wq4anT7YuC6vuPu6Tr8A+DY0e03LOJ1mPZajMY9B/gscDMw6Dvunn4nzgK+ABw/un9y33H3eC12A28Y3X4BcG/fcc/SxyLMLM4D7q6qe6rqO8CHgQvHxlwIXDO6fQNwfpJ0GGMXGq9DVX26qh4b3b0ZeF7HMXZlmt8JgF8G3gN8u8vgOjTNdbgc+O2qehigqh7qOMauTHMtCnju6Pb3Ag90GN/MW4RkcRpw/7r7+0aPTRxTVQeAR4ATO4muO9Nch/UuA/5nqxH1p/FaJPlR4PSq+liXgXVsmt+JHwR+MMn/SXJzkpd3Fl23prkW7wRem2Qf8HHgzd2ENh8W4W1VJ80Qxpd4TTNm3k3935jktcAA+PutRtSfQ16LJEcA/xW4tKuAejLN78SRDEtRL2E40/yzJOdU1V+1HFvXprkWPwt8sKr+S5IXAf9tdC2ebj+82bcIM4t9wOnr7j+PZ04f/3pMkiMZTjH3dxJdd6a5DiR5KfAfgAuq6omOYuta07V4DnAO8Jkk9wIvBPYsYJN72r+Nj1bVk1X1FeDLDJPHopnmWlwGXA9QVZ8DjmZ4bpRYjGSxFzgryRlJnsWwgb1nbMwe4JLR7dcAn6pRF2uBNF6HUenlfQwTxaLWpqHhWlTVI1V1UlXtqqpdDPs3F1TVaj/htmaav42PMFz4QJKTGJal7uk0ym5Mcy3uA84HSHI2w2Sx1mmUM2zuk8WoB/Em4EbgTuD6qrojybuSXDAadjVwYpK7gbcCGy6lnFdTXodfA44D/iDJF5OM/7EshCmvxcKb8jrcCHwzyZeATw
P/rqq+2U/E7ZnyWvwb4PIktwHXApcu4D8qN80d3JKkRnM/s5Aktc9kIUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0lSI5OF1JIk547eF+HoJN8zer+Ic/qOS9oMN+VJLUrynxgeG3EMsK+q/nPPIUmbYrKQWjQ6h2gvw/fMeHFVPdVzSNKmWIaS2nUCw/O4nsNwhiHNJWcWUotGhzV+GDgDOLWq3tRzSNKmLMKbH0kzKcnPAQeq6veT7AD+PMk/rKpP9R2bdLicWUiSGtmzkCQ1MllIkhqZLCRJjUwWkqRGJgtJUiOThSSpkclCktTIZCFJavT/AZcXR0Q04H4rAAAAAElFTkSuQmCC\n", 286 | "text/plain": [ 287 | "
" 288 | ] 289 | }, 290 | "metadata": { 291 | "needs_background": "light" 292 | }, 293 | "output_type": "display_data" 294 | } 295 | ], 296 | "source": [ 297 | "import matplotlib.pyplot as plt\n", 298 | "plt.scatter(x,y, s=8); plt.xlabel(\"x\"); plt.ylabel(\"y\"); " 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "You want to find **parameters** (weights) $a$ and $b$ such that you minimize the *error* between the points and the line $a\\cdot x + b$. Note that here $a$ and $b$ are unknown. For a regression problem the most common *error function* or *loss function* is the **mean squared error**. " 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 14, 311 | "metadata": {}, 312 | "outputs": [], 313 | "source": [ 314 | "def mse(y_hat, y): return ((y_hat - y) ** 2).mean()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "Suppose we believe $a = 10$ and $b = 5$ then we can compute `y_hat` which is our *prediction* and then compute our error." 
322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 15, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "3.7264671933272044" 333 | ] 334 | }, 335 | "execution_count": 15, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "y_hat = lin(10,5,x)\n", 342 | "mse(y_hat, y)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 16, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "def mse_loss(a, b, x, y): return mse(lin(a,b,x), y)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 17, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "3.7264671933272044" 363 | ] 364 | }, 365 | "execution_count": 17, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "mse_loss(10, 5, x, y)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "So far we have specified the *model* (linear regression) and the *evaluation criteria* (or *loss function*). Now we need to handle *optimization*; that is, how do we find the best values for $a$ and $b$? How do we find the best *fitting* linear regression?" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "metadata": {}, 384 | "source": [ 385 | "## Gradient Descent with Pytorch" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "For a fixed dataset $x$ and $y$ `mse_loss(a,b)` is a function of $a$ and $b$. We would like to find the values of $a$ and $b$ that minimize that function.\n", 393 | "\n", 394 | "**Gradient descent** is an algorithm that minimizes functions. Given a function defined by a set of parameters, gradient descent starts with an initial set of parameter values and iteratively moves toward a set of parameter values that minimize the function. 
This iterative minimization is achieved by taking steps in the negative direction of the function gradient.\n", 395 | "\n", 396 | "Here is gradient descent implemented in [PyTorch](http://pytorch.org/)." 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 18, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/plain": [ 407 | "((10000,), (10000,))" 408 | ] 409 | }, 410 | "execution_count": 18, 411 | "metadata": {}, 412 | "output_type": "execute_result" 413 | } 414 | ], 415 | "source": [ 416 | "# generate some more data\n", 417 | "x, y = gen_fake_data(10000, 3., 8.)\n", 418 | "x.shape, y.shape" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": 19, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "# Wrap x and y as tensor \n", 428 | "x = torch.tensor(x)\n", 429 | "y = torch.tensor(y)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 20, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/plain": [ 440 | "(tensor([-0.4017], dtype=torch.float64),\n", 441 | " tensor([-0.9494], dtype=torch.float64))" 442 | ] 443 | }, 444 | "execution_count": 20, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "# Create random numpy arrays for the weights, and wrap them in tensors.\n", 451 | "# Setting requires_grad=True indicates that we want to compute gradients with\n", 452 | "# respect to these tensors during the backward pass.\n", 453 | "a, b = np.random.randn(1), np.random.randn(1)\n", 454 | "a = torch.tensor(a, requires_grad=True)\n", 455 | "b = torch.tensor(b, requires_grad=True)\n", 456 | "a,b" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 21, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "name": "stdout", 466 | "output_type": "stream", 467 | "text": [ 468 | "114.33519327885749\n", 469 | "0.8827967584092252\n", 470 | "0.1509190011086734\n", 471 | 
"0.1336058659739962\n", 472 | "0.1237467641246501\n", 473 | "0.1161985772878354\n", 474 | "0.11040320048670306\n", 475 | "0.10595349916476945\n", 476 | "0.10253700937772219\n", 477 | "0.09991382151883298\n" 478 | ] 479 | } 480 | ], 481 | "source": [ 482 | "learning_rate = 1e-3\n", 483 | "for t in range(10000):\n", 484 | " # Forward pass: compute predicted y using operations on Tensors\n", 485 | " loss = mse_loss(a,b,x,y)\n", 486 | " if t % 1000 == 0: print(loss.item())\n", 487 | " \n", 488 | " # Computes the gradient of loss with respect to all Tensors with requires_grad=True.\n", 489 | " # After this call a.grad and b.grad will be Tensors holding the gradient\n", 490 | " # of the loss with respect to a and b respectively\n", 491 | " loss.backward()\n", 492 | " \n", 493 | " # Update a and b using gradient descent; a, b and their .grad are all Tensors,\n", 494 | " # and going through .data applies the update without autograd tracking it\n", 495 | " a.data -= learning_rate * a.grad.data\n", 496 | " b.data -= learning_rate * b.grad.data\n", 497 | " \n", 498 | " # Zero the gradients\n", 499 | " a.grad.data.zero_()\n", 500 | " b.grad.data.zero_() " 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 22, 506 | "metadata": {}, 507 | "outputs": [ 508 | { 509 | "name": "stdout", 510 | "output_type": "stream", 511 | "text": [ 512 | "tensor([ 3.2942], dtype=torch.float64) tensor([ 7.8449], dtype=torch.float64)\n" 513 | ] 514 | } 515 | ], 516 | "source": [ 517 | "print(a,b)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "metadata": {}, 523 | "source": [ 524 | "## Simplified GD Loop" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 23, 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "data": { 534 | "text/plain": [ 535 | "Linear(in_features=1, out_features=1, bias=True)" 536 | ] 537 | }, 538 | "execution_count": 23, 539 | "metadata": {}, 540 | "output_type": "execute_result" 541 | } 542 | ], 543 | 
"source": [ 544 | "# linear transformation with input dimension=1 and output dimension=1\n", 545 | "nn.Linear(1, 1)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 24, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "text/plain": [ 556 | "Sequential(\n", 557 | " (0): Linear(in_features=1, out_features=1, bias=True)\n", 558 | ")" 559 | ] 560 | }, 561 | "execution_count": 24, 562 | "metadata": {}, 563 | "output_type": "execute_result" 564 | } 565 | ], 566 | "source": [ 567 | "# simple way of specifying a linear regression model\n", 568 | "model = torch.nn.Sequential(\n", 569 | " nn.Linear(1, 1),\n", 570 | ")\n", 571 | "model" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 25, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "# equivalent way of specifying the same model\n", 581 | "class LinearRegression(nn.Module):\n", 582 | " def __init__(self):\n", 583 | " super(LinearRegression, self).__init__()\n", 584 | " self.lin = nn.Linear(1, 1)\n", 585 | " \n", 586 | " def forward(self, x):\n", 587 | " x = self.lin(x)\n", 588 | " return x \n", 589 | "model = LinearRegression()" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 26, 595 | "metadata": {}, 596 | "outputs": [ 597 | { 598 | "name": "stdout", 599 | "output_type": "stream", 600 | "text": [ 601 | "[Parameter containing:\n", 602 | "tensor([[ 0.2523]]), Parameter containing:\n", 603 | "tensor([ 0.5015])]\n" 604 | ] 605 | } 606 | ], 607 | "source": [ 608 | "print([p for p in model.parameters()])" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 27, 614 | "metadata": {}, 615 | "outputs": [ 616 | { 617 | "data": { 618 | "text/plain": [ 619 | "torch.Size([10000])" 620 | ] 621 | }, 622 | "execution_count": 27, 623 | "metadata": {}, 624 | "output_type": "execute_result" 625 | } 626 | ], 627 | "source": [ 628 | "x, y = gen_fake_data(10000, 3., 8.)\n", 629 | "x = 
torch.tensor(x).float()\n", 630 | "y = torch.tensor(y).float()\n", 631 | "x.shape" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": 28, 637 | "metadata": {}, 638 | "outputs": [ 639 | { 640 | "data": { 641 | "text/plain": [ 642 | "torch.Size([10000, 1])" 643 | ] 644 | }, 645 | "execution_count": 28, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | } 649 | ], 650 | "source": [ 651 | "# you have to be careful with the dimensions that your model is expecting\n", 652 | "x1 = torch.unsqueeze(x, 1)\n", 653 | "x1.shape" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 29, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "name": "stdout", 663 | "output_type": "stream", 664 | "text": [ 665 | "tensor([[ 0.6813],\n", 666 | " [ 0.6076],\n", 667 | " [ 0.6617],\n", 668 | " ...,\n", 669 | " [ 0.6750],\n", 670 | " [ 0.5348],\n", 671 | " [ 0.5524]])\n" 672 | ] 673 | } 674 | ], 675 | "source": [ 676 | "y_hat = model(x1)\n", 677 | "print(y_hat)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 30, 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [ 686 | "# Use the optim package to define an Optimizer that will update the weights of\n", 687 | "# the model for us. 
Here we will use Adam\n", 688 | "learning_rate = 0.1\n", 689 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": 31, 695 | "metadata": {}, 696 | "outputs": [ 697 | { 698 | "name": "stdout", 699 | "output_type": "stream", 700 | "text": [ 701 | "79.40501403808594\n", 702 | "0.08850504457950592\n", 703 | "0.08850347250699997\n", 704 | "0.08850349485874176\n", 705 | "0.08850345760583878\n", 706 | "0.08850356191396713\n", 707 | "0.08850352466106415\n", 708 | "0.08850352466106415\n", 709 | "0.08850352466106415\n", 710 | "0.08850352466106415\n" 711 | ] 712 | } 713 | ], 714 | "source": [ 715 | "for t in range(10000):\n", 716 | " # Forward pass: compute predicted y using operations on Tensors\n", 717 | " y_hat = model(x1)\n", 718 | " loss = F.mse_loss(y_hat, y.unsqueeze(1))\n", 719 | " if t % 1000 == 0: print(loss.item())\n", 720 | " \n", 721 | " # Before the backward pass, use the optimizer object to zero all of the\n", 722 | " # gradients of the model parameters\n", 723 | " optimizer.zero_grad()\n", 724 | " loss.backward()\n", 725 | " \n", 726 | " # Calling the step function on an Optimizer makes an update to its\n", 727 | " # parameters\n", 728 | " optimizer.step()" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": 32, 734 | "metadata": {}, 735 | "outputs": [ 736 | { 737 | "name": "stdout", 738 | "output_type": "stream", 739 | "text": [ 740 | "[Parameter containing:\n", 741 | "tensor([[ 3.0035]]), Parameter containing:\n", 742 | "tensor([ 7.9942])]\n" 743 | ] 744 | } 745 | ], 746 | "source": [ 747 | "print([p for p in model.parameters()])" 748 | ] 749 | }, 750 | { 751 | "cell_type": "markdown", 752 | "metadata": {}, 753 | "source": [ 754 | "# Logistic Regression" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": 33, 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [ 763 | "# generating fake data\n", 764 | "# Here we 
generate some fake data\n", 765 | "def lin(a,b,x): return a*x+b\n", 766 | "\n", 767 | "def gen_logistic_fake_data(n, a, b):\n", 768 | " x = np.random.uniform(-20,20, (n, 2))\n", 769 | " x2_hat = lin(a,b, x[:,0])\n", 770 | " y = x[:,1] > x2_hat\n", 771 | " return x, y.astype(int)\n", 772 | "\n", 773 | "x, y = gen_logistic_fake_data(100, 1., 0.5)" 774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": 34, 779 | "metadata": {}, 780 | "outputs": [ 781 | { 782 | "data": { 783 | "text/plain": [ 784 | "array([0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n", 785 | " 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", 786 | " 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,\n", 787 | " 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0,\n", 788 | " 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1])" 789 | ] 790 | }, 791 | "execution_count": 34, 792 | "metadata": {}, 793 | "output_type": "execute_result" 794 | } 795 | ], 796 | "source": [ 797 | "y" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": 35, 803 | "metadata": {}, 804 | "outputs": [ 805 | { 806 | "data": { 807 | "text/plain": [ 808 | "[]" 809 | ] 810 | }, 811 | "execution_count": 35, 812 | "metadata": {}, 813 | "output_type": "execute_result" 814 | }, 815 | { 816 | "data": { 817 | "image/png": "\n", 818 | "text/plain": [ 819 | "
" 820 | ] 821 | }, 822 | "metadata": { 823 | "needs_background": "light" 824 | }, 825 | "output_type": "display_data" 826 | } 827 | ], 828 | "source": [ 829 | "t = np.arange(-20, 20, 0.2)\n", 830 | "import matplotlib.pyplot as plt\n", 831 | "plt.scatter(x[:,0],x[:,1],c=y, s=8);\n", 832 | "plt.xlabel(\"x1\"); plt.ylabel(\"x2\");\n", 833 | "plt.plot(t, t + 0.5, 'r--')" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 36, 839 | "metadata": {}, 840 | "outputs": [], 841 | "source": [ 842 | "x = torch.tensor(x).float()\n", 843 | "y = torch.tensor(y).float()" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 37, 849 | "metadata": {}, 850 | "outputs": [ 851 | { 852 | "data": { 853 | "text/plain": [ 854 | "Sequential(\n", 855 | " (0): Linear(in_features=2, out_features=1, bias=True)\n", 856 | ")" 857 | ] 858 | }, 859 | "execution_count": 37, 860 | "metadata": {}, 861 | "output_type": "execute_result" 862 | } 863 | ], 864 | "source": [ 865 | "model = torch.nn.Sequential(\n", 866 | " torch.nn.Linear(2, 1),\n", 867 | ")\n", 868 | "model" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": 38, 874 | "metadata": {}, 875 | "outputs": [ 876 | { 877 | "data": { 878 | "text/plain": [ 879 | "torch.Size([100, 1])" 880 | ] 881 | }, 882 | "execution_count": 38, 883 | "metadata": {}, 884 | "output_type": "execute_result" 885 | } 886 | ], 887 | "source": [ 888 | "model(x).shape" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": 39, 894 | "metadata": {}, 895 | "outputs": [], 896 | "source": [ 897 | "x, y = gen_logistic_fake_data(10000, 1., 0.5)\n", 898 | "x = torch.tensor(x).float()\n", 899 | "y = torch.tensor(y).float()" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": 40, 905 | "metadata": {}, 906 | "outputs": [], 907 | "source": [ 908 | "learning_rate = 0.1\n", 909 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)" 910 | ] 911 | }, 
912 | { 913 | "cell_type": "code", 914 | "execution_count": 41, 915 | "metadata": {}, 916 | "outputs": [ 917 | { 918 | "name": "stdout", 919 | "output_type": "stream", 920 | "text": [ 921 | "0.9893282055854797\n", 922 | "0.012103211134672165\n", 923 | "0.008044046349823475\n", 924 | "0.006133016664534807\n", 925 | "0.004931855481117964\n", 926 | "0.0040746103040874004\n", 927 | "0.003419493557885289\n", 928 | "0.002897445810958743\n", 929 | "0.0024693021550774574\n", 930 | "0.0021104554180055857\n" 931 | ] 932 | } 933 | ], 934 | "source": [ 935 | "for t in range(10000):\n", 936 | " # Forward pass: compute predicted y using operations on Tensors\n", 937 | " y_hat = model(x)\n", 938 | " loss = F.binary_cross_entropy(F.sigmoid(y_hat), y.unsqueeze(1))\n", 939 | " if t % 1000 == 0: print(loss.item())\n", 940 | " \n", 941 | " # Before the backward pass, use the optimizer object to zero all of the\n", 942 | " # gradients of the model parameters\n", 943 | " optimizer.zero_grad()\n", 944 | " loss.backward()\n", 945 | " \n", 946 | " # Calling the step function on an Optimizer makes an update to its\n", 947 | " # parameters\n", 948 | " optimizer.step()" 949 | ] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "execution_count": 42, 954 | "metadata": {}, 955 | "outputs": [ 956 | { 957 | "name": "stdout", 958 | "output_type": "stream", 959 | "text": [ 960 | "[Parameter containing:\n", 961 | "tensor([[-21.5997, 21.6212]]), Parameter containing:\n", 962 | "tensor([-10.7937])]\n" 963 | ] 964 | } 965 | ], 966 | "source": [ 967 | "print([p for p in model.parameters()])" 968 | ] 969 | }, 970 | { 971 | "cell_type": "markdown", 972 | "metadata": {}, 973 | "source": [ 974 | "# Data loaders for SGD" 975 | ] 976 | }, 977 | { 978 | "cell_type": "markdown", 979 | "metadata": {}, 980 | "source": [ 981 | "Nearly all of deep learning is powered by one very important algorithm: **stochastic gradient descent (SGD)**. SGD can be seen as an approximation of **gradient descent** (GD). 
In GD you have to run through *all* the samples in your training set to do a single iteration. In SGD you use *only one* or *a subset* of training samples to do the update for a parameter in a particular iteration. The subset used in every iteration is called a **batch** or **minibatch**." 982 | ] 983 | }, 984 | { 985 | "cell_type": "code", 986 | "execution_count": 43, 987 | "metadata": {}, 988 | "outputs": [], 989 | "source": [ 990 | "model2 = torch.nn.Sequential(\n", 991 | " torch.nn.Linear(1, 1),\n", 992 | ")" 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "execution_count": 44, 998 | "metadata": {}, 999 | "outputs": [], 1000 | "source": [ 1001 | "from torch.utils.data import Dataset, DataLoader" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "code", 1006 | "execution_count": 45, 1007 | "metadata": {}, 1008 | "outputs": [], 1009 | "source": [ 1010 | "def lin(a,b,x): return a*x+b\n", 1011 | "\n", 1012 | "def gen_fake_data(n, a, b):\n", 1013 | " x = np.random.uniform(0,1,n) \n", 1014 | " y = lin(a,b,x) + 0.1 * np.random.normal(0,3,n)\n", 1015 | " return x.astype(np.float32), y.astype(np.float32)\n", 1016 | "\n", 1017 | "# create a dataset\n", 1018 | "class RegressionDataset(Dataset):\n", 1019 | " def __init__(self, a=3, b=8, n=10000):\n", 1020 | " x, y = gen_fake_data(n, a, b)\n", 1021 | " x = torch.from_numpy(x).unsqueeze(1)\n", 1022 | " y = torch.from_numpy(y)\n", 1023 | " self.x, self.y = x, y\n", 1024 | " \n", 1025 | " def __len__(self):\n", 1026 | " return len(self.y)\n", 1027 | " \n", 1028 | " def __getitem__(self, idx):\n", 1029 | " return self.x[idx], self.y[idx]\n", 1030 | " \n", 1031 | "fake_dataset = RegressionDataset()" 1032 | ] 1033 | }, 1034 | { 1035 | "cell_type": "markdown", 1036 | "metadata": {}, 1037 | "source": [ 1038 | "Next we are going to create a data loader. 
The data loader provides the following features:\n", 1039 | "* Batching the data\n", 1040 | "* Shuffling the data\n", 1041 | "* Load the data in parallel using multiprocessing workers." 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": 46, 1047 | "metadata": {}, 1048 | "outputs": [], 1049 | "source": [ 1050 | "dataloader = DataLoader(fake_dataset, batch_size=1000, shuffle=True)\n", 1051 | "x, y = next(iter(dataloader))" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 47, 1057 | "metadata": {}, 1058 | "outputs": [], 1059 | "source": [ 1060 | "#y.type(torch.FloatTensor)" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": 48, 1066 | "metadata": {}, 1067 | "outputs": [], 1068 | "source": [ 1069 | "learning_rate = 0.1\n", 1070 | "optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)" 1071 | ] 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": 49, 1076 | "metadata": {}, 1077 | "outputs": [ 1078 | { 1079 | "name": "stdout", 1080 | "output_type": "stream", 1081 | "text": [ 1082 | "56.18629455566406\n", 1083 | "0.09595121443271637\n", 1084 | "0.09600495547056198\n", 1085 | "0.08732529729604721\n", 1086 | "0.09132660925388336\n", 1087 | "0.08278344571590424\n", 1088 | "0.08562881499528885\n", 1089 | "0.08167734742164612\n", 1090 | "0.0862448588013649\n", 1091 | "0.09074677526950836\n" 1092 | ] 1093 | } 1094 | ], 1095 | "source": [ 1096 | "for t in range(1000):\n", 1097 | " for i, (x, y) in enumerate(dataloader): \n", 1098 | " \n", 1099 | " y_hat = model2(x)\n", 1100 | " loss = F.mse_loss(y_hat, y.unsqueeze(1))\n", 1101 | " \n", 1102 | " optimizer.zero_grad()\n", 1103 | " loss.backward()\n", 1104 | " \n", 1105 | " optimizer.step()\n", 1106 | " if t % 100 == 0: print(loss.item())" 1107 | ] 1108 | }, 1109 | { 1110 | "cell_type": "code", 1111 | "execution_count": 50, 1112 | "metadata": {}, 1113 | "outputs": [ 1114 | { 1115 | "name": "stdout", 1116 
| "output_type": "stream", 1117 | "text": [ 1118 | "[Parameter containing:\n", 1119 | "tensor([[ 3.0190]]), Parameter containing:\n", 1120 | "tensor([ 7.9957])]\n" 1121 | ] 1122 | } 1123 | ], 1124 | "source": [ 1125 | "print([p for p in model2.parameters()])" 1126 | ] 1127 | }, 1128 | { 1129 | "cell_type": "markdown", 1130 | "metadata": {}, 1131 | "source": [ 1132 | "# Two layer neural network" 1133 | ] 1134 | }, 1135 | { 1136 | "cell_type": "code", 1137 | "execution_count": 51, 1138 | "metadata": {}, 1139 | "outputs": [], 1140 | "source": [ 1141 | "# generating fake data\n", 1142 | "# Here we generate some fake data\n", 1143 | "def sigmoid(x):\n", 1144 | " return 1/(1 + np.exp(-x))\n", 1145 | "\n", 1146 | "def gen_nn_fake_data(n):\n", 1147 | " x = np.random.uniform(0,10, (n, 2))\n", 1148 | " x1 = x[:,0]\n", 1149 | " x2 = x[:,1]\n", 1150 | " score1 = sigmoid(-x1 - 8* x2 + 50)\n", 1151 | " score2 = sigmoid(-7*x1 - 2* x2 + 50)\n", 1152 | " score3 = 2* score1 + 3*score2 - 0.1\n", 1153 | " y = score3 < 0\n", 1154 | " return x, y.astype(int)\n", 1155 | "\n", 1156 | "x, y = gen_nn_fake_data(500)" 1157 | ] 1158 | }, 1159 | { 1160 | "cell_type": "code", 1161 | "execution_count": 52, 1162 | "metadata": {}, 1163 | "outputs": [ 1164 | { 1165 | "data": { 1166 | "image/png": "\n", 1167 | "text/plain": [ 1168 | "
" 1169 | ] 1170 | }, 1171 | "metadata": { 1172 | "needs_background": "light" 1173 | }, 1174 | "output_type": "display_data" 1175 | } 1176 | ], 1177 | "source": [ 1178 | "import matplotlib.pyplot as plt\n", "# Scatter the samples, one colour per class label.\n", 1179 | "plt.scatter(x[:, 0], x[:, 1], c=y, s=8)\n", 1180 | "plt.xlabel(\"x1\")\n", "plt.ylabel(\"x2\");" 1181 | ] 1182 | }, 1183 | { 1184 | "cell_type": "code", 1185 | "execution_count": 53, 1186 | "metadata": {}, 1187 | "outputs": [ 1188 | { 1189 | "data": { 1190 | "text/plain": [ 1191 | "Sequential(\n", 1192 | " (0): Linear(in_features=2, out_features=2, bias=True)\n", 1193 | " (1): Sigmoid()\n", 1194 | " (2): Linear(in_features=2, out_features=1, bias=True)\n", 1195 | ")" 1196 | ] 1197 | }, 1198 | "execution_count": 53, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "# Linear(2 -> 2), sigmoid, Linear(2 -> 1): the model returns one raw score per sample.\n", "model = torch.nn.Sequential(\n", 1205 | "    torch.nn.Linear(in_features=2, out_features=2),\n", 1206 | "    torch.nn.Sigmoid(),\n", 1207 | "    torch.nn.Linear(in_features=2, out_features=1),\n", 1208 | ")\n", 1209 | "model" 1210 | ] 1211 | }, 1212 | { 1213 | "cell_type": "code", 1214 | "execution_count": 54, 1215 | "metadata": {}, 1216 | "outputs": [], 1217 | "source": [ 1218 | "# Draw a larger sample and convert both arrays to float32 tensors.\n", "x, y = gen_nn_fake_data(10000)\n", 1219 | "x = torch.tensor(x, dtype=torch.float32)\n", 1220 | "y = torch.tensor(y, dtype=torch.float32)" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": 55, 1226 | "metadata": {}, 1227 | "outputs": [], 1228 | "source": [ 1229 | "# Adam optimizer over the parameters of the two-layer model.\n", "learning_rate = 0.01\n", 1230 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)" 1231 | ] 1232 | }, 1233 | { 1234 | "cell_type": "code", 1235 | "execution_count": 56, 1236 | "metadata": {}, 1237 | "outputs": [ 1238 | { 1239 | "name": "stdout", 1240 | "output_type": "stream", 1241 | "text": [ 1242 | "0.5998386144638062\n", 1243 | "0.08418773114681244\n", 1244 | "0.04379989951848984\n", 1245 | "0.02649916149675846\n", 1246 | "0.0179321076720953\n", 1247 | "0.013030633330345154\n", 1248 | "0.009794511832296848\n", 1249 | "0.007520528510212898\n", 
1250 | "0.0058765956200659275\n", 1251 | "0.0046782889403402805\n" 1252 | ] 1253 | } 1254 | ], 1255 | "source": [ 1256 | "for t in range(10000):\n", 1257 | "    # Forward pass: the model outputs raw scores (logits), not probabilities.\n", 1258 | "    y_hat = model(x)\n", "    # The logits variant applies the sigmoid inside the loss in a numerically\n", "    # stable way and replaces the deprecated F.sigmoid.\n", 1259 | "    loss = F.binary_cross_entropy_with_logits(y_hat, y.unsqueeze(1))\n", 1260 | "    if t % 1000 == 0:\n", "        print(loss.item())\n", 1261 | "\n", 1262 | "    # Gradients accumulate across backward() calls, so clear them before\n", 1263 | "    # computing the gradients of this step.\n", 1264 | "    optimizer.zero_grad()\n", 1265 | "    loss.backward()\n", 1266 | "\n", 1267 | "    # step() updates the parameters handed to the optimizer using the\n", 1268 | "    # gradients that were just computed.\n", 1269 | "    optimizer.step()" 1270 | ] 1271 | }, 1272 | { 1273 | "cell_type": "code", 1274 | "execution_count": 57, 1275 | "metadata": {}, 1276 | "outputs": [ 1277 | { 1278 | "name": "stdout", 1279 | "output_type": "stream", 1280 | "text": [ 1281 | "[Parameter containing:\n", 1282 | "tensor([[-4.4651, -1.2217],\n", 1283 | " [ 0.7295, 6.5536]]), Parameter containing:\n", 1284 | "tensor([ 33.2324, -42.4598]), Parameter containing:\n", 1285 | "tensor([[-25.9705, 23.0225]]), Parameter containing:\n", 1286 | "tensor([-12.2211])]\n" 1287 | ] 1288 | } 1289 | ], 1290 | "source": [ 1291 | "print(list(model.parameters()))" 1292 | ] 1293 | }, 1294 | { 1295 | "cell_type": "code", 1296 | "execution_count": 58, 1297 | "metadata": {}, 1298 | "outputs": [ 1299 | { 1300 | "data": { 1301 | "text/plain": [ 1302 | "array([ 72.7843, -134.4691, -45.7142])" 1303 | ] 1304 | }, 1305 | "execution_count": 58, 1306 | "metadata": {}, 1307 | "output_type": "execute_result" 1308 | } 1309 | ], 1310 | "source": [ 1311 | " np.array([72.7843, -134.4691, -45.7142])" 1312 | ] 1313 | }, 1314 | { 1315 | "cell_type": "markdown", 1316 | "metadata": { 1317 | "collapsed": true 1318 | }, 1319 | "source": [ 1320 | "# References\n", 1321 | "* https://pytorch.org/docs/stable/index.html\n", 1322 | "* 
http://pytorch.org/tutorials/beginner/pytorch_with_examples.html\n", 1323 | "* https://hsaghir.github.io/data_science/pytorch_starter/" 1324 | ] 1325 | }, 1326 | { 1327 | "cell_type": "code", 1328 | "execution_count": null, 1329 | "metadata": {}, 1330 | "outputs": [], 1331 | "source": [] 1332 | } 1333 | ], 1334 | "metadata": { 1335 | "kernelspec": { 1336 | "display_name": "Python 3", 1337 | "language": "python", 1338 | "name": "python3" 1339 | }, 1340 | "language_info": { 1341 | "codemirror_mode": { 1342 | "name": "ipython", 1343 | "version": 3 1344 | }, 1345 | "file_extension": ".py", 1346 | "mimetype": "text/x-python", 1347 | "name": "python", 1348 | "nbconvert_exporter": "python", 1349 | "pygments_lexer": "ipython3", 1350 | "version": "3.6.6" 1351 | }, 1352 | "nav_menu": {}, 1353 | "toc": { 1354 | "nav_menu": { 1355 | "height": "116px", 1356 | "width": "251px" 1357 | }, 1358 | "number_sections": true, 1359 | "sideBar": true, 1360 | "skip_h1_title": false, 1361 | "toc_cell": true, 1362 | "toc_position": {}, 1363 | "toc_section_display": "block", 1364 | "toc_window_display": false 1365 | }, 1366 | "widgets": { 1367 | "state": {}, 1368 | "version": "1.1.2" 1369 | } 1370 | }, 1371 | "nbformat": 4, 1372 | "nbformat_minor": 1 1373 | } 1374 | --------------------------------------------------------------------------------