├── README.md
├── cbow.ipynb
├── collaborative-filtering-nn.ipynb
├── environment.yml
├── image-caption-tutorial.ipynb
├── images
├── model.png
├── tiny_training2.csv
└── tiny_val2.csv
└── intro-to-pytoch.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # pytorch-tutorials
2 |
3 | Here are a few basic deep learning tutorials using PyTorch.
4 |
--------------------------------------------------------------------------------
/cbow.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": true
7 | },
8 | "source": [
9 | "<h1>Table of Contents</h1>\n",
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "# import pytorch libraries\n",
20 | "%matplotlib inline\n",
21 | "import torch \n",
22 | "import torch.autograd as autograd \n",
23 | "import torch.nn as nn \n",
24 | "import torch.nn.functional as F\n",
25 | "import torch.optim as optim\n",
26 | "import numpy as np"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# Text Classification\n",
34 | "In this part of the tutorial we develop a continuous bag of words (CBOW) model for a text classification task described [here](https://people.cs.umass.edu/~miyyer/pubs/2015_acl_dan.pdf). The CBOW model was first described [here](https://arxiv.org/pdf/1301.3781.pdf)."
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "## Subjectivity Dataset\n",
42 | "The subjectivity dataset has 5000 subjective and 5000 objective processed sentences. To get the data:\n",
43 | "```\n",
44 | "wget http://www.cs.cornell.edu/people/pabo/movie-review-data/rotten_imdb.tar.gz\n",
45 | "```"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 2,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "def unpack_dataset():\n",
55 | " ! wget http://www.cs.cornell.edu/people/pabo/movie-review-data/rotten_imdb.tar.gz\n",
56 | " ! mkdir data\n",
57 | " ! tar -xvf rotten_imdb.tar.gz -C data"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "#unpack_dataset()"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "name": "stdout",
76 | "output_type": "stream",
77 | "text": [
78 | "plot.tok.gt9.5000 quote.tok.gt9.5000 subjdata.README.1.0\r\n"
79 | ]
80 | }
81 | ],
82 | "source": [
83 | "!ls data"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 5,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "the movie begins in the past where a young boy named sam attempts to save celebi from a hunter . \r\n",
96 | "emerging from the human psyche and showing characteristics of abstract expressionism , minimalism and russian constructivism , graffiti removal has secured its place in the history of modern art while being created by artists who are unconscious of their artistic achievements . \r\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "! head -2 data/plot.tok.gt9.5000"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 6,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "data": {
111 | "text/plain": [
112 | "[PosixPath('data/plot.tok.gt9.5000'),\n",
113 | " PosixPath('data/subjdata.README.1.0'),\n",
114 | " PosixPath('data/quote.tok.gt9.5000')]"
115 | ]
116 | },
117 | "execution_count": 6,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "from pathlib import Path\n",
124 | "PATH = Path(\"data\")\n",
125 | "list(PATH.iterdir())"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "## Tokenization\n",
133 | "Tokenization is the task of chopping up text into pieces, called tokens.\n",
134 | "\n",
135 | "spaCy is an open-source software library for advanced Natural Language Processing. Here we will use it for tokenization. "
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "### Simple Tokenization"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 7,
148 | "metadata": {},
149 | "outputs": [],
150 | "source": [
151 | "# We need each line in the file \n",
152 | "def read_file(path):\n",
153 | " \"\"\" Read file returns a list of lines.\n",
154 | " \"\"\"\n",
155 | " with open(path, encoding = \"ISO-8859-1\") as f:\n",
156 | " content = f.readlines()\n",
157 | " return content"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 8,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "obj_lines = read_file(PATH/\"plot.tok.gt9.5000\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 9,
172 | "metadata": {},
173 | "outputs": [
174 | {
175 | "data": {
176 | "text/plain": [
177 | "'the movie begins in the past where a young boy named sam attempts to save celebi from a hunter . \\n'"
178 | ]
179 | },
180 | "execution_count": 9,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "obj_lines[0]"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 10,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "array(['the', 'movie', 'begins', 'in', 'the', 'past', 'where', 'a',\n",
198 | " 'young', 'boy', 'named', 'sam', 'attempts', 'to', 'save', 'celebi',\n",
199 | " 'from', 'a', 'hunter', '.'], dtype='\":0, \"UNK\":1} # init with padding and unknown\n",
502 | "words = [\"\", \"UNK\"]\n",
503 | "for word in word_count:\n",
504 | " vocab2index[word] = len(words)\n",
505 | " words.append(word)"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 24,
511 | "metadata": {},
512 | "outputs": [],
513 | "source": [
514 | "#vocab2index"
515 | ]
516 | },
517 | {
518 | "cell_type": "markdown",
519 | "metadata": {},
520 | "source": [
521 | "## Sentence encoding\n",
522 | "Here we encode each sentence as a sequence of indices corresponding to each word."
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": 25,
528 | "metadata": {},
529 | "outputs": [],
530 | "source": [
531 | "x_train_len = np.array([len(x.split()) for x in X_train])\n",
532 | "x_val_len = np.array([len(x.split()) for x in X_val])"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 26,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
542 | "text/plain": [
543 | "43.0"
544 | ]
545 | },
546 | "execution_count": 26,
547 | "metadata": {},
548 | "output_type": "execute_result"
549 | }
550 | ],
551 | "source": [
552 | "np.percentile(x_train_len, 95) # let's set the max sequence length to N=40"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": 27,
558 | "metadata": {},
559 | "outputs": [
560 | {
561 | "data": {
562 | "text/plain": [
563 | "'will god let her fall or give her a new path ?'"
564 | ]
565 | },
566 | "execution_count": 27,
567 | "metadata": {},
568 | "output_type": "execute_result"
569 | }
570 | ],
571 | "source": [
572 | "X_train[0]"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 28,
578 | "metadata": {},
579 | "outputs": [
580 | {
581 | "data": {
582 | "text/plain": [
583 | "8"
584 | ]
585 | },
586 | "execution_count": 28,
587 | "metadata": {},
588 | "output_type": "execute_result"
589 | }
590 | ],
591 | "source": [
592 | "# returns the index of the word or the index of \"UNK\" otherwise\n",
593 | "vocab2index.get(\"?\", vocab2index[\"UNK\"])"
594 | ]
595 | },
596 | {
597 | "cell_type": "code",
598 | "execution_count": 29,
599 | "metadata": {},
600 | "outputs": [
601 | {
602 | "data": {
603 | "text/plain": [
604 | "array([11, 3, 6, 7, 2, 12, 9, 7, 10, 4, 5, 8])"
605 | ]
606 | },
607 | "execution_count": 29,
608 | "metadata": {},
609 | "output_type": "execute_result"
610 | }
611 | ],
612 | "source": [
613 | "np.array([vocab2index.get(w, vocab2index[\"UNK\"]) for w in X_train[0].split()])"
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": 30,
619 | "metadata": {},
620 | "outputs": [],
621 | "source": [
622 | "def encode_sentence(s, N=40):\n",
623 | " enc = np.zeros(N, dtype=np.int32)\n",
624 | " enc1 = np.array([vocab2index.get(w, vocab2index[\"UNK\"]) for w in s.split()])\n",
625 | " l = min(N, len(enc1))\n",
626 | " enc[:l] = enc1[:l]\n",
627 | " return enc"
628 | ]
629 | },
630 | {
631 | "cell_type": "code",
632 | "execution_count": 31,
633 | "metadata": {},
634 | "outputs": [
635 | {
636 | "data": {
637 | "text/plain": [
638 | "array([11, 3, 6, 7, 2, 12, 9, 7, 10, 4, 5, 8, 0, 0, 0, 0, 0,\n",
639 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
640 | " 0, 0, 0, 0, 0, 0], dtype=int32)"
641 | ]
642 | },
643 | "execution_count": 31,
644 | "metadata": {},
645 | "output_type": "execute_result"
646 | }
647 | ],
648 | "source": [
649 | "encode_sentence(X_train[0])"
650 | ]
651 | },
652 | {
653 | "cell_type": "code",
654 | "execution_count": 32,
655 | "metadata": {},
656 | "outputs": [],
657 | "source": [
658 | "x_train_len = np.minimum(x_train_len, 40)\n",
659 | "x_val_len = np.minimum(x_val_len, 40)"
660 | ]
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": 33,
665 | "metadata": {},
666 | "outputs": [
667 | {
668 | "data": {
669 | "text/plain": [
670 | "(8000, 40)"
671 | ]
672 | },
673 | "execution_count": 33,
674 | "metadata": {},
675 | "output_type": "execute_result"
676 | }
677 | ],
678 | "source": [
679 | "x_train = np.vstack([encode_sentence(x) for x in X_train])\n",
680 | "x_train.shape"
681 | ]
682 | },
683 | {
684 | "cell_type": "code",
685 | "execution_count": 34,
686 | "metadata": {},
687 | "outputs": [
688 | {
689 | "data": {
690 | "text/plain": [
691 | "(2000, 40)"
692 | ]
693 | },
694 | "execution_count": 34,
695 | "metadata": {},
696 | "output_type": "execute_result"
697 | }
698 | ],
699 | "source": [
700 | "x_val = np.vstack([encode_sentence(x) for x in X_val])\n",
701 | "x_val.shape"
702 | ]
703 | },
704 | {
705 | "cell_type": "markdown",
706 | "metadata": {},
707 | "source": [
708 | "## Embedding layer\n",
709 | "Most deep learning models use dense vectors of real numbers to represent words (word embeddings), as opposed to one-hot encoded representations. The module torch.nn.Embedding is used to represent word embeddings. It takes two arguments: the vocabulary size and the dimensionality of the embeddings. The embeddings are initialized with random vectors."
710 | ]
711 | },
712 | {
713 | "cell_type": "code",
714 | "execution_count": 35,
715 | "metadata": {},
716 | "outputs": [
717 | {
718 | "data": {
719 | "text/plain": [
720 | "Parameter containing:\n",
721 | "tensor([[ 0.0000, 0.0000, 0.0000, 0.0000],\n",
722 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n",
723 | " [-0.0091, -0.4712, 1.2977, -1.2585],\n",
724 | " [ 0.1368, 1.4354, -0.0935, 0.1110],\n",
725 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n",
726 | " [-0.4534, 0.8292, -0.2036, 0.3768],\n",
727 | " [ 1.1694, 1.6533, 1.1898, 1.0617],\n",
728 | " [-1.1252, -0.2761, -0.1112, 0.7598],\n",
729 | " [ 0.9987, 1.0012, -0.3599, 0.5257],\n",
730 | " [ 1.2248, -0.2419, 0.2870, -1.5904]])"
731 | ]
732 | },
733 | "execution_count": 35,
734 | "metadata": {},
735 | "output_type": "execute_result"
736 | }
737 | ],
738 | "source": [
739 | "# an Embedding module containing 10 words with embedding size 4\n",
740 | "# embedding will be initialized at random\n",
741 | "embed = nn.Embedding(10, 4, padding_idx=0)\n",
742 | "embed.weight"
743 | ]
744 | },
745 | {
746 | "cell_type": "markdown",
747 | "metadata": {},
748 | "source": [
749 | "Note that the `padding_idx` has embedding vector 0."
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": 36,
755 | "metadata": {},
756 | "outputs": [
757 | {
758 | "data": {
759 | "text/plain": [
760 | "tensor([[[-0.9722, 0.9138, 0.0743, -0.1021],\n",
761 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n",
762 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n",
763 | " [-0.4534, 0.8292, -0.2036, 0.3768],\n",
764 | " [-0.9722, 0.9138, 0.0743, -0.1021],\n",
765 | " [ 0.0000, 0.0000, 0.0000, 0.0000]]])"
766 | ]
767 | },
768 | "execution_count": 36,
769 | "metadata": {},
770 | "output_type": "execute_result"
771 | }
772 | ],
773 | "source": [
774 | "# given a list of ids we can \"look up\" the embedding corresponding to each id\n",
775 | "# can you see that some vectors are the same?\n",
776 | "a = torch.LongTensor([[1,4,1,5,1,0]])\n",
777 | "embed(a)"
778 | ]
779 | },
780 | {
781 | "cell_type": "markdown",
782 | "metadata": {},
783 | "source": [
784 | "This would be the representation of a sentence with words with indices [1,4,1,5,1] and a padding at the end. Below we have an example with two sentences: the first sentence has length 3 and the second has length 2. In order to store both in a single tensor, we pad the end of the second sentence."
785 | ]
786 | },
787 | {
788 | "cell_type": "code",
789 | "execution_count": 37,
790 | "metadata": {},
791 | "outputs": [],
792 | "source": [
793 | "a = torch.LongTensor([[1,4,1], [1,3,0]])"
794 | ]
795 | },
796 | {
797 | "cell_type": "markdown",
798 | "metadata": {},
799 | "source": [
800 | "Our model takes an average of the word embedding of each word. Here is how we do it."
801 | ]
802 | },
803 | {
804 | "cell_type": "code",
805 | "execution_count": 38,
806 | "metadata": {},
807 | "outputs": [],
808 | "source": [
809 | "s = torch.FloatTensor([3, 2]) # number of non-padding words in each sentence"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 39,
815 | "metadata": {},
816 | "outputs": [
817 | {
818 | "data": {
819 | "text/plain": [
820 | "tensor([[[-0.9722, 0.9138, 0.0743, -0.1021],\n",
821 | " [ 0.7230, -0.9195, 0.9880, 1.2590],\n",
822 | " [-0.9722, 0.9138, 0.0743, -0.1021]],\n",
823 | "\n",
824 | " [[-0.9722, 0.9138, 0.0743, -0.1021],\n",
825 | " [ 0.1368, 1.4354, -0.0935, 0.1110],\n",
826 | " [ 0.0000, 0.0000, 0.0000, 0.0000]]])"
827 | ]
828 | },
829 | "execution_count": 39,
830 | "metadata": {},
831 | "output_type": "execute_result"
832 | }
833 | ],
834 | "source": [
835 | "embed(a)"
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": 40,
841 | "metadata": {},
842 | "outputs": [
843 | {
844 | "data": {
845 | "text/plain": [
846 | "tensor([[-1.2213, 0.9080, 1.1367, 1.0548],\n",
847 | " [-0.8354, 2.3491, -0.0192, 0.0089]])"
848 | ]
849 | },
850 | "execution_count": 40,
851 | "metadata": {},
852 | "output_type": "execute_result"
853 | }
854 | ],
855 | "source": [
856 | "embed(a).sum(dim=1)"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "execution_count": 41,
862 | "metadata": {},
863 | "outputs": [
864 | {
865 | "data": {
866 | "text/plain": [
867 | "tensor([[-0.4071, 0.3027, 0.3789, 0.3516],\n",
868 | " [-0.4177, 1.1746, -0.0096, 0.0044]])"
869 | ]
870 | },
871 | "execution_count": 41,
872 | "metadata": {},
873 | "output_type": "execute_result"
874 | }
875 | ],
876 | "source": [
877 | "sum_embs = embed(a).sum(dim=1) \n",
878 | "sum_embs/ s.view(s.shape[0], 1)"
879 | ]
880 | },
881 | {
882 | "cell_type": "markdown",
883 | "metadata": {},
884 | "source": [
885 | "## Continuous Bag of Words Model"
886 | ]
887 | },
888 | {
889 | "cell_type": "code",
890 | "execution_count": 42,
891 | "metadata": {},
892 | "outputs": [],
893 | "source": [
894 | "class CBOW(nn.Module):\n",
895 | " def __init__(self, vocab_size, emb_size=100):\n",
896 | " super(CBOW, self).__init__()\n",
897 | " self.word_emb = nn.Embedding(vocab_size, emb_size, padding_idx=0)\n",
898 | " self.linear = nn.Linear(emb_size, 1)\n",
899 | " \n",
900 | " def forward(self, x, s):\n",
901 | " x = self.word_emb(x)\n",
902 | " x = x.sum(dim=1)/ s\n",
903 | " x = self.linear(x)\n",
904 | " return x"
905 | ]
906 | },
907 | {
908 | "cell_type": "code",
909 | "execution_count": 43,
910 | "metadata": {},
911 | "outputs": [],
912 | "source": [
913 | "model = CBOW(vocab_size=5, emb_size=3)"
914 | ]
915 | },
916 | {
917 | "cell_type": "code",
918 | "execution_count": 44,
919 | "metadata": {},
920 | "outputs": [
921 | {
922 | "data": {
923 | "text/plain": [
924 | "Parameter containing:\n",
925 | "tensor([[ 0.0000, 0.0000, 0.0000],\n",
926 | " [ 1.6292, 1.2889, 0.7647],\n",
927 | " [ 2.5952, -0.9427, 0.3432],\n",
928 | " [ 0.5775, -2.7160, -1.4606],\n",
929 | " [ 1.2119, 0.8058, -0.0705]])"
930 | ]
931 | },
932 | "execution_count": 44,
933 | "metadata": {},
934 | "output_type": "execute_result"
935 | }
936 | ],
937 | "source": [
938 | "model.word_emb.weight"
939 | ]
940 | },
941 | {
942 | "cell_type": "code",
943 | "execution_count": 45,
944 | "metadata": {},
945 | "outputs": [
946 | {
947 | "data": {
948 | "text/plain": [
949 | "tensor([[ 0.1384],\n",
950 | " [ 0.5663]])"
951 | ]
952 | },
953 | "execution_count": 45,
954 | "metadata": {},
955 | "output_type": "execute_result"
956 | }
957 | ],
958 | "source": [
959 | "s = s.view(s.shape[0], 1)\n",
960 | "model(a, s)"
961 | ]
962 | },
963 | {
964 | "cell_type": "markdown",
965 | "metadata": {},
966 | "source": [
967 | "# Training the CBOW model "
968 | ]
969 | },
970 | {
971 | "cell_type": "code",
972 | "execution_count": 51,
973 | "metadata": {},
974 | "outputs": [
975 | {
976 | "name": "stdout",
977 | "output_type": "stream",
978 | "text": [
979 | "4067\n"
980 | ]
981 | }
982 | ],
983 | "source": [
984 | "V = len(words)\n",
985 | "model = CBOW(vocab_size=V, emb_size=50)\n",
986 | "print(V)"
987 | ]
988 | },
989 | {
990 | "cell_type": "code",
991 | "execution_count": 52,
992 | "metadata": {},
993 | "outputs": [],
994 | "source": [
995 | "def val_metrics(model):\n",
996 | " model.eval()\n",
997 | " x = torch.LongTensor(x_val) #.cuda()\n",
998 | " y = torch.Tensor(y_val).unsqueeze(1) #).cuda()\n",
999 | " s = torch.Tensor(x_val_len).view(x_val_len.shape[0], 1)\n",
1000 | " y_hat = model(x, s)\n",
1001 | " loss = F.binary_cross_entropy_with_logits(y_hat, y)\n",
1002 | " y_pred = y_hat > 0\n",
1003 | " correct = (y_pred.float() == y).float().sum()\n",
1004 | " accuracy = correct/y_pred.shape[0]\n",
1005 | " return loss.item(), accuracy.item()"
1006 | ]
1007 | },
1008 | {
1009 | "cell_type": "code",
1010 | "execution_count": 53,
1011 | "metadata": {},
1012 | "outputs": [
1013 | {
1014 | "data": {
1015 | "text/plain": [
1016 | "(0.6892560720443726, 0.5245000123977661)"
1017 | ]
1018 | },
1019 | "execution_count": 53,
1020 | "metadata": {},
1021 | "output_type": "execute_result"
1022 | }
1023 | ],
1024 | "source": [
1025 | "# accuracy of a random model should be around 0.5\n",
1026 | "val_metrics(model)"
1027 | ]
1028 | },
1029 | {
1030 | "cell_type": "code",
1031 | "execution_count": 54,
1032 | "metadata": {},
1033 | "outputs": [],
1034 | "source": [
1035 | "def train_epocs(model, epochs=10, lr=0.01):\n",
1036 | " optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
1037 | " for i in range(epochs):\n",
1038 | " model.train()\n",
1039 | " x = torch.LongTensor(x_train) #.cuda()\n",
1040 | " y = torch.Tensor(y_train).unsqueeze(1)\n",
1041 | " s = torch.Tensor(x_train_len).view(x_train_len.shape[0], 1)\n",
1042 | " y_hat = model(x, s)\n",
1043 | " loss = F.binary_cross_entropy_with_logits(y_hat, y)\n",
1044 | " optimizer.zero_grad()\n",
1045 | " loss.backward()\n",
1046 | " optimizer.step()\n",
1047 | " val_loss, val_accuracy = val_metrics(model)\n",
1048 | " print(\"train_loss %.3f val_loss %.3f val_accuracy %.3f\" % (loss.item(), val_loss, val_accuracy))"
1049 | ]
1050 | },
1051 | {
1052 | "cell_type": "code",
1053 | "execution_count": 55,
1054 | "metadata": {},
1055 | "outputs": [
1056 | {
1057 | "name": "stdout",
1058 | "output_type": "stream",
1059 | "text": [
1060 | "train_loss 0.689 val_loss 0.651 val_accuracy 0.582\n",
1061 | "train_loss 0.639 val_loss 0.559 val_accuracy 0.766\n",
1062 | "train_loss 0.544 val_loss 0.467 val_accuracy 0.825\n",
1063 | "train_loss 0.440 val_loss 0.375 val_accuracy 0.866\n",
1064 | "train_loss 0.337 val_loss 0.321 val_accuracy 0.867\n",
1065 | "train_loss 0.270 val_loss 0.274 val_accuracy 0.893\n",
1066 | "train_loss 0.213 val_loss 0.258 val_accuracy 0.900\n",
1067 | "train_loss 0.183 val_loss 0.253 val_accuracy 0.902\n",
1068 | "train_loss 0.160 val_loss 0.250 val_accuracy 0.909\n",
1069 | "train_loss 0.135 val_loss 0.260 val_accuracy 0.905\n"
1070 | ]
1071 | }
1072 | ],
1073 | "source": [
1074 | "train_epocs(model, epochs=10, lr=0.1)"
1075 | ]
1076 | },
1077 | {
1078 | "cell_type": "code",
1079 | "execution_count": 56,
1080 | "metadata": {},
1081 | "outputs": [
1082 | {
1083 | "name": "stdout",
1084 | "output_type": "stream",
1085 | "text": [
1086 | "train_loss 0.120 val_loss 0.257 val_accuracy 0.907\n",
1087 | "train_loss 0.115 val_loss 0.257 val_accuracy 0.909\n",
1088 | "train_loss 0.113 val_loss 0.256 val_accuracy 0.907\n",
1089 | "train_loss 0.110 val_loss 0.256 val_accuracy 0.908\n",
1090 | "train_loss 0.107 val_loss 0.255 val_accuracy 0.908\n",
1091 | "train_loss 0.103 val_loss 0.255 val_accuracy 0.907\n",
1092 | "train_loss 0.100 val_loss 0.255 val_accuracy 0.906\n",
1093 | "train_loss 0.098 val_loss 0.256 val_accuracy 0.908\n",
1094 | "train_loss 0.095 val_loss 0.257 val_accuracy 0.906\n",
1095 | "train_loss 0.092 val_loss 0.257 val_accuracy 0.906\n"
1096 | ]
1097 | }
1098 | ],
1099 | "source": [
1100 | "train_epocs(model, epochs=10, lr=0.01)"
1101 | ]
1102 | },
1103 | {
1104 | "cell_type": "markdown",
1105 | "metadata": {},
1106 | "source": [
1107 | "# Data loaders for SGD"
1108 | ]
1109 | },
1110 | {
1111 | "cell_type": "markdown",
1112 | "metadata": {},
1113 | "source": [
1114 | "Nearly all of deep learning is powered by one very important algorithm: **stochastic gradient descent (SGD)**. SGD can be seen as an approximation of **gradient descent** (GD). In GD you have to run through *all* the samples in your training set to do a single iteration. In SGD you use *only one* or *a subset* of the training samples to update the parameters in a particular iteration. The subset used in every iteration is called a **batch** or **minibatch**."
1115 | ]
1116 | },
1117 | {
1118 | "cell_type": "code",
1119 | "execution_count": 57,
1120 | "metadata": {},
1121 | "outputs": [],
1122 | "source": [
1123 | "from torch.utils.data import Dataset, DataLoader"
1124 | ]
1125 | },
1126 | {
1127 | "cell_type": "markdown",
1128 | "metadata": {},
1129 | "source": [
1130 | "Next we are going to create a data loader. The data loader provides the following features:\n",
1131 | "* Batching the data\n",
1132 | "* Shuffling the data\n",
1133 | "* Loading the data in parallel using multiprocessing workers."
1134 | ]
1135 | },
1136 | {
1137 | "cell_type": "code",
1138 | "execution_count": 58,
1139 | "metadata": {},
1140 | "outputs": [],
1141 | "source": [
1142 | "def encode_sentence2(s, N=40):\n",
1143 | " enc = np.zeros(N, dtype=np.int32)\n",
1144 | " enc1 = np.array([vocab2index.get(w, vocab2index[\"UNK\"]) for w in s.split()])\n",
1145 | " l = min(N, len(enc1))\n",
1146 | " enc[:l] = enc1[:l]\n",
1147 | " return enc, l"
1148 | ]
1149 | },
1150 | {
1151 | "cell_type": "code",
1152 | "execution_count": 59,
1153 | "metadata": {},
1154 | "outputs": [
1155 | {
1156 | "data": {
1157 | "text/plain": [
1158 | "(array([11, 3, 6, 7, 2, 12, 9, 7, 10, 4, 5, 8, 0, 0, 0, 0, 0,\n",
1159 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1160 | " 0, 0, 0, 0, 0, 0], dtype=int32), 12)"
1161 | ]
1162 | },
1163 | "execution_count": 59,
1164 | "metadata": {},
1165 | "output_type": "execute_result"
1166 | }
1167 | ],
1168 | "source": [
1169 | "encode_sentence2(X_train[0])"
1170 | ]
1171 | },
1172 | {
1173 | "cell_type": "code",
1174 | "execution_count": 60,
1175 | "metadata": {},
1176 | "outputs": [],
1177 | "source": [
1178 | "class SubjectivityDataset(Dataset):\n",
1179 | " def __init__(self, X, y):\n",
1180 | " self.x = X\n",
1181 | " self.y = y\n",
1182 | " \n",
1183 | " def __len__(self):\n",
1184 | " return len(self.y)\n",
1185 | " \n",
1186 | " def __getitem__(self, idx):\n",
1187 | " x = self.x[idx]\n",
1188 | " x, s = encode_sentence2(x)\n",
1189 | " return x, self.y[idx], s\n",
1190 | " \n",
1191 | "sub_dataset_train = SubjectivityDataset(X_train, y_train)"
1192 | ]
1193 | },
1194 | {
1195 | "cell_type": "code",
1196 | "execution_count": 61,
1197 | "metadata": {},
1198 | "outputs": [],
1199 | "source": [
1200 | "train_loader = DataLoader(sub_dataset_train, batch_size=5, shuffle=True)\n",
1201 | "x, y, s = next(iter(train_loader))"
1202 | ]
1203 | },
1204 | {
1205 | "cell_type": "code",
1206 | "execution_count": 62,
1207 | "metadata": {},
1208 | "outputs": [
1209 | {
1210 | "data": {
1211 | "text/plain": [
1212 | "(tensor([[ 243, 2146, 1, 384, 57, 1, 57, 1, 1,\n",
1213 | " 37, 559, 1, 1, 2632, 1, 42, 24, 15,\n",
1214 | " 645, 3014, 2936, 88, 1, 37, 1, 2632, 2029,\n",
1215 | " 1, 80, 1, 23, 0, 0, 0, 0, 0,\n",
1216 | " 0, 0, 0, 0],\n",
1217 | " [ 147, 15, 1075, 1910, 362, 42, 3155, 125, 588,\n",
1218 | " 32, 588, 63, 40, 41, 1479, 57, 2537, 24,\n",
1219 | " 15, 1, 57, 1725, 152, 40, 233, 23, 0,\n",
1220 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1221 | " 0, 0, 0, 0],\n",
1222 | " [ 15, 1873, 24, 119, 29, 2288, 477, 1575, 24,\n",
1223 | " 1645, 42, 24, 15, 127, 635, 436, 147, 173,\n",
1224 | " 128, 1551, 129, 436, 72, 704, 1, 42, 977,\n",
1225 | " 24, 2144, 42, 29, 738, 1, 434, 104, 23,\n",
1226 | " 0, 0, 0, 0],\n",
1227 | " [ 1, 476, 29, 661, 91, 3319, 42, 24, 1,\n",
1228 | " 476, 29, 661, 91, 3320, 23, 0, 0, 0,\n",
1229 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1230 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1231 | " 0, 0, 0, 0],\n",
1232 | " [ 261, 15, 3115, 354, 1298, 42, 243, 148, 199,\n",
1233 | " 1013, 60, 24, 60, 24, 60, 24, 60, 0,\n",
1234 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1235 | " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
1236 | " 0, 0, 0, 0]], dtype=torch.int32),\n",
1237 | " tensor([ 1., 1., 0., 0., 0.], dtype=torch.float64),\n",
1238 | " tensor([ 31, 26, 36, 15, 17]))"
1239 | ]
1240 | },
1241 | "execution_count": 62,
1242 | "metadata": {},
1243 | "output_type": "execute_result"
1244 | }
1245 | ],
1246 | "source": [
1247 | "x, y, s"
1248 | ]
1249 | },
1250 | {
1251 | "cell_type": "code",
1252 | "execution_count": 63,
1253 | "metadata": {},
1254 | "outputs": [],
1255 | "source": [
1256 | "model = CBOW(vocab_size=V, emb_size=50)"
1257 | ]
1258 | },
1259 | {
1260 | "cell_type": "code",
1261 | "execution_count": 64,
1262 | "metadata": {},
1263 | "outputs": [],
1264 | "source": [
1265 | "train_loader = DataLoader(sub_dataset_train, batch_size=500, shuffle=True)"
1266 | ]
1267 | },
1268 | {
1269 | "cell_type": "code",
1270 | "execution_count": null,
1271 | "metadata": {},
1272 | "outputs": [],
1273 | "source": [
1274 | "def train_epocs(model, epochs=10, lr=0.01):\n",
1275 | "    \"\"\"Train `model` with Adam on mini-batches from the global `train_loader`, printing metrics once per epoch.\"\"\"\n",
1276 | "    optimizer = torch.optim.Adam(model.parameters(), lr=lr)\n",
1277 | "    for i in range(epochs):\n",
1278 | "        total_loss, total = 0.0, 0  # running sum of per-sample loss and number of samples seen\n",
1279 | "        model.train()\n",
1280 | "        for x, y, s in train_loader:\n",
1281 | "            x = x.type(torch.LongTensor) #.cuda()\n",
1282 | "            y = y.type(torch.FloatTensor).unsqueeze(1)\n",
1283 | "            s = s.type(torch.Tensor).view(s.shape[0], 1)\n",
1284 | "            y_hat = model(x, s)\n",
1285 | "            loss = F.binary_cross_entropy_with_logits(y_hat, y)\n",
1286 | "            optimizer.zero_grad()\n",
1287 | "            loss.backward()\n",
1288 | "            optimizer.step()\n",
1289 | "            total_loss += x.size(0)*loss.item()  # loss is a batch mean, so weight it by the batch size\n",
1290 | "            total += x.size(0)\n",
1291 | "        train_loss = total_loss/total  # sample-weighted mean training loss for this epoch\n",
1292 | "        val_loss, val_accuracy = val_metrics(model)\n",
1293 | "        # report the epoch average rather than the loss of the last mini-batch\n",
1294 | "        print(\"train_loss %.3f val_loss %.3f val_accuracy %.3f\" % (train_loss, val_loss, val_accuracy))"
1295 | ]
1296 | },
1297 | {
1298 | "cell_type": "code",
1299 | "execution_count": null,
1300 | "metadata": {},
1301 | "outputs": [],
1302 | "source": [
1303 | "train_epocs(model, epochs=10)"
1304 | ]
1305 | },
1306 | {
1307 | "cell_type": "code",
1308 | "execution_count": null,
1309 | "metadata": {},
1310 | "outputs": [],
1311 | "source": []
1312 | }
1313 | ],
1314 | "metadata": {
1315 | "kernelspec": {
1316 | "display_name": "Python 3",
1317 | "language": "python",
1318 | "name": "python3"
1319 | },
1320 | "language_info": {
1321 | "codemirror_mode": {
1322 | "name": "ipython",
1323 | "version": 3
1324 | },
1325 | "file_extension": ".py",
1326 | "mimetype": "text/x-python",
1327 | "name": "python",
1328 | "nbconvert_exporter": "python",
1329 | "pygments_lexer": "ipython3",
1330 | "version": "3.6.6"
1331 | },
1332 | "nav_menu": {},
1333 | "toc": {
1334 | "nav_menu": {
1335 | "height": "116px",
1336 | "width": "251px"
1337 | },
1338 | "number_sections": true,
1339 | "sideBar": true,
1340 | "skip_h1_title": false,
1341 | "toc_cell": true,
1342 | "toc_position": {},
1343 | "toc_section_display": "block",
1344 | "toc_window_display": false
1345 | },
1346 | "widgets": {
1347 | "state": {},
1348 | "version": "1.1.2"
1349 | }
1350 | },
1351 | "nbformat": 4,
1352 | "nbformat_minor": 1
1353 | }
1354 |
--------------------------------------------------------------------------------
/collaborative-filtering-nn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Collaborative Filtering with Neural Networks"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "In this notebook we will write a matrix factorization model in pytorch to solve a recommendation problem. Then we will write a more general neural model for the same problem."
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "The MovieLens dataset (ml-latest-small) describes 5-star rating and free-text tagging activity from MovieLens, a movie recommendation service. It contains 100004 ratings and 1296 tag applications across 9125 movies. https://grouplens.org/datasets/movielens/. To get the data:\n",
22 | "\n",
23 | "`wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip`"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "## MovieLens dataset"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 1,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "from pathlib import Path\n",
40 | "import pandas as pd\n",
41 | "import numpy as np"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "[PosixPath('/data2/yinterian/ml-latest-small/ratings.csv'),\n",
53 | " PosixPath('/data2/yinterian/ml-latest-small/tags.csv'),\n",
54 | " PosixPath('/data2/yinterian/ml-latest-small/tiny_training2.csv'),\n",
55 | " PosixPath('/data2/yinterian/ml-latest-small/links.csv'),\n",
56 | " PosixPath('/data2/yinterian/ml-latest-small/tiny_val2.csv'),\n",
57 | " PosixPath('/data2/yinterian/ml-latest-small/README.txt'),\n",
58 | " PosixPath('/data2/yinterian/ml-latest-small/movies.csv')]"
59 | ]
60 | },
61 | "execution_count": 3,
62 | "metadata": {},
63 | "output_type": "execute_result"
64 | }
65 | ],
66 | "source": [
67 | "PATH = Path(\"/Users/yinterian/teaching/deeplearning/data/ml-latest-small/\")\n",
68 | "PATH = Path(\"/data2/yinterian/ml-latest-small/\")\n",
69 | "list(PATH.iterdir())"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "userId,movieId,rating,timestamp\r",
82 | "\r\n",
83 | "1,31,2.5,1260759144\r",
84 | "\r\n",
85 | "1,1029,3.0,1260759179\r",
86 | "\r\n",
87 | "1,1061,3.0,1260759182\r",
88 | "\r\n",
89 | "1,1129,2.0,1260759185\r",
90 | "\r\n",
91 | "1,1172,4.0,1260759205\r",
92 | "\r\n",
93 | "1,1263,2.0,1260759151\r",
94 | "\r\n",
95 | "1,1287,2.0,1260759187\r",
96 | "\r\n",
97 | "1,1293,2.0,1260759148\r",
98 | "\r\n",
99 | "1,1339,3.5,1260759125\r",
100 | "\r\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "! head $PATH/ratings.csv"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 5,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": [
114 | "data = pd.read_csv(PATH/\"ratings.csv\")"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 6,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "data": {
124 | "text/html": [
125 | "\n",
126 | "\n",
139 | "
\n",
140 | " \n",
141 | " \n",
142 | " | \n",
143 | " userId | \n",
144 | " movieId | \n",
145 | " rating | \n",
146 | " timestamp | \n",
147 | "
\n",
148 | " \n",
149 | " \n",
150 | " \n",
151 | " 0 | \n",
152 | " 1 | \n",
153 | " 31 | \n",
154 | " 2.5 | \n",
155 | " 1260759144 | \n",
156 | "
\n",
157 | " \n",
158 | " 1 | \n",
159 | " 1 | \n",
160 | " 1029 | \n",
161 | " 3.0 | \n",
162 | " 1260759179 | \n",
163 | "
\n",
164 | " \n",
165 | " 2 | \n",
166 | " 1 | \n",
167 | " 1061 | \n",
168 | " 3.0 | \n",
169 | " 1260759182 | \n",
170 | "
\n",
171 | " \n",
172 | " 3 | \n",
173 | " 1 | \n",
174 | " 1129 | \n",
175 | " 2.0 | \n",
176 | " 1260759185 | \n",
177 | "
\n",
178 | " \n",
179 | " 4 | \n",
180 | " 1 | \n",
181 | " 1172 | \n",
182 | " 4.0 | \n",
183 | " 1260759205 | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " userId movieId rating timestamp\n",
191 | "0 1 31 2.5 1260759144\n",
192 | "1 1 1029 3.0 1260759179\n",
193 | "2 1 1061 3.0 1260759182\n",
194 | "3 1 1129 2.0 1260759185\n",
195 | "4 1 1172 4.0 1260759205"
196 | ]
197 | },
198 | "execution_count": 6,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "data.head()"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "### Encoding data\n",
212 | "We encode the data to have contiguous ids for users and movies. You can think about this as a categorical encoding of our two categorical variables userId and movieId."
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 7,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "# split train and validation before encoding\n",
222 | "np.random.seed(3)\n",
223 | "msk = np.random.rand(len(data)) < 0.8\n",
224 | "train = data[msk].copy()\n",
225 | "val = data[~msk].copy()"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 8,
231 | "metadata": {},
232 | "outputs": [],
233 | "source": [
234 | "# here is a handy function modified from fast.ai\n",
235 | "def proc_col(col, train_col=None):\n",
236 | " \"\"\"Encodes a pandas column with contiguous ids. \n",
237 | " \"\"\"\n",
238 | " if train_col is not None:\n",
239 | " uniq = train_col.unique()\n",
240 | " else:\n",
241 | " uniq = col.unique()\n",
242 | " name2idx = {o:i for i,o in enumerate(uniq)}\n",
243 | " return name2idx, np.array([name2idx.get(x, -1) for x in col]), len(uniq)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 9,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "def encode_data(df, train=None):\n",
253 | " \"\"\" Encodes rating data with contiguous user and movie ids. \n",
254 | " If train is provided, encodes df with the same encoding as train.\n",
255 | " \"\"\"\n",
256 | " df = df.copy()\n",
257 | " for col_name in [\"userId\", \"movieId\"]:\n",
258 | " train_col = None\n",
259 | " if train is not None:\n",
260 | " train_col = train[col_name]\n",
261 | " _,col,_ = proc_col(df[col_name], train_col)\n",
262 | " df[col_name] = col\n",
263 | " df = df[df[col_name] >= 0]\n",
264 | " return df"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "execution_count": 10,
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "name": "stdout",
274 | "output_type": "stream",
275 | "text": [
276 | " userId movieId rating\n",
277 | "0 11 1 4\n",
278 | "1 11 23 5\n",
279 | "2 2 23 5\n",
280 | "3 2 4 3\n",
281 | "4 31 1 4\n",
282 | "5 31 23 4\n",
283 | "6 4 1 5\n",
284 | "7 4 3 2\n",
285 | "8 52 1 1\n",
286 | "9 52 3 4\n",
287 | "10 61 3 5\n",
288 | "11 7 23 1\n",
289 | "12 7 3 3\n",
290 | " userId movieId rating\n",
291 | "0 0 0 4\n",
292 | "1 0 1 5\n",
293 | "2 1 1 5\n",
294 | "3 1 2 3\n",
295 | "4 2 0 4\n",
296 | "5 2 1 4\n",
297 | "6 3 0 5\n",
298 | "7 3 3 2\n",
299 | "8 4 0 1\n",
300 | "9 4 3 4\n",
301 | "10 5 3 5\n",
302 | "11 6 1 1\n",
303 | "12 6 3 3\n"
304 | ]
305 | }
306 | ],
307 | "source": [
308 | "# to check my new implementation\n",
309 | "LOCAL_PATH = Path(\"images/\")\n",
310 | "df_t = pd.read_csv(LOCAL_PATH/\"tiny_training2.csv\")\n",
311 | "df_v = pd.read_csv(LOCAL_PATH/\"tiny_val2.csv\")\n",
312 | "print(df_t)\n",
313 | "df_t_e = encode_data(df_t)\n",
314 | "df_v_e = encode_data(df_v, df_t)\n",
315 | "df_v_e\n",
316 | "print(df_t_e)"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": 11,
322 | "metadata": {},
323 | "outputs": [],
324 | "source": [
325 | "# encoding the train and validation data\n",
326 | "df_train = encode_data(train)\n",
327 | "df_val = encode_data(val, train)"
328 | ]
329 | },
330 | {
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "## Embedding layer"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 12,
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "import torch\n",
344 | "import torch.nn as nn\n",
345 | "import torch.nn.functional as F"
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 13,
351 | "metadata": {},
352 | "outputs": [],
353 | "source": [
354 | "# an Embedding module containing 10 user or item embeddings of size 3\n",
355 | "# embedding will be initialized at random\n",
356 | "embed = nn.Embedding(10, 3)"
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "execution_count": 14,
362 | "metadata": {},
363 | "outputs": [
364 | {
365 | "data": {
366 | "text/plain": [
367 | "tensor([[[-0.1301, 0.0691, -1.1678],\n",
368 | " [-0.9865, 0.4514, -1.4770],\n",
369 | " [-1.7121, 0.0701, 0.0481],\n",
370 | " [ 1.4485, 0.1340, 0.0099],\n",
371 | " [-1.4074, -0.8650, -0.1255],\n",
372 | " [-0.1301, 0.0691, -1.1678]]])"
373 | ]
374 | },
375 | "execution_count": 14,
376 | "metadata": {},
377 | "output_type": "execute_result"
378 | }
379 | ],
380 | "source": [
381 | "# given a list of ids we can \"look up\" the embedding corresponding to each id\n",
382 | "a = torch.LongTensor([[1,2,0,4,5,1]])\n",
383 | "embed(a)"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "## Matrix factorization model"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 15,
396 | "metadata": {},
397 | "outputs": [],
398 | "source": [
399 | "class MF(nn.Module):\n",
400 | " def __init__(self, num_users, num_items, emb_size=100):\n",
401 | " super(MF, self).__init__()\n",
402 | " self.user_emb = nn.Embedding(num_users, emb_size)\n",
403 | " self.item_emb = nn.Embedding(num_items, emb_size)\n",
404 | " self.user_emb.weight.data.uniform_(0, 0.05)\n",
405 | " self.item_emb.weight.data.uniform_(0, 0.05)\n",
406 | " \n",
407 | " def forward(self, u, v):\n",
408 | " u = self.user_emb(u)\n",
409 | " v = self.item_emb(v)\n",
410 | " return (u*v).sum(1) "
411 | ]
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "metadata": {},
416 | "source": [
417 | "## Debugging MF model"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 16,
423 | "metadata": {},
424 | "outputs": [
425 | {
426 | "data": {
427 | "text/html": [
428 | "\n",
429 | "\n",
442 | "
\n",
443 | " \n",
444 | " \n",
445 | " | \n",
446 | " userId | \n",
447 | " movieId | \n",
448 | " rating | \n",
449 | "
\n",
450 | " \n",
451 | " \n",
452 | " \n",
453 | " 0 | \n",
454 | " 0 | \n",
455 | " 0 | \n",
456 | " 4 | \n",
457 | "
\n",
458 | " \n",
459 | " 1 | \n",
460 | " 0 | \n",
461 | " 1 | \n",
462 | " 5 | \n",
463 | "
\n",
464 | " \n",
465 | " 2 | \n",
466 | " 1 | \n",
467 | " 1 | \n",
468 | " 5 | \n",
469 | "
\n",
470 | " \n",
471 | " 3 | \n",
472 | " 1 | \n",
473 | " 2 | \n",
474 | " 3 | \n",
475 | "
\n",
476 | " \n",
477 | " 4 | \n",
478 | " 2 | \n",
479 | " 0 | \n",
480 | " 4 | \n",
481 | "
\n",
482 | " \n",
483 | " 5 | \n",
484 | " 2 | \n",
485 | " 1 | \n",
486 | " 4 | \n",
487 | "
\n",
488 | " \n",
489 | " 6 | \n",
490 | " 3 | \n",
491 | " 0 | \n",
492 | " 5 | \n",
493 | "
\n",
494 | " \n",
495 | " 7 | \n",
496 | " 3 | \n",
497 | " 3 | \n",
498 | " 2 | \n",
499 | "
\n",
500 | " \n",
501 | " 8 | \n",
502 | " 4 | \n",
503 | " 0 | \n",
504 | " 1 | \n",
505 | "
\n",
506 | " \n",
507 | " 9 | \n",
508 | " 4 | \n",
509 | " 3 | \n",
510 | " 4 | \n",
511 | "
\n",
512 | " \n",
513 | " 10 | \n",
514 | " 5 | \n",
515 | " 3 | \n",
516 | " 5 | \n",
517 | "
\n",
518 | " \n",
519 | " 11 | \n",
520 | " 6 | \n",
521 | " 1 | \n",
522 | " 1 | \n",
523 | "
\n",
524 | " \n",
525 | " 12 | \n",
526 | " 6 | \n",
527 | " 3 | \n",
528 | " 3 | \n",
529 | "
\n",
530 | " \n",
531 | "
\n",
532 | "
"
533 | ],
534 | "text/plain": [
535 | " userId movieId rating\n",
536 | "0 0 0 4\n",
537 | "1 0 1 5\n",
538 | "2 1 1 5\n",
539 | "3 1 2 3\n",
540 | "4 2 0 4\n",
541 | "5 2 1 4\n",
542 | "6 3 0 5\n",
543 | "7 3 3 2\n",
544 | "8 4 0 1\n",
545 | "9 4 3 4\n",
546 | "10 5 3 5\n",
547 | "11 6 1 1\n",
548 | "12 6 3 3"
549 | ]
550 | },
551 | "execution_count": 16,
552 | "metadata": {},
553 | "output_type": "execute_result"
554 | }
555 | ],
556 | "source": [
557 | "df_t_e"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 16,
563 | "metadata": {},
564 | "outputs": [],
565 | "source": [
566 | "num_users = 7\n",
567 | "num_items = 4\n",
568 | "emb_size = 3\n",
569 | "\n",
570 | "user_emb = nn.Embedding(num_users, emb_size)\n",
571 | "item_emb = nn.Embedding(num_items, emb_size)\n",
572 | "users = torch.LongTensor(df_t_e.userId.values)\n",
573 | "items = torch.LongTensor(df_t_e.movieId.values)"
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 17,
579 | "metadata": {},
580 | "outputs": [],
581 | "source": [
582 | "U = user_emb(users)\n",
583 | "V = item_emb(items)"
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "execution_count": 18,
589 | "metadata": {},
590 | "outputs": [
591 | {
592 | "data": {
593 | "text/plain": [
594 | "tensor([[ 0.1547, 0.2277, 0.2442],\n",
595 | " [ 0.1547, 0.2277, 0.2442],\n",
596 | " [ 0.6601, 0.8225, -1.2139],\n",
597 | " [ 0.6601, 0.8225, -1.2139],\n",
598 | " [ 0.1672, -1.2177, 0.1403],\n",
599 | " [ 0.1672, -1.2177, 0.1403],\n",
600 | " [-1.1907, -1.2933, -0.5506],\n",
601 | " [-1.1907, -1.2933, -0.5506],\n",
602 | " [ 0.1938, -0.0683, -0.8493],\n",
603 | " [ 0.1938, -0.0683, -0.8493],\n",
604 | " [ 0.8506, -1.1564, 1.1165],\n",
605 | " [ 0.8639, -2.5148, -0.8391],\n",
606 | " [ 0.8639, -2.5148, -0.8391]])"
607 | ]
608 | },
609 | "execution_count": 18,
610 | "metadata": {},
611 | "output_type": "execute_result"
612 | }
613 | ],
614 | "source": [
615 | "U"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 19,
621 | "metadata": {},
622 | "outputs": [
623 | {
624 | "data": {
625 | "text/plain": [
626 | "tensor([[-0.1766, 0.2957, 0.4409],\n",
627 | " [ 0.1205, 0.1733, 0.1165],\n",
628 | " [ 0.5143, 0.6258, -0.5793],\n",
629 | " [-0.5603, 0.3582, -0.5370],\n",
630 | " [-0.1909, -1.5812, 0.2533],\n",
631 | " [ 0.1303, -0.9266, 0.0670],\n",
632 | " [ 1.3594, -1.6793, -0.9940],\n",
633 | " [-0.2324, 1.4822, 0.5151],\n",
634 | " [-0.2212, -0.0887, -1.5335],\n",
635 | " [ 0.0378, 0.0783, 0.7947],\n",
636 | " [ 0.1660, 1.3253, -1.0447],\n",
637 | " [ 0.6730, -1.9135, -0.4004],\n",
638 | " [ 0.1686, 2.8820, 0.7851]])"
639 | ]
640 | },
641 | "execution_count": 19,
642 | "metadata": {},
643 | "output_type": "execute_result"
644 | }
645 | ],
646 | "source": [
647 | "# element wise multiplication\n",
648 | "U*V "
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 20,
654 | "metadata": {},
655 | "outputs": [
656 | {
657 | "data": {
658 | "text/plain": [
659 | "tensor([ 0.5600, 0.4103, 0.5608, -0.7391, -1.5187, -0.7294, -1.3139,\n",
660 | " 1.7649, -1.8434, 0.9108, 0.4466, -1.6409, 3.8357])"
661 | ]
662 | },
663 | "execution_count": 20,
664 | "metadata": {},
665 | "output_type": "execute_result"
666 | }
667 | ],
668 | "source": [
669 | "# what we want is a dot product per row\n",
670 | "(U*V).sum(1) "
671 | ]
672 | },
673 | {
674 | "cell_type": "markdown",
675 | "metadata": {},
676 | "source": [
677 | "## Training MF model"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": 21,
683 | "metadata": {},
684 | "outputs": [
685 | {
686 | "name": "stdout",
687 | "output_type": "stream",
688 | "text": [
689 | "671 8442\n"
690 | ]
691 | }
692 | ],
693 | "source": [
694 | "num_users = len(df_train.userId.unique())\n",
695 | "num_items = len(df_train.movieId.unique())\n",
696 | "print(num_users, num_items) "
697 | ]
698 | },
699 | {
700 | "cell_type": "code",
701 | "execution_count": 22,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "model = MF(num_users, num_items, emb_size=100) # .cuda() if you have a GPU"
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": 23,
711 | "metadata": {},
712 | "outputs": [],
713 | "source": [
714 | "def train_epocs(model, epochs=10, lr=0.01, wd=0.0, unsqueeze=False):\n",
715 | " optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)\n",
716 | " model.train()\n",
717 | " for i in range(epochs):\n",
718 | " users = torch.LongTensor(df_train.userId.values) # .cuda()\n",
719 | " items = torch.LongTensor(df_train.movieId.values) #.cuda()\n",
720 | " ratings = torch.FloatTensor(df_train.rating.values) #.cuda()\n",
721 | " if unsqueeze:\n",
722 | " ratings = ratings.unsqueeze(1)\n",
723 | " y_hat = model(users, items)\n",
724 | " loss = F.mse_loss(y_hat, ratings)\n",
725 | " optimizer.zero_grad()\n",
726 | " loss.backward()\n",
727 | " optimizer.step()\n",
728 | " print(loss.item()) \n",
729 | " test_loss(model, unsqueeze)"
730 | ]
731 | },
732 | {
733 | "cell_type": "code",
734 | "execution_count": 24,
735 | "metadata": {},
736 | "outputs": [
737 | {
738 | "name": "stdout",
739 | "output_type": "stream",
740 | "text": [
741 | "torch.Size([79799])\n",
742 | "torch.Size([79799, 1])\n"
743 | ]
744 | }
745 | ],
746 | "source": [
747 | "# Here is what unsqueeze does\n",
748 | "ratings = torch.FloatTensor(df_train.rating.values)\n",
749 | "print(ratings.shape)\n",
750 | "ratings = ratings.unsqueeze(1) # .cuda()\n",
751 | "print(ratings.shape)"
752 | ]
753 | },
754 | {
755 | "cell_type": "code",
756 | "execution_count": 25,
757 | "metadata": {},
758 | "outputs": [],
759 | "source": [
760 | "def test_loss(model, unsqueeze=False):\n",
761 | " model.eval()\n",
762 | " users = torch.LongTensor(df_val.userId.values) #.cuda()\n",
763 | " items = torch.LongTensor(df_val.movieId.values) #.cuda()\n",
764 | " ratings = torch.FloatTensor(df_val.rating.values) #.cuda()\n",
765 | " if unsqueeze:\n",
766 | " ratings = ratings.unsqueeze(1)\n",
767 | " y_hat = model(users, items)\n",
768 | " loss = F.mse_loss(y_hat, ratings)\n",
769 | " print(\"test loss %.3f \" % loss.item())"
770 | ]
771 | },
772 | {
773 | "cell_type": "code",
774 | "execution_count": 26,
775 | "metadata": {},
776 | "outputs": [
777 | {
778 | "name": "stdout",
779 | "output_type": "stream",
780 | "text": [
781 | "13.23068904876709\n",
782 | "5.119534015655518\n",
783 | "2.3902299404144287\n",
784 | "3.441521406173706\n",
785 | "0.9096018671989441\n",
786 | "1.8109439611434937\n",
787 | "2.749631643295288\n",
788 | "2.278921604156494\n",
789 | "1.1593214273452759\n",
790 | "0.925656795501709\n",
791 | "test loss 1.947 \n"
792 | ]
793 | }
794 | ],
795 | "source": [
796 | "train_epocs(model, epochs=10, lr=0.1)"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": 27,
802 | "metadata": {},
803 | "outputs": [
804 | {
805 | "name": "stdout",
806 | "output_type": "stream",
807 | "text": [
808 | "1.7027523517608643\n",
809 | "1.0512956380844116\n",
810 | "0.7498359680175781\n",
811 | "0.6950282454490662\n",
812 | "0.7596880197525024\n",
813 | "0.8397833108901978\n",
814 | "0.8818210363388062\n",
815 | "0.8753886818885803\n",
816 | "0.8334189653396606\n",
817 | "0.7767009735107422\n",
818 | "0.7246581315994263\n",
819 | "0.6901594400405884\n",
820 | "0.6771144866943359\n",
821 | "0.6810137033462524\n",
822 | "0.69219970703125\n",
823 | "test loss 0.894 \n"
824 | ]
825 | }
826 | ],
827 | "source": [
828 | "train_epocs(model, epochs=15, lr=0.01)"
829 | ]
830 | },
831 | {
832 | "cell_type": "code",
833 | "execution_count": 28,
834 | "metadata": {},
835 | "outputs": [
836 | {
837 | "name": "stdout",
838 | "output_type": "stream",
839 | "text": [
840 | "0.7007282376289368\n",
841 | "0.6625022888183594\n",
842 | "0.6684340834617615\n",
843 | "0.6455244421958923\n",
844 | "0.6380830407142639\n",
845 | "0.6450700759887695\n",
846 | "0.6408411264419556\n",
847 | "0.6256920099258423\n",
848 | "0.6144804358482361\n",
849 | "0.6132143139839172\n",
850 | "0.6140048503875732\n",
851 | "0.6083489060401917\n",
852 | "0.5969548225402832\n",
853 | "0.5860226154327393\n",
854 | "0.5791704058647156\n",
855 | "test loss 0.822 \n"
856 | ]
857 | }
858 | ],
859 | "source": [
860 | "train_epocs(model, epochs=15, lr=0.01)"
861 | ]
862 | },
863 | {
864 | "cell_type": "markdown",
865 | "metadata": {},
866 | "source": [
867 | "## MF with bias"
868 | ]
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": 29,
873 | "metadata": {},
874 | "outputs": [],
875 | "source": [
876 | "class MF_bias(nn.Module):\n",
877 | " def __init__(self, num_users, num_items, emb_size=100):\n",
878 | " super(MF_bias, self).__init__()\n",
879 | " self.user_emb = nn.Embedding(num_users, emb_size)\n",
880 | " self.user_bias = nn.Embedding(num_users, 1)\n",
881 | " self.item_emb = nn.Embedding(num_items, emb_size)\n",
882 | " self.item_bias = nn.Embedding(num_items, 1)\n",
883 | " self.user_emb.weight.data.uniform_(0,0.05)\n",
884 | " self.item_emb.weight.data.uniform_(0,0.05)\n",
885 | " self.user_bias.weight.data.uniform_(-0.01,0.01)\n",
886 | " self.item_bias.weight.data.uniform_(-0.01,0.01)\n",
887 | " \n",
888 | " def forward(self, u, v):\n",
889 | " U = self.user_emb(u)\n",
890 | " V = self.item_emb(v)\n",
891 | " b_u = self.user_bias(u).squeeze()\n",
892 | " b_v = self.item_bias(v).squeeze()\n",
893 | " return (U*V).sum(1) + b_u + b_v"
894 | ]
895 | },
896 | {
897 | "cell_type": "code",
898 | "execution_count": 32,
899 | "metadata": {},
900 | "outputs": [],
901 | "source": [
902 | "model = MF_bias(num_users, num_items, emb_size=100) #.cuda()"
903 | ]
904 | },
905 | {
906 | "cell_type": "code",
907 | "execution_count": 33,
908 | "metadata": {},
909 | "outputs": [
910 | {
911 | "name": "stdout",
912 | "output_type": "stream",
913 | "text": [
914 | "13.233644485473633\n",
915 | "9.459980964660645\n",
916 | "4.618295669555664\n",
917 | "1.2266862392425537\n",
918 | "2.4537320137023926\n",
919 | "3.888521432876587\n",
920 | "2.6157896518707275\n",
921 | "1.1573508977890015\n",
922 | "0.8204843997955322\n",
923 | "1.3100122213363647\n",
924 | "test loss 2.126 \n"
925 | ]
926 | }
927 | ],
928 | "source": [
929 | "train_epocs(model, epochs=10, lr=0.05, wd=1e-5)"
930 | ]
931 | },
932 | {
933 | "cell_type": "code",
934 | "execution_count": 34,
935 | "metadata": {},
936 | "outputs": [
937 | {
938 | "name": "stdout",
939 | "output_type": "stream",
940 | "text": [
941 | "1.9130752086639404\n",
942 | "1.3447301387786865\n",
943 | "0.9572998285293579\n",
944 | "0.7714419364929199\n",
945 | "0.752704381942749\n",
946 | "0.8091325759887695\n",
947 | "0.8543495535850525\n",
948 | "0.8524782657623291\n",
949 | "0.8114585876464844\n",
950 | "0.7577651739120483\n",
951 | "test loss 0.851 \n"
952 | ]
953 | }
954 | ],
955 | "source": [
956 | "train_epocs(model, epochs=10, lr=0.01, wd=1e-5)"
957 | ]
958 | },
959 | {
960 | "cell_type": "code",
961 | "execution_count": 35,
962 | "metadata": {},
963 | "outputs": [
964 | {
965 | "name": "stdout",
966 | "output_type": "stream",
967 | "text": [
968 | "0.7163214087486267\n",
969 | "0.7023102045059204\n",
970 | "0.6904919147491455\n",
971 | "0.6807348728179932\n",
972 | "0.6728458404541016\n",
973 | "0.6666097044944763\n",
974 | "0.6618107557296753\n",
975 | "0.6582220792770386\n",
976 | "0.6556380391120911\n",
977 | "0.6538312435150146\n",
978 | "test loss 0.805 \n"
979 | ]
980 | }
981 | ],
982 | "source": [
983 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-5)"
984 | ]
985 | },
986 | {
987 | "cell_type": "markdown",
988 | "metadata": {},
989 | "source": [
990 | "Note that these models are sensitive to weight initialization, the choice of optimization algorithm, and regularization."
991 | ]
992 | },
993 | {
994 | "cell_type": "markdown",
995 | "metadata": {},
996 | "source": [
997 | "## Neural Network Model"
998 | ]
999 | },
1000 | {
1001 | "cell_type": "code",
1002 | "execution_count": 76,
1003 | "metadata": {},
1004 | "outputs": [],
1005 | "source": [
1006 | "# Note: here the user and item embeddings are concatenated rather than multiplied together, so they could potentially have different sizes.\n",
1007 | "# We could potentially get better results by continuing to tune the regularization.\n",
1008 | " \n",
1009 | "class CollabFNet(nn.Module):\n",
1010 | " def __init__(self, num_users, num_items, emb_size=100, n_hidden=10):\n",
1011 | " super(CollabFNet, self).__init__()\n",
1012 | " self.user_emb = nn.Embedding(num_users, emb_size)\n",
1013 | " self.item_emb = nn.Embedding(num_items, emb_size)\n",
1014 | " self.lin1 = nn.Linear(emb_size*2, n_hidden)\n",
1015 | " self.lin2 = nn.Linear(n_hidden, 1)\n",
1016 | " self.drop1 = nn.Dropout(0.1)\n",
1017 | " \n",
1018 | " def forward(self, u, v):\n",
1019 | " U = self.user_emb(u)\n",
1020 | " V = self.item_emb(v)\n",
1021 | " x = F.relu(torch.cat([U, V], dim=1))\n",
1022 | " x = self.drop1(x)\n",
1023 | " x = F.relu(self.lin1(x))\n",
1024 | " x = self.lin2(x)\n",
1025 | " return x"
1026 | ]
1027 | },
1028 | {
1029 | "cell_type": "code",
1030 | "execution_count": 77,
1031 | "metadata": {},
1032 | "outputs": [],
1033 | "source": [
1034 | "model = CollabFNet(num_users, num_items, emb_size=100) #.cuda()"
1035 | ]
1036 | },
1037 | {
1038 | "cell_type": "code",
1039 | "execution_count": 78,
1040 | "metadata": {},
1041 | "outputs": [
1042 | {
1043 | "name": "stdout",
1044 | "output_type": "stream",
1045 | "text": [
1046 | "13.101761817932129\n",
1047 | "1.957230806350708\n",
1048 | "1.2605514526367188\n",
1049 | "1.3381402492523193\n",
1050 | "1.061022162437439\n",
1051 | "1.1385098695755005\n",
1052 | "0.9165319800376892\n",
1053 | "0.9622549414634705\n",
1054 | "0.8723138570785522\n",
1055 | "0.8084518909454346\n",
1056 | "0.8500765562057495\n",
1057 | "0.7535637617111206\n",
1058 | "0.791947603225708\n",
1059 | "0.7653028964996338\n",
1060 | "0.7301635146141052\n",
1061 | "test loss 0.869 \n"
1062 | ]
1063 | }
1064 | ],
1065 | "source": [
1066 | "train_epocs(model, epochs=15, lr=0.05, wd=1e-6, unsqueeze=True) "
1067 | ]
1068 | },
1069 | {
1070 | "cell_type": "code",
1071 | "execution_count": 79,
1072 | "metadata": {},
1073 | "outputs": [
1074 | {
1075 | "name": "stdout",
1076 | "output_type": "stream",
1077 | "text": [
1078 | "0.7691234350204468\n",
1079 | "0.9072751402854919\n",
1080 | "0.7757670879364014\n",
1081 | "0.7180655598640442\n",
1082 | "0.7918605208396912\n",
1083 | "0.7724899053573608\n",
1084 | "0.7119362950325012\n",
1085 | "0.7106000185012817\n",
1086 | "0.7403213977813721\n",
1087 | "0.7438958883285522\n",
1088 | "test loss 0.816 \n"
1089 | ]
1090 | }
1091 | ],
1092 | "source": [
1093 | "train_epocs(model, epochs=10, lr=0.01, wd=1e-6, unsqueeze=True)"
1094 | ]
1095 | },
1096 | {
1097 | "cell_type": "code",
1098 | "execution_count": 80,
1099 | "metadata": {},
1100 | "outputs": [
1101 | {
1102 | "name": "stdout",
1103 | "output_type": "stream",
1104 | "text": [
1105 | "0.7163267731666565\n",
1106 | "0.7032808065414429\n",
1107 | "0.695513904094696\n",
1108 | "0.6967512369155884\n",
1109 | "0.6998187303543091\n",
1110 | "0.700666606426239\n",
1111 | "0.7004959583282471\n",
1112 | "0.6982167959213257\n",
1113 | "0.6955875158309937\n",
1114 | "0.694402813911438\n",
1115 | "test loss 0.796 \n"
1116 | ]
1117 | }
1118 | ],
1119 | "source": [
1120 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)"
1121 | ]
1122 | },
1123 | {
1124 | "cell_type": "code",
1125 | "execution_count": 81,
1126 | "metadata": {},
1127 | "outputs": [
1128 | {
1129 | "name": "stdout",
1130 | "output_type": "stream",
1131 | "text": [
1132 | "0.6919353008270264\n",
1133 | "0.6934647560119629\n",
1134 | "0.6922585368156433\n",
1135 | "0.6942275762557983\n",
1136 | "0.6926798224449158\n",
1137 | "0.6916202902793884\n",
1138 | "0.6911264061927795\n",
1139 | "0.6923496127128601\n",
1140 | "0.6922929286956787\n",
1141 | "0.6904215812683105\n",
1142 | "test loss 0.795 \n"
1143 | ]
1144 | }
1145 | ],
1146 | "source": [
1147 | "train_epocs(model, epochs=10, lr=0.001, wd=1e-6, unsqueeze=True)"
1148 | ]
1149 | },
1150 | {
1151 | "cell_type": "markdown",
1152 | "metadata": {},
1153 | "source": [
1154 | "# References\n",
1155 | "* This notebook is based on [lesson 5 of Jeremy Howard's Deep Learning Course](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson5-movielens.ipynb)"
1156 | ]
1157 | },
1158 | {
1159 | "cell_type": "code",
1160 | "execution_count": null,
1161 | "metadata": {},
1162 | "outputs": [],
1163 | "source": []
1164 | }
1165 | ],
1166 | "metadata": {
1167 | "kernelspec": {
1168 | "display_name": "Python 3",
1169 | "language": "python",
1170 | "name": "python3"
1171 | },
1172 | "language_info": {
1173 | "codemirror_mode": {
1174 | "name": "ipython",
1175 | "version": 3
1176 | },
1177 | "file_extension": ".py",
1178 | "mimetype": "text/x-python",
1179 | "name": "python",
1180 | "nbconvert_exporter": "python",
1181 | "pygments_lexer": "ipython3",
1182 | "version": "3.6.5"
1183 | },
1184 | "toc": {
1185 | "nav_menu": {},
1186 | "number_sections": true,
1187 | "sideBar": true,
1188 | "skip_h1_title": false,
1189 | "toc_cell": false,
1190 | "toc_position": {},
1191 | "toc_section_display": "block",
1192 | "toc_window_display": false
1193 | }
1194 | },
1195 | "nbformat": 4,
1196 | "nbformat_minor": 2
1197 | }
1198 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pytorch
2 | channels:
3 | - conda-forge
4 | - pytorch
5 | dependencies:
6 | - python==3.6
7 | - ipython
8 | - jupyter
9 | - pytorch
10 | - torchvision
11 | - numpy
12 | - matplotlib
13 | - pandas
14 | - opencv
15 | - spacy
--------------------------------------------------------------------------------
/images/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yanneta/pytorch-tutorials/53c1dc84fc11e4c50877fa141f9b84ad18265047/images/model.png
--------------------------------------------------------------------------------
/images/tiny_training2.csv:
--------------------------------------------------------------------------------
1 | userId,movieId,rating
2 | 11,1,4
3 | 11,23,5
4 | 2,23,5
5 | 2,4,3
6 | 31,1,4
7 | 31,23,4
8 | 4,1,5
9 | 4,3,2
10 | 52,1,1
11 | 52,3,4
12 | 61,3,5
13 | 7,23,1
14 | 7,3,3
15 |
--------------------------------------------------------------------------------
/images/tiny_val2.csv:
--------------------------------------------------------------------------------
1 | userId,movieId,rating
2 | 2,1,5
3 | 4,23,5
4 | 4,2,3
5 |
--------------------------------------------------------------------------------
/intro-to-pytoch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": true
7 | },
8 | "source": [
9 | "Table of Contents
\n",
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "# import pytorch libraries\n",
20 | "%matplotlib inline\n",
21 | "import torch \n",
22 | "import torch.autograd as autograd \n",
23 | "import torch.nn as nn \n",
24 | "import torch.nn.functional as F\n",
25 | "import torch.optim as optim\n",
26 | "import numpy as np"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# Intro to Pytorch"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "PyTorch consists of 4 main packages:\n",
41 | "* torch: a general purpose array library similar to Numpy that can do computations on GPU\n",
42 | "* torch.autograd: a package for automatically obtaining gradients\n",
43 | "* torch.nn: a neural net library with common layers and cost functions\n",
44 | "* torch.optim: an optimization package with common optimization algorithms like SGD, Adam, etc"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "## Pytorch tensors\n",
52 | "Like NumPy arrays, but PyTorch tensors can utilize GPUs to accelerate their numerical computations."
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 2,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "# Create random tensor\n",
62 | "N = 5\n",
63 | "x = torch.randn(N, 10).type(torch.FloatTensor)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 3,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "tensor([[-1.8239, 0.7380, 0.0890, -1.1650, 0.3185, 1.8577, 0.1110,\n",
75 | " -0.8694, 1.1761, 1.0106],\n",
76 | " [ 1.8847, -0.3493, -0.3044, 1.3749, 0.4396, 1.5092, -0.7950,\n",
77 | " 0.5705, 0.9309, -0.3835],\n",
78 | " [ 0.4320, 0.1081, -0.8353, 0.5639, 0.1228, 1.4746, -0.5602,\n",
79 | " -1.2526, 0.0964, -0.1116],\n",
80 | " [ 1.8627, -1.1173, 2.0276, 0.6197, -1.0586, 0.6214, -0.1054,\n",
81 | " -0.3784, 0.9780, -1.6672],\n",
82 | " [-0.1745, -1.0696, -0.1319, -0.5890, 0.7507, -0.3775, -1.7948,\n",
83 | " 1.2520, -0.8993, 1.2639]])"
84 | ]
85 | },
86 | "execution_count": 3,
87 | "metadata": {},
88 | "output_type": "execute_result"
89 | }
90 | ],
91 | "source": [
92 | "x"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 4,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": [
103 | "tensor([[-1.8239, 0.7380, 0.0890, -1.1650, 0.3185, 1.8577, 0.1110,\n",
104 | " -0.8694, 1.1761, 1.0106, 1.8847, -0.3493, -0.3044, 1.3749,\n",
105 | " 0.4396, 1.5092, -0.7950, 0.5705, 0.9309, -0.3835, 0.4320,\n",
106 | " 0.1081, -0.8353, 0.5639, 0.1228, 1.4746, -0.5602, -1.2526,\n",
107 | " 0.0964, -0.1116, 1.8627, -1.1173, 2.0276, 0.6197, -1.0586,\n",
108 | " 0.6214, -0.1054, -0.3784, 0.9780, -1.6672, -0.1745, -1.0696,\n",
109 | " -0.1319, -0.5890, 0.7507, -0.3775, -1.7948, 1.2520, -0.8993,\n",
110 | " 1.2639]])"
111 | ]
112 | },
113 | "execution_count": 4,
114 | "metadata": {},
115 | "output_type": "execute_result"
116 | }
117 | ],
118 | "source": [
119 | "# reshaping of tensors using .view()\n",
120 | "x.view(1,-1) #-1 makes torch infer the second dim"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "## Pytorch Autograd\n",
128 | "The autograd package in PyTorch provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. For example, it can compute the gradient of the error with respect to all parameters.\n",
129 | "\n",
130 | "In order for this to happen we need to declare our parameters as Tensors with the requires_grad=True keyword. Here is an example:"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 5,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "x = torch.tensor([1., 2., 3., 4., 5., 6.], requires_grad=True)"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 6,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "x.grad"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 7,
154 | "metadata": {},
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/plain": [
159 | "tensor(48.)"
160 | ]
161 | },
162 | "execution_count": 7,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "L = (2*x+1).sum()\n",
169 | "L"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 8,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "L.backward() # computes the grad of L with respect to x"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 9,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/plain": [
189 | "tensor([ 2., 2., 2., 2., 2., 2.])"
190 | ]
191 | },
192 | "execution_count": 9,
193 | "metadata": {},
194 | "output_type": "execute_result"
195 | }
196 | ],
197 | "source": [
198 | "x.grad"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "## torch.nn module\n",
206 | "A neural net library with common layers and cost functions"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 10,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "# linear transformation of a Nx5 matrix into a Nx3 matrix, where N can be anything \n",
216 | "# (number of observations)\n",
217 | "D = 5 # number of input features\n",
218 | "M = 3 # neurons in the first hidden layer\n",
219 | "linear_map = nn.Linear(D, M)"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 11,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/plain": [
230 | "[Parameter containing:\n",
231 | " tensor([[-0.3345, 0.1780, 0.1944, 0.3522, -0.2162],\n",
232 | " [ 0.1899, -0.1076, 0.3387, 0.3439, 0.4197],\n",
233 | " [-0.3837, -0.2800, 0.1663, 0.1904, 0.0215]]), Parameter containing:\n",
234 | " tensor([-0.1277, 0.4425, 0.4374])]"
235 | ]
236 | },
237 | "execution_count": 11,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "# parameters are initialized randomly\n",
244 | "[p for p in linear_map.parameters()]"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "# Linear Regression with Pytorch"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "The goal of linear regression is to fit a line to a set of points."
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 12,
264 | "metadata": {},
265 | "outputs": [],
266 | "source": [
267 | "# Here we generate some fake data\n",
268 | "def lin(a,b,x): return a*x+b\n",
269 | "\n",
270 | "def gen_fake_data(n, a, b):\n",
271 | " x = np.random.uniform(0,1,n) \n",
272 | " y = lin(a,b,x) + 0.1 * np.random.normal(0,3,n)\n",
273 | " return x, y\n",
274 | "\n",
275 | "x, y = gen_fake_data(50, 3., 8.)"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 13,
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
285 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFgRJREFUeJzt3Xuw7XV53/H3hwMKBE25bAJBjgdSksGhzW3BqE1SWzS1jgPVmkJaJ9BBGK2XNPYSM+2oY5oUTTpNJmaCJ+KIbYIQktFTY4PUS0wbcc4GJQUZB4oIJ6BsPQSHAZEDT/9Yy5ydxdrnt87e+/f7rcv7NbNnr8t37/3wY+/1nO/zfL/flapCkqRDOaLvACRJs89kIUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0lSI5OFJKmRyUKS1OjItr5xkg8ArwQeqqpzRo/9DPBO4GzgvKpa3eBrXw78JrADeH9VXdn080466aTatWvX9gQvSUvilltu+UZVrTSNay1ZAB8E3gt8aN1jtwOvBt630Rcl2QH8NvAyYB+wN8meqvrSoX7Yrl27WF2dmHskSRtI8tVpxrVWhqqqzwL7xx67s6q+3PCl5wF3V9U9VfUd4MPAhS2FKUmawiz2LE4D7l93f9/oMUlST2YxWWTCYxOPxk1yRZLVJKtra2sthyVJy2sWk8U+4PR1958HPDBpYFXtrqpBVQ1WVhr7M5KkTZrFZLEXOCvJGUmeBVwM7Ok5Jklaaq0liyTXAp8DfijJviSXJXlVkn3Ai4A/TnLjaOz3J/k4QFUdAN4E3AjcCVxfVXe0FackqVkW5Z3yBoNBuXRWkg5PkluqatA0bhbLUJK01O5Ze5Tr9t7HPWuP9h3KX2tzU54kLa171h5l7737OXfXCZy5ctxhfd0rf+t/UwUJfOzNP3FYX98Wk4UkbbOtvODvvXc/VfD4k09xzFE72Hvv/plIFpahJKnB4ZaF1r/gVw3vT+vcXSeQwDFH7SAZ3p8Fziwk6RA2M0vYygv+mSvH8bE3/8SmSlhtMllI0iFspiy01Rf8M1eOm5kk8V0mC0k6hM3OEmbxBX8rTBaSdAizWhbqmslCkhos2ixhM1wNJUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0maU12eTuvSWUmaQ12fTuvMQpLm0FYOK9wMk4UkzaGuT6e1DCVJc6jrY0hMFpI0p7o8hsQylCSpkclCktTIZCFJatRaskjygSQPJbl93WMnJLkpyV2jz8dv8LVPJfni6GNPWzFKkqbT5szig8DLxx57G/DJqjoL+OTo/iSPV9WPjD4uaDFGSdIUWksWVfVZYHyXyIXANaPb1wD/pK2fL6l/XR5HoXZ1vXT2+6rqQYCqejDJyRuMOzrJKnAAuLKqPtJZhJK2RdfHUahds9rg3llVA+CfA7+R5AcmDUpyRZLVJKtra2vdRijpkLo+jkLt6jpZfD3JqQCjzw9NGlRVD4w+3wN8BvjRDcbtrqpBVQ1WVlbaiVjSpnR9HIXa1XUZag9wCXDl6PNHxweMVkg9VlVPJDkJ+HvAezqNUtKWdX0chdrVWrJIci3wEuCkJPuAdzBMEtcnuQy4D/iZ0dgB8Pqqeh1wNvC+JE8znPlcWVVfaitOSe3p8jiKZXPP2qOdJuLWkkVV/ewGT50/Yewq8LrR7T8H/k5bcUnSvOtj8cCsNrglSRvoY/GAyUKS5kwfiwc8olyS5kwfiwdMFpI0h7pePGAZSpLUyGQh6bB43tNysgwlaWqe97S8nFlImprnPS0vk4WkqXne0/KyDCVpap73tLxMFtKC2+4zhDzvaTmZLKQFZkNa28WehbTAbEhru5gspAVmQ1rbxTKUtMBsSG9d1+8bMatMFtKCsyG9efZ8DrIMJUkbsOdzkMlCkjZgz+cgy1CStAF7PgeZLCTpEOz5DFmGkiQ1MllIM8z3jtCssAwlzSiX
bWqWOLOQZpTLNjVLWksWST6Q5KEkt6977IQkNyW5a/T5+A2+9pLRmLuSXNJWjNIsc9mmZkmqqp1vnPwU8Cjwoao6Z/TYe4D9VXVlkrcBx1fVL4593QnAKjAACrgF+PGqevhQP28wGNTq6moL/yVSfzxqQm1LcktVDZrGtdazqKrPJtk19vCFwEtGt68BPgP84tiYfwTcVFX7AZLcBLwcuLalUKWZ5bJNzYquexbfV1UPAow+nzxhzGnA/evu7xs99gxJrkiymmR1bW1t24OVJA3NYoM7Ex6bWCurqt1VNaiqwcrKSsthSdLy6jpZfD3JqQCjzw9NGLMPOH3d/ecBD3QQmyRpA10niz3Ad1c3XQJ8dMKYG4GfTnL8aLXUT48ekyT1pM2ls9cCnwN+KMm+JJcBVwIvS3IX8LLRfZIMkrwfYNTY/mVg7+jjXd9tdkvzou+d133/fC2e1pbOds2ls5oVfe+87vvnb4ZLhPvT+9JZaVmt33l9zFE72Hvv/k5fAPv++YdrHpPbMprF1VDSXOt753XfP/9weazJfHBmIW2zvt8wp+nnz1rJZ96S27KyZyEtkVkt+cxaAlsm9iykJXC4L7Kz2s/wWJPZZ7KQ5tRmZgmWfLRZJgtpBmymDLOZWULf/RTNL5OF1LPN9hE2O0uw5KPNMFlIPdtsH8FZgrpkspB6tpU+grMEdcVkIfXMGYLmgclCmgHOEDTrPO5D6ognwWqeObOQOjCrO6elaTmzkFowPovwsDzNO2cW0jabNItYhp3Tnu+02EwW0jabtG/ionN3LvSKp82U2Uwu88VkIW2zjWYRi7zi6XA3FtrDmT8mC2mbLeO+icMts83q6bfamMlCasEizyImOdwEuQw9nEVjspC0LQ4nQS7j7GvemSwk9WLZZl/zzn0WkqRGvSSLJD+f5PYkdyT51xOef0mSR5J8cfTx9j7ilCQNdV6GSnIOcDlwHvAd4E+S/HFV3TU29M+q6pVdxydJeqY+ZhZnAzdX1WNVdQD4U+BVPcQhzQQPGNQ86KPBfTvwK0lOBB4HXgGsThj3oiS3AQ8A/7aq7hgfkOQK4AqAnTt3thex1BI3p2ledD6zqKo7gXcDNwF/AtwGHBgbdivw/Kr6YeC3gI9s8L12V9WgqgYrKystRi21wwMGNS96aXBX1dVV9WNV9VPAfuCusee/VVWPjm5/HDgqyUk9hCq1ys1pmhe97LNIcnJVPZRkJ/Bq4EVjz58CfL2qKsl5DJPaN3sIVWqVm9M0L/ralPeHo57Fk8Abq+rhJK8HqKqrgNcAb0hygGFf4+Kqqp5i1YxalFNL3ZymedBLsqiqn5zw2FXrbr8XeG+nQWmu2BiWuuUObs2lWW0MuwxWi6pxZpHkTcDvVdXDHcQjTWUWG8OLOtvZrnLfopQNl9U0ZahTgL1JbgU+ANxo/0B9m8XG8CK+R8N2JcBFTaTLpLEMVVX/ETgLuBq4FLgrya8m+YGWY5MO6cyV47jo3J0z86Izi7Odrdquct+slg01vaka3KMlrF8DvsZwA93xwA1Jbqqqf99mgNK8mMXZzlZtVwJcxES6bNJUUUryFuAS4BvA+4GPVNWTSY4A7qqqmZhhDAaDWl2ddGqItP2Wqf5uz2KxJbmlqgZN46aZWZwEvLqqvrr+wap6OomnwmrpLFv9fbv2gbifZL5N07N4+3iiWPfcndsfkjTbrL9rGbnPQjpMm6m/u/9C88734JYO0+E2spetbKXFZLKQNuFw6u+LuP9Cy8cylNQyl41qETizkDawXUs9F3H/hZaPyUKaYLzP8Dv/4sf42re+vekXe5eNat6ZLKQJ1vcZnn3kEVz+oVV2HHGEDWotLXsW0gTr+wzD2UXcV6Gl5sxCmmB9n+GU5x7NG37vVhvUWmomC2kD6/sMNqi17EwW0hRsUGvZ2bOQJDUyWUiSGpksJEmNTBZqjSetSoujlwZ3kp8HLgcC/G5V/cbY8wF+E3gF8BhwaVXd2nmg2jRP
WpUWS+cziyTnMEwU5wE/DLwyyVljw/4xcNbo4wrgdzoNUlvmGwRJi6WPMtTZwM1V9VhVHQD+FHjV2JgLgQ/V0M3A30pyateBavM8aVVaLH2UoW4HfiXJicDjDEtNq2NjTgPuX3d/3+ixBzuJUFvmSavSYuk8WVTVnUneDdwEPArcBhwYG5ZJXzr+QJIrGJap2Llz5zZHqu/a7FHdbmSTFkcvDe6quhq4GiDJrzKcOay3Dzh93f3nAQ9M+D67gd0Ag8HgGclEW2ejWhL0tHQ2ycmjzzuBVwPXjg3ZA/xchl4IPFJVlqB6cKhGtUtjpeXR19lQfzjqWTwJvLGqHk7yeoCqugr4OMNext0Ml87+y57iXHobNaqdcUjLpa8y1E9OeOyqdbcLeGOnQWmijRrV62ccxxy1g7337jdZSAvMU2fVaFKj2qWx0nIxWWhTXBorLReThTbNpbHS8vAgQUlSI5PFiMtAJWljlqFwGagkNXFmgSekSlITkwUuA5WkJpahcBmoJDUxWYy4DFSSNmYZSpLUyGQhSWpkslhw7h+RtB3sWSww949I2i7OLBaY+0ckbReTxQJz/4ik7WIZaoFtdf/IPWuPuvdEEmCyWHib3T9iv0PSepahNNHee/fz1NPF408+xVNPl/0OacmZLDTRKc89micOPA3AEwee5pTnHt1zRJL6ZLLQRF/71rd59pHDX49nH3kEX/vWt3uOSFKf7FloonN3ncCOI+JKKkmAyUIb8CReSev1UoZK8gtJ7khye5Jrkxw99vylSdaSfHH08bo+4lx2Z64cx0Xn7jRRSOo+WSQ5DXgLMKiqc4AdwMUThl5XVT8y+nh/p0FKkv6GvhrcRwLHJDkSOBZ4oKc4JElT6DxZVNVfAr8O3Ac8CDxSVZ+YMPSfJvmLJDckOb3TICVJf0MfZajjgQuBM4DvB74nyWvHhv0PYFdV/V3gfwHXbPC9rkiymmR1bW2tzbAlaan1UYZ6KfCVqlqrqieBPwJevH5AVX2zqp4Y3f1d4McnfaOq2l1Vg6oarKystBq0JC2zPpLFfcALkxybJMD5wJ3rByQ5dd3dC8af13LwjZuk2dH5Pouq+nySG4BbgQPAF4DdSd4FrFbVHuAtSS4YPb8fuLTrOD1xtV8eZCjNll425VXVO4B3jD389nXP/xLwS50GtY4vVP1b/8ZNxxy1g7337vf/gdQjz4aaYNI7zG1HSWTWyiqzFs96vnGTNFs87mOC8ReqU5579JZnGrM2W5m1eMZ53Ig0W0wWE4y/UG1HSWTWyiqzFs8km33jJknbz2SxgfEXqq2WRGatrDJr8UiabamqvmPYFoPBoFZXV7f0PQ61Amo7Vke1tcJqs9/XFV+SktxSVYOmcc4sRppq+NtREmmjrLKV3oNlHknTcjXUyKQVUPNgXuOWNF9MFiPzWsOf17glzRd7FuvMaw1/XuOW1D97FpvQZQ1/O1/g7T1IapvJogezviFOksbZs+jA+LEaNqUlzRtnFlvUVE6aNIuwKS1p3pgstmCactKkYzUuOnen5x5Jmismiy2Y5nyljWYRNqUlzROTxRZMU07y9FRJi8BksQXTJgJnEZLmnclii0wEkpaBS2clSY1MFpKkRiaLBrP8PtWS1BV7FodwqH0UHt4naZmYLA5ho30Unu0kadn0UoZK8gtJ7khye5Jrkxw99vyzk1yX5O4kn0+yq484N9pH0cXZTpa/JM2SzmcWSU4D3gK8oKoeT3I9cDHwwXXDLgMerqq/neRi4N3ARV3HutE+irbPdnLmImnW9FWGOhI4JsmTwLHAA2PPXwi8c3T7BuC9SVI9vFPTpH0Ube/KnuYYEUnqUufJoqr+MsmvA/cBjwOfqKpPjA07Dbh/NP5AkkeAE4FvdBrsIbS5Gc9TaSXNmj7KUMcznDmcAfwV8AdJXltV/339sAlf+oxZRZIrgCsAdu7c2UK0/fA8KUmzpo8G90uBr1TVWlU9CfwR8OKxMfuA0wGSHAl8
L/CMLnJV7a6qQVUNVlZWWg67W2euHMdF5+40UUiaCX0ki/uAFyY5NkmA84E7x8bsAS4Z3X4N8Kk++hWSpKHOk0VVfZ5h0/pW4P+OYtid5F1JLhgNuxo4McndwFuBt3UdpyTpoCzKP9gHg0Gtrq72HYYkzZUkt1TVoGmcZ0NJkhqZLCRJjUwWkqRGJosWeb6TpEXhqbMt8XwnSYvEmUVLujiZVpK6YrJoiec7SVoklqFa4vlOkhaJyaJFbZ5MK0ldsgwlSWpkspAkNTJZSJIamSwkSY1MFpKkRiYLSVIjk4UkqdHCvPlRkjXgq2MPnwR8o4dwZo3X4SCvxZDX4aBlvxbPr6qVpkELkywmSbI6zTtALTqvw0FeiyGvw0Fei+lYhpIkNTJZSJIaLXqy2N13ADPC63CQ12LI63CQ12IKC92zkCRtj0WfWUiStsFCJIskL0/y5SR3J3nbhOefneS60fOfT7Kr+yjbN8V1eGuSLyX5iySfTPL8PuLsQtO1WDfuNUkqyUKuhpnmOiT5Z6PfizuS/H7XMXZlir+PnUk+neQLo7+RV/QR58yqqrn+AHYA/w84E3gWcBvwgrEx/wq4anT7YuC6vuPu6Tr8A+DY0e03LOJ1mPZajMY9B/gscDMw6Dvunn4nzgK+ABw/un9y33H3eC12A28Y3X4BcG/fcc/SxyLMLM4D7q6qe6rqO8CHgQvHxlwIXDO6fQNwfpJ0GGMXGq9DVX26qh4b3b0ZeF7HMXZlmt8JgF8G3gN8u8vgOjTNdbgc+O2qehigqh7qOMauTHMtCnju6Pb3Ag90GN/MW4RkcRpw/7r7+0aPTRxTVQeAR4ATO4muO9Nch/UuA/5nqxH1p/FaJPlR4PSq+liXgXVsmt+JHwR+MMn/SXJzkpd3Fl23prkW7wRem2Qf8HHgzd2ENh8W4W1VJ80Qxpd4TTNm3k3935jktcAA+PutRtSfQ16LJEcA/xW4tKuAejLN78SRDEtRL2E40/yzJOdU1V+1HFvXprkWPwt8sKr+S5IXAf9tdC2ebj+82bcIM4t9wOnr7j+PZ04f/3pMkiMZTjH3dxJdd6a5DiR5KfAfgAuq6omOYuta07V4DnAO8Jkk9wIvBPYsYJN72r+Nj1bVk1X1FeDLDJPHopnmWlwGXA9QVZ8DjmZ4bpRYjGSxFzgryRlJnsWwgb1nbMwe4JLR7dcAn6pRF2uBNF6HUenlfQwTxaLWpqHhWlTVI1V1UlXtqqpdDPs3F1TVaj/htmaav42PMFz4QJKTGJal7uk0ym5Mcy3uA84HSHI2w2Sx1mmUM2zuk8WoB/Em4EbgTuD6qrojybuSXDAadjVwYpK7gbcCGy6lnFdTXodfA44D/iDJF5OM/7EshCmvxcKb8jrcCHwzyZeATwP/rqq+2U/E7ZnyWvwb4PIktwHXApcu4D8qN80d3JKkRnM/s5Aktc9kIUlqZLKQJDUyWUiSGpksJEmNTBaSpEYmC0lSI5OF1JIk547eF+HoJN8zer+Ic/qOS9oMN+VJLUrynxgeG3EMsK+q/nPPIUmbYrKQWjQ6h2gvw/fMeHFVPdVzSNKmWIaS2nUCw/O4nsNwhiHNJWcWUotGhzV+GDgDOLWq3tRzSNKmLMKbH0kzKcnPAQeq6veT7AD+PMk/rKpP9R2bdLicWUiSGtmzkCQ1MllIkhqZLCRJjUwWkqRGJgtJUiOThSSpkclCktTIZCFJavT/AZcXR0Q04H4rAAAAAElFTkSuQmCC\n",
286 | "text/plain": [
287 | ""
288 | ]
289 | },
290 | "metadata": {
291 | "needs_background": "light"
292 | },
293 | "output_type": "display_data"
294 | }
295 | ],
296 | "source": [
297 | "import matplotlib.pyplot as plt\n",
298 | "plt.scatter(x,y, s=8); plt.xlabel(\"x\"); plt.ylabel(\"y\"); "
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "metadata": {},
304 | "source": [
305 | "You want to find **parameters** (weights) $a$ and $b$ such that you minimize the *error* between the points and the line $a\\cdot x + b$. Note that here $a$ and $b$ are unknown. For a regression problem the most common *error function* or *loss function* is the **mean squared error**. "
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 14,
311 | "metadata": {},
312 | "outputs": [],
313 | "source": [
314 | "def mse(y_hat, y): return ((y_hat - y) ** 2).mean()"
315 | ]
316 | },
317 | {
318 | "cell_type": "markdown",
319 | "metadata": {},
320 | "source": [
321 | "Suppose we believe $a = 10$ and $b = 5$; then we can compute `y_hat`, which is our *prediction*, and then compute our error."
322 | ]
323 | },
324 | {
325 | "cell_type": "code",
326 | "execution_count": 15,
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "data": {
331 | "text/plain": [
332 | "3.7264671933272044"
333 | ]
334 | },
335 | "execution_count": 15,
336 | "metadata": {},
337 | "output_type": "execute_result"
338 | }
339 | ],
340 | "source": [
341 | "y_hat = lin(10,5,x)\n",
342 | "mse(y_hat, y)"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 16,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "def mse_loss(a, b, x, y): return mse(lin(a,b,x), y)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 17,
357 | "metadata": {},
358 | "outputs": [
359 | {
360 | "data": {
361 | "text/plain": [
362 | "3.7264671933272044"
363 | ]
364 | },
365 | "execution_count": 17,
366 | "metadata": {},
367 | "output_type": "execute_result"
368 | }
369 | ],
370 | "source": [
371 | "mse_loss(10, 5, x, y)"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "So far we have specified the *model* (linear regression) and the *evaluation criteria* (or *loss function*). Now we need to handle *optimization*; that is, how do we find the best values for $a$ and $b$? How do we find the best *fitting* linear regression?"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "metadata": {},
384 | "source": [
385 | "## Gradient Descent with Pytorch"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "metadata": {},
391 | "source": [
392 | "For a fixed dataset $x$ and $y$, `mse_loss(a,b)` is a function of $a$ and $b$. We would like to find the values of $a$ and $b$ that minimize that function.\n",
393 | "\n",
394 | "**Gradient descent** is an algorithm that minimizes functions. Given a function defined by a set of parameters, gradient descent starts with an initial set of parameter values and iteratively moves toward a set of parameter values that minimize the function. This iterative minimization is achieved by taking steps in the negative direction of the function gradient.\n",
395 | "\n",
396 | "Here is gradient descent implemented in [PyTorch](http://pytorch.org/)."
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 18,
402 | "metadata": {},
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/plain": [
407 | "((10000,), (10000,))"
408 | ]
409 | },
410 | "execution_count": 18,
411 | "metadata": {},
412 | "output_type": "execute_result"
413 | }
414 | ],
415 | "source": [
416 | "# generate some more data\n",
417 | "x, y = gen_fake_data(10000, 3., 8.)\n",
418 | "x.shape, y.shape"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 19,
424 | "metadata": {},
425 | "outputs": [],
426 | "source": [
427 | "# Wrap x and y as tensors\n",
428 | "x = torch.tensor(x)\n",
429 | "y = torch.tensor(y)"
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": 20,
435 | "metadata": {},
436 | "outputs": [
437 | {
438 | "data": {
439 | "text/plain": [
440 | "(tensor([-0.4017], dtype=torch.float64),\n",
441 | " tensor([-0.9494], dtype=torch.float64))"
442 | ]
443 | },
444 | "execution_count": 20,
445 | "metadata": {},
446 | "output_type": "execute_result"
447 | }
448 | ],
449 | "source": [
450 | "# Create random NumPy arrays for the weights and wrap them in tensors.\n",
451 | "# Setting requires_grad=True indicates that we want to compute gradients with\n",
452 | "# respect to these tensors during the backward pass.\n",
453 | "a, b = np.random.randn(1), np.random.randn(1)\n",
454 | "a = torch.tensor(a, requires_grad=True)\n",
455 | "b = torch.tensor(b, requires_grad=True)\n",
456 | "a,b"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 21,
462 | "metadata": {},
463 | "outputs": [
464 | {
465 | "name": "stdout",
466 | "output_type": "stream",
467 | "text": [
468 | "114.33519327885749\n",
469 | "0.8827967584092252\n",
470 | "0.1509190011086734\n",
471 | "0.1336058659739962\n",
472 | "0.1237467641246501\n",
473 | "0.1161985772878354\n",
474 | "0.11040320048670306\n",
475 | "0.10595349916476945\n",
476 | "0.10253700937772219\n",
477 | "0.09991382151883298\n"
478 | ]
479 | }
480 | ],
481 | "source": [
482 | "learning_rate = 1e-3\n",
483 | "for t in range(10000):\n",
484 | " # Forward pass: compute predicted y using operations on tensors\n",
485 | " loss = mse_loss(a,b,x,y)\n",
486 | " if t % 1000 == 0: print(loss.item())\n",
487 | " \n",
488 | " # Computes the gradient of loss with respect to all tensors with requires_grad=True.\n",
489 | " # After this call a.grad and b.grad will be tensors holding the gradient\n",
490 | " # of the loss with respect to a and b respectively\n",
491 | " loss.backward()\n",
492 | " \n",
493 | " # Update a and b using gradient descent; a.data and b.data are Tensors,\n",
494 | " # a.grad and b.grad are Tensors too, and so are a.grad.data and b.grad.data\n",
495 | " a.data -= learning_rate * a.grad.data\n",
496 | " b.data -= learning_rate * b.grad.data\n",
497 | " \n",
498 | " # Zero the gradients\n",
499 | " a.grad.data.zero_()\n",
500 | " b.grad.data.zero_() "
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 22,
506 | "metadata": {},
507 | "outputs": [
508 | {
509 | "name": "stdout",
510 | "output_type": "stream",
511 | "text": [
512 | "tensor([ 3.2942], dtype=torch.float64) tensor([ 7.8449], dtype=torch.float64)\n"
513 | ]
514 | }
515 | ],
516 | "source": [
517 | "print(a,b)"
518 | ]
519 | },
520 | {
521 | "cell_type": "markdown",
522 | "metadata": {},
523 | "source": [
524 | "## Simplified GD Loop"
525 | ]
526 | },
527 | {
528 | "cell_type": "code",
529 | "execution_count": 23,
530 | "metadata": {},
531 | "outputs": [
532 | {
533 | "data": {
534 | "text/plain": [
535 | "Linear(in_features=1, out_features=1, bias=True)"
536 | ]
537 | },
538 | "execution_count": 23,
539 | "metadata": {},
540 | "output_type": "execute_result"
541 | }
542 | ],
543 | "source": [
544 | "# linear transformation with input dimension=1 and output dimension=1\n",
545 | "nn.Linear(1, 1)"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 24,
551 | "metadata": {},
552 | "outputs": [
553 | {
554 | "data": {
555 | "text/plain": [
556 | "Sequential(\n",
557 | " (0): Linear(in_features=1, out_features=1, bias=True)\n",
558 | ")"
559 | ]
560 | },
561 | "execution_count": 24,
562 | "metadata": {},
563 | "output_type": "execute_result"
564 | }
565 | ],
566 | "source": [
567 | "# simple way of specifying a linear regression model\n",
568 | "model = torch.nn.Sequential(\n",
569 | " nn.Linear(1, 1),\n",
570 | ")\n",
571 | "model"
572 | ]
573 | },
574 | {
575 | "cell_type": "code",
576 | "execution_count": 25,
577 | "metadata": {},
578 | "outputs": [],
579 | "source": [
580 | "# equivalent way of specifying the same model\n",
581 | "class LinearRegression(nn.Module):\n",
582 | " def __init__(self):\n",
583 | " super(LinearRegression, self).__init__()\n",
584 | " self.lin = nn.Linear(1, 1)\n",
585 | " \n",
586 | " def forward(self, x):\n",
587 | " x = self.lin(x)\n",
588 | " return x \n",
589 | "model = LinearRegression()"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 26,
595 | "metadata": {},
596 | "outputs": [
597 | {
598 | "name": "stdout",
599 | "output_type": "stream",
600 | "text": [
601 | "[Parameter containing:\n",
602 | "tensor([[ 0.2523]]), Parameter containing:\n",
603 | "tensor([ 0.5015])]\n"
604 | ]
605 | }
606 | ],
607 | "source": [
608 | "print([p for p in model.parameters()])"
609 | ]
610 | },
611 | {
612 | "cell_type": "code",
613 | "execution_count": 27,
614 | "metadata": {},
615 | "outputs": [
616 | {
617 | "data": {
618 | "text/plain": [
619 | "torch.Size([10000])"
620 | ]
621 | },
622 | "execution_count": 27,
623 | "metadata": {},
624 | "output_type": "execute_result"
625 | }
626 | ],
627 | "source": [
628 | "x, y = gen_fake_data(10000, 3., 8.)\n",
629 | "x = torch.tensor(x).float()\n",
630 | "y = torch.tensor(y).float()\n",
631 | "x.shape"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": 28,
637 | "metadata": {},
638 | "outputs": [
639 | {
640 | "data": {
641 | "text/plain": [
642 | "torch.Size([10000, 1])"
643 | ]
644 | },
645 | "execution_count": 28,
646 | "metadata": {},
647 | "output_type": "execute_result"
648 | }
649 | ],
650 | "source": [
651 | "# you have to be careful with the dimensions that your model is expecting\n",
652 | "x1 = torch.unsqueeze(x, 1)\n",
653 | "x1.shape"
654 | ]
655 | },
656 | {
657 | "cell_type": "code",
658 | "execution_count": 29,
659 | "metadata": {},
660 | "outputs": [
661 | {
662 | "name": "stdout",
663 | "output_type": "stream",
664 | "text": [
665 | "tensor([[ 0.6813],\n",
666 | " [ 0.6076],\n",
667 | " [ 0.6617],\n",
668 | " ...,\n",
669 | " [ 0.6750],\n",
670 | " [ 0.5348],\n",
671 | " [ 0.5524]])\n"
672 | ]
673 | }
674 | ],
675 | "source": [
676 | "y_hat = model(x1)\n",
677 | "print(y_hat)"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": 30,
683 | "metadata": {},
684 | "outputs": [],
685 | "source": [
686 | "# Use the optim package to define an Optimizer that will update the weights of\n",
687 | "# the model for us. Here we will use Adam\n",
688 | "learning_rate = 0.1\n",
689 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": 31,
695 | "metadata": {},
696 | "outputs": [
697 | {
698 | "name": "stdout",
699 | "output_type": "stream",
700 | "text": [
701 | "79.40501403808594\n",
702 | "0.08850504457950592\n",
703 | "0.08850347250699997\n",
704 | "0.08850349485874176\n",
705 | "0.08850345760583878\n",
706 | "0.08850356191396713\n",
707 | "0.08850352466106415\n",
708 | "0.08850352466106415\n",
709 | "0.08850352466106415\n",
710 | "0.08850352466106415\n"
711 | ]
712 | }
713 | ],
714 | "source": [
715 | "for t in range(10000):\n",
716 | " # Forward pass: compute predicted y using operations on tensors\n",
717 | " y_hat = model(x1)\n",
718 | " loss = F.mse_loss(y_hat, y.unsqueeze(1))\n",
719 | " if t % 1000 == 0: print(loss.item())\n",
720 | " \n",
721 | " # Before the backward pass, use the optimizer object to zero all of the\n",
722 | " # gradients for the variables\n",
723 | " optimizer.zero_grad()\n",
724 | " loss.backward()\n",
725 | " \n",
726 | " # Calling the step function on an Optimizer makes an update to its\n",
727 | " # parameters\n",
728 | " optimizer.step()"
729 | ]
730 | },
731 | {
732 | "cell_type": "code",
733 | "execution_count": 32,
734 | "metadata": {},
735 | "outputs": [
736 | {
737 | "name": "stdout",
738 | "output_type": "stream",
739 | "text": [
740 | "[Parameter containing:\n",
741 | "tensor([[ 3.0035]]), Parameter containing:\n",
742 | "tensor([ 7.9942])]\n"
743 | ]
744 | }
745 | ],
746 | "source": [
747 | "print([p for p in model.parameters()])"
748 | ]
749 | },
750 | {
751 | "cell_type": "markdown",
752 | "metadata": {},
753 | "source": [
754 | "# Logistic Regression"
755 | ]
756 | },
757 | {
758 | "cell_type": "code",
759 | "execution_count": 33,
760 | "metadata": {},
761 | "outputs": [],
762 | "source": [
763 | "# generating fake data\n",
764 | "# Here we generate some fake data\n",
765 | "def lin(a,b,x): return a*x+b\n",
766 | "\n",
767 | "def gen_logistic_fake_data(n, a, b):\n",
768 | " x = np.random.uniform(-20,20, (n, 2))\n",
769 | " x2_hat = lin(a,b, x[:,0])\n",
770 | " y = x[:,1] > x2_hat\n",
771 | " return x, y.astype(int)\n",
772 | "\n",
773 | "x, y = gen_logistic_fake_data(100, 1., 0.5)"
774 | ]
775 | },
776 | {
777 | "cell_type": "code",
778 | "execution_count": 34,
779 | "metadata": {},
780 | "outputs": [
781 | {
782 | "data": {
783 | "text/plain": [
784 | "array([0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,\n",
785 | " 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n",
786 | " 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,\n",
787 | " 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0,\n",
788 | " 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1])"
789 | ]
790 | },
791 | "execution_count": 34,
792 | "metadata": {},
793 | "output_type": "execute_result"
794 | }
795 | ],
796 | "source": [
797 | "y"
798 | ]
799 | },
800 | {
801 | "cell_type": "code",
802 | "execution_count": 35,
803 | "metadata": {},
804 | "outputs": [
805 | {
806 | "data": {
807 | "text/plain": [
808 | "[]"
809 | ]
810 | },
811 | "execution_count": 35,
812 | "metadata": {},
813 | "output_type": "execute_result"
814 | },
815 | {
816 | "data": {
817 | "image/png": "\n",
818 | "text/plain": [
819 | ""
820 | ]
821 | },
822 | "metadata": {
823 | "needs_background": "light"
824 | },
825 | "output_type": "display_data"
826 | }
827 | ],
828 | "source": [
829 | "t = np.arange(-20, 20, 0.2)\n",
830 | "import matplotlib.pyplot as plt\n",
831 | "plt.scatter(x[:,0],x[:,1],c=y, s=8);\n",
832 | "plt.xlabel(\"x1\"); plt.ylabel(\"x2\");\n",
833 | "plt.plot(t, t + 0.5, 'r--')"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "execution_count": 36,
839 | "metadata": {},
840 | "outputs": [],
841 | "source": [
842 | "x = torch.tensor(x).float()\n",
843 | "y = torch.tensor(y).float()"
844 | ]
845 | },
846 | {
847 | "cell_type": "code",
848 | "execution_count": 37,
849 | "metadata": {},
850 | "outputs": [
851 | {
852 | "data": {
853 | "text/plain": [
854 | "Sequential(\n",
855 | " (0): Linear(in_features=2, out_features=1, bias=True)\n",
856 | ")"
857 | ]
858 | },
859 | "execution_count": 37,
860 | "metadata": {},
861 | "output_type": "execute_result"
862 | }
863 | ],
864 | "source": [
865 | "model = torch.nn.Sequential(\n",
866 | " torch.nn.Linear(2, 1),\n",
867 | ")\n",
868 | "model"
869 | ]
870 | },
871 | {
872 | "cell_type": "code",
873 | "execution_count": 38,
874 | "metadata": {},
875 | "outputs": [
876 | {
877 | "data": {
878 | "text/plain": [
879 | "torch.Size([100, 1])"
880 | ]
881 | },
882 | "execution_count": 38,
883 | "metadata": {},
884 | "output_type": "execute_result"
885 | }
886 | ],
887 | "source": [
888 | "model(x).shape"
889 | ]
890 | },
891 | {
892 | "cell_type": "code",
893 | "execution_count": 39,
894 | "metadata": {},
895 | "outputs": [],
896 | "source": [
897 | "x, y = gen_logistic_fake_data(10000, 1., 0.5)\n",
898 | "x = torch.tensor(x).float()\n",
899 | "y = torch.tensor(y).float()"
900 | ]
901 | },
902 | {
903 | "cell_type": "code",
904 | "execution_count": 40,
905 | "metadata": {},
906 | "outputs": [],
907 | "source": [
908 | "learning_rate = 0.1\n",
909 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
910 | ]
911 | },
912 | {
913 | "cell_type": "code",
914 | "execution_count": 41,
915 | "metadata": {},
916 | "outputs": [
917 | {
918 | "name": "stdout",
919 | "output_type": "stream",
920 | "text": [
921 | "0.9893282055854797\n",
922 | "0.012103211134672165\n",
923 | "0.008044046349823475\n",
924 | "0.006133016664534807\n",
925 | "0.004931855481117964\n",
926 | "0.0040746103040874004\n",
927 | "0.003419493557885289\n",
928 | "0.002897445810958743\n",
929 | "0.0024693021550774574\n",
930 | "0.0021104554180055857\n"
931 | ]
932 | }
933 | ],
934 | "source": [
935 | "for t in range(10000):\n",
936 | " # Forward pass: compute predicted y using operations on tensors\n",
937 | " y_hat = model(x)\n",
938 | " loss = F.binary_cross_entropy(F.sigmoid(y_hat), y.unsqueeze(1))\n",
939 | " if t % 1000 == 0: print(loss.item())\n",
940 | " \n",
941 | " # Before the backward pass, use the optimizer object to zero all of the\n",
942 | " # gradients for the variables\n",
943 | " optimizer.zero_grad()\n",
944 | " loss.backward()\n",
945 | " \n",
946 | " # Calling the step function on an Optimizer makes an update to its\n",
947 | " # parameters\n",
948 | " optimizer.step()"
949 | ]
950 | },
951 | {
952 | "cell_type": "code",
953 | "execution_count": 42,
954 | "metadata": {},
955 | "outputs": [
956 | {
957 | "name": "stdout",
958 | "output_type": "stream",
959 | "text": [
960 | "[Parameter containing:\n",
961 | "tensor([[-21.5997, 21.6212]]), Parameter containing:\n",
962 | "tensor([-10.7937])]\n"
963 | ]
964 | }
965 | ],
966 | "source": [
967 | "print([p for p in model.parameters()])"
968 | ]
969 | },
970 | {
971 | "cell_type": "markdown",
972 | "metadata": {},
973 | "source": [
974 | "# Data loaders for SGD"
975 | ]
976 | },
977 | {
978 | "cell_type": "markdown",
979 | "metadata": {},
980 | "source": [
981 | "Nearly all of deep learning is powered by one very important algorithm: **stochastic gradient descent (SGD)**. SGD can be seen as an approximation of **gradient descent** (GD). In GD you have to run through *all* the samples in your training set to do a single iteration. In SGD you use *only one* or *a subset* of the training samples to update the parameters in a particular iteration. The subset used in each iteration is called a **batch** or **minibatch**."
982 | ]
983 | },
984 | {
985 | "cell_type": "code",
986 | "execution_count": 43,
987 | "metadata": {},
988 | "outputs": [],
989 | "source": [
990 | "model2 = torch.nn.Sequential(\n",
991 | " torch.nn.Linear(1, 1),\n",
992 | ")"
993 | ]
994 | },
995 | {
996 | "cell_type": "code",
997 | "execution_count": 44,
998 | "metadata": {},
999 | "outputs": [],
1000 | "source": [
1001 | "from torch.utils.data import Dataset, DataLoader"
1002 | ]
1003 | },
1004 | {
1005 | "cell_type": "code",
1006 | "execution_count": 45,
1007 | "metadata": {},
1008 | "outputs": [],
1009 | "source": [
1010 | "def lin(a,b,x): return a*x+b\n",
1011 | "\n",
1012 | "def gen_fake_data(n, a, b):\n",
1013 | " x = np.random.uniform(0,1,n) \n",
1014 | " y = lin(a,b,x) + 0.1 * np.random.normal(0,3,n)\n",
1015 | " return x.astype(np.float32), y.astype(np.float32)\n",
1016 | "\n",
1017 | "# create a dataset\n",
1018 | "class RegressionDataset(Dataset):\n",
1019 | " def __init__(self, a=3, b=8, n=10000):\n",
1020 | " x, y = gen_fake_data(n, a, b)\n",
1021 | " x = torch.from_numpy(x).unsqueeze(1)\n",
1022 | " y = torch.from_numpy(y)\n",
1023 | " self.x, self.y = x, y\n",
1024 | " \n",
1025 | " def __len__(self):\n",
1026 | " return len(self.y)\n",
1027 | " \n",
1028 | " def __getitem__(self, idx):\n",
1029 | " return self.x[idx], self.y[idx]\n",
1030 | " \n",
1031 | "fake_dataset = RegressionDataset()"
1032 | ]
1033 | },
1034 | {
1035 | "cell_type": "markdown",
1036 | "metadata": {},
1037 | "source": [
1038 | "Next we are going to create a data loader. The data loader provides the following features:\n",
1039 | "* Batching the data\n",
1040 | "* Shuffling the data\n",
1041 | "* Loading the data in parallel using multiprocessing workers."
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "code",
1046 | "execution_count": 46,
1047 | "metadata": {},
1048 | "outputs": [],
1049 | "source": [
1050 | "dataloader = DataLoader(fake_dataset, batch_size=1000, shuffle=True)\n",
1051 | "x, y = next(iter(dataloader))"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": 47,
1057 | "metadata": {},
1058 | "outputs": [],
1059 | "source": [
1060 | "#y.type(torch.FloatTensor)"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "code",
1065 | "execution_count": 48,
1066 | "metadata": {},
1067 | "outputs": [],
1068 | "source": [
1069 | "learning_rate = 0.1\n",
1070 | "optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate)"
1071 | ]
1072 | },
1073 | {
1074 | "cell_type": "code",
1075 | "execution_count": 49,
1076 | "metadata": {},
1077 | "outputs": [
1078 | {
1079 | "name": "stdout",
1080 | "output_type": "stream",
1081 | "text": [
1082 | "56.18629455566406\n",
1083 | "0.09595121443271637\n",
1084 | "0.09600495547056198\n",
1085 | "0.08732529729604721\n",
1086 | "0.09132660925388336\n",
1087 | "0.08278344571590424\n",
1088 | "0.08562881499528885\n",
1089 | "0.08167734742164612\n",
1090 | "0.0862448588013649\n",
1091 | "0.09074677526950836\n"
1092 | ]
1093 | }
1094 | ],
1095 | "source": [
1096 | "for t in range(1000):\n",
1097 | "    # Each epoch makes one full pass over the mini-batches served by the loader\n",
1098 | "    for x, y in dataloader:\n",
1099 | "        y_hat = model2(x)\n",
1100 | "        # unsqueeze(1) adds a trailing axis to y, presumably to match y_hat's shape\n",
1101 | "        loss = F.mse_loss(y_hat, y.unsqueeze(1))\n",
1102 | "\n",
1103 | "        optimizer.zero_grad()\n",
1104 | "        loss.backward()\n",
1105 | "        optimizer.step()\n",
1106 | "    if t % 100 == 0: print(loss.item())"
1107 | ]
1108 | },
1109 | {
1110 | "cell_type": "code",
1111 | "execution_count": 50,
1112 | "metadata": {},
1113 | "outputs": [
1114 | {
1115 | "name": "stdout",
1116 | "output_type": "stream",
1117 | "text": [
1118 | "[Parameter containing:\n",
1119 | "tensor([[ 3.0190]]), Parameter containing:\n",
1120 | "tensor([ 7.9957])]\n"
1121 | ]
1122 | }
1123 | ],
1124 | "source": [
1125 | "print([p for p in model2.parameters()])"
1126 | ]
1127 | },
1128 | {
1129 | "cell_type": "markdown",
1130 | "metadata": {},
1131 | "source": [
1132 | "# Two layer neural network"
1133 | ]
1134 | },
1135 | {
1136 | "cell_type": "code",
1137 | "execution_count": 51,
1138 | "metadata": {},
1139 | "outputs": [],
1140 | "source": [
1141 | "# generating fake data\n",
1142 | "# Here we generate some fake data\n",
1143 | "def sigmoid(x):\n",
1144 | " return 1/(1 + np.exp(-x))\n",
1145 | "\n",
1146 | "def gen_nn_fake_data(n):\n",
1147 | "    \"\"\"Draw n points uniformly from [0, 10) x [0, 10) and give each a 0/1 label.\"\"\"\n",
1148 | "    points = np.random.uniform(0, 10, (n, 2))\n",
1149 | "    x1, x2 = points[:, 0], points[:, 1]\n",
1150 | "    score1 = sigmoid(-x1 - 8 * x2 + 50)\n",
1151 | "    score2 = sigmoid(-7 * x1 - 2 * x2 + 50)\n",
1152 | "    # Label is 1 where 2*score1 + 3*score2 - 0.1 is negative, else 0\n",
1153 | "    labels = 2 * score1 + 3 * score2 - 0.1 < 0\n",
1154 | "    return points, labels.astype(int)\n",
1155 | "\n",
1156 | "x, y = gen_nn_fake_data(500)"
1157 | ]
1158 | },
1159 | {
1160 | "cell_type": "code",
1161 | "execution_count": 52,
1162 | "metadata": {},
1163 | "outputs": [
1164 | {
1165 | "data": {
1166 | "image/png": "\n",
1167 | "text/plain": [
1168 | ""
1169 | ]
1170 | },
1171 | "metadata": {
1172 | "needs_background": "light"
1173 | },
1174 | "output_type": "display_data"
1175 | }
1176 | ],
1177 | "source": [
1178 | "import matplotlib.pyplot as plt\n",
1179 | "plt.scatter(x[:,0],x[:,1],c=y, s=8);\n",
1180 | "plt.xlabel(\"x1\"); plt.ylabel(\"x2\");"
1181 | ]
1182 | },
1183 | {
1184 | "cell_type": "code",
1185 | "execution_count": 53,
1186 | "metadata": {},
1187 | "outputs": [
1188 | {
1189 | "data": {
1190 | "text/plain": [
1191 | "Sequential(\n",
1192 | " (0): Linear(in_features=2, out_features=2, bias=True)\n",
1193 | " (1): Sigmoid()\n",
1194 | " (2): Linear(in_features=2, out_features=1, bias=True)\n",
1195 | ")"
1196 | ]
1197 | },
1198 | "execution_count": 53,
1199 | "metadata": {},
1200 | "output_type": "execute_result"
1201 | }
1202 | ],
1203 | "source": [
1204 | "model = torch.nn.Sequential(\n",
1205 | " torch.nn.Linear(2, 2),\n",
1206 | " torch.nn.Sigmoid(),\n",
1207 | " torch.nn.Linear(2, 1)\n",
1208 | ")\n",
1209 | "model"
1210 | ]
1211 | },
1212 | {
1213 | "cell_type": "code",
1214 | "execution_count": 54,
1215 | "metadata": {},
1216 | "outputs": [],
1217 | "source": [
1218 | "x, y = gen_nn_fake_data(10000)\n",
1219 | "x = torch.tensor(x).float()\n",
1220 | "y = torch.tensor(y).float()"
1221 | ]
1222 | },
1223 | {
1224 | "cell_type": "code",
1225 | "execution_count": 55,
1226 | "metadata": {},
1227 | "outputs": [],
1228 | "source": [
1229 | "learning_rate = 0.01\n",
1230 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
1231 | ]
1232 | },
1233 | {
1234 | "cell_type": "code",
1235 | "execution_count": 56,
1236 | "metadata": {},
1237 | "outputs": [
1238 | {
1239 | "name": "stdout",
1240 | "output_type": "stream",
1241 | "text": [
1242 | "0.5998386144638062\n",
1243 | "0.08418773114681244\n",
1244 | "0.04379989951848984\n",
1245 | "0.02649916149675846\n",
1246 | "0.0179321076720953\n",
1247 | "0.013030633330345154\n",
1248 | "0.009794511832296848\n",
1249 | "0.007520528510212898\n",
1250 | "0.0058765956200659275\n",
1251 | "0.0046782889403402805\n"
1252 | ]
1253 | }
1254 | ],
1255 | "source": [
1256 | "for t in range(10000):\n",
1257 | "    # Forward pass: the model ends in a Linear layer, so y_hat holds raw logits\n",
1258 | "    y_hat = model(x)\n",
1259 | "    # Fused sigmoid + BCE is numerically stabler than BCE over F.sigmoid (deprecated)\n",
1260 | "    loss = F.binary_cross_entropy_with_logits(y_hat, y.unsqueeze(1))\n",
1261 | "    if t % 1000 == 0: print(loss.item())\n",
1262 | "\n",
1263 | "    # Zero the gradients left over from the previous step, then backpropagate\n",
1264 | "    optimizer.zero_grad()\n",
1265 | "    loss.backward()\n",
1266 | "\n",
1267 | "    # Calling step() on the optimizer updates the model parameters\n",
1268 | "    # using the gradients computed by backward()\n",
1269 | "    optimizer.step()"
1270 | ]
1271 | },
1272 | {
1273 | "cell_type": "code",
1274 | "execution_count": 57,
1275 | "metadata": {},
1276 | "outputs": [
1277 | {
1278 | "name": "stdout",
1279 | "output_type": "stream",
1280 | "text": [
1281 | "[Parameter containing:\n",
1282 | "tensor([[-4.4651, -1.2217],\n",
1283 | " [ 0.7295, 6.5536]]), Parameter containing:\n",
1284 | "tensor([ 33.2324, -42.4598]), Parameter containing:\n",
1285 | "tensor([[-25.9705, 23.0225]]), Parameter containing:\n",
1286 | "tensor([-12.2211])]\n"
1287 | ]
1288 | }
1289 | ],
1290 | "source": [
1291 | "print([p for p in model.parameters()])"
1292 | ]
1293 | },
1294 | {
1295 | "cell_type": "code",
1296 | "execution_count": 58,
1297 | "metadata": {},
1298 | "outputs": [
1299 | {
1300 | "data": {
1301 | "text/plain": [
1302 | "array([ 72.7843, -134.4691, -45.7142])"
1303 | ]
1304 | },
1305 | "execution_count": 58,
1306 | "metadata": {},
1307 | "output_type": "execute_result"
1308 | }
1309 | ],
1310 | "source": [
1311 | " np.array([72.7843, -134.4691, -45.7142])"
1312 | ]
1313 | },
1314 | {
1315 | "cell_type": "markdown",
1316 | "metadata": {
1317 | "collapsed": true
1318 | },
1319 | "source": [
1320 | "# References\n",
1321 | "* https://pytorch.org/docs/stable/index.html\n",
1322 | "* http://pytorch.org/tutorials/beginner/pytorch_with_examples.html\n",
1323 | "* https://hsaghir.github.io/data_science/pytorch_starter/"
1324 | ]
1325 | },
1326 | {
1327 | "cell_type": "code",
1328 | "execution_count": null,
1329 | "metadata": {},
1330 | "outputs": [],
1331 | "source": []
1332 | }
1333 | ],
1334 | "metadata": {
1335 | "kernelspec": {
1336 | "display_name": "Python 3",
1337 | "language": "python",
1338 | "name": "python3"
1339 | },
1340 | "language_info": {
1341 | "codemirror_mode": {
1342 | "name": "ipython",
1343 | "version": 3
1344 | },
1345 | "file_extension": ".py",
1346 | "mimetype": "text/x-python",
1347 | "name": "python",
1348 | "nbconvert_exporter": "python",
1349 | "pygments_lexer": "ipython3",
1350 | "version": "3.6.6"
1351 | },
1352 | "nav_menu": {},
1353 | "toc": {
1354 | "nav_menu": {
1355 | "height": "116px",
1356 | "width": "251px"
1357 | },
1358 | "number_sections": true,
1359 | "sideBar": true,
1360 | "skip_h1_title": false,
1361 | "toc_cell": true,
1362 | "toc_position": {},
1363 | "toc_section_display": "block",
1364 | "toc_window_display": false
1365 | },
1366 | "widgets": {
1367 | "state": {},
1368 | "version": "1.1.2"
1369 | }
1370 | },
1371 | "nbformat": 4,
1372 | "nbformat_minor": 1
1373 | }
1374 |
--------------------------------------------------------------------------------