├── README.md └── assignment1 ├── .DS_Store ├── .gitignore ├── collectSubmission.sh ├── cs224d ├── .DS_Store ├── __init__.py ├── data_utils.py └── datasets │ ├── .DS_Store │ ├── __MACOSX │ └── stanfordSentimentTreebank │ │ ├── ._README.txt │ │ ├── ._datasetSentences.txt │ │ ├── ._datasetSplit.txt │ │ ├── ._dictionary.txt │ │ ├── ._original_rt_snippets.txt │ │ └── ._sentiment_labels.txt │ ├── get_datasets.sh │ └── stanfordSentimentTreebank │ ├── README.txt │ ├── SOStr.txt │ ├── STree.txt │ ├── datasetSentences.txt │ ├── datasetSplit.txt │ ├── dictionary.txt │ ├── original_rt_snippets.txt │ └── sentiment_labels.txt ├── q1_softmax.ipynb ├── q1_softmax.py ├── q2_gradcheck.ipynb ├── q2_gradcheck.py ├── q2_neural.ipynb ├── q2_neural.py ├── q2_sigmoid.ipynb ├── q2_sigmoid.py ├── q3_run.py ├── q3_sgd.ipynb ├── q3_sgd.py ├── q3_word2vec.ipynb ├── q3_word2vec.py ├── q4_sentiment.py ├── q4_softmaxreg.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | # CS224d 2 | Stanford cs224d 2016 assignment 3 | -------------------------------------------------------------------------------- /assignment1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/.DS_Store -------------------------------------------------------------------------------- /assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .ipynb_checkpoints/* 4 | -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy 3 | -------------------------------------------------------------------------------- /assignment1/cs224d/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/.DS_Store -------------------------------------------------------------------------------- /assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /assignment1/cs224d/data_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import cPickle as pickle 5 | import numpy as np 6 | import os 7 | import random 8 | 9 | class StanfordSentiment: 10 | def __init__(self, path=None, tablesize = 1000000): 11 | if not path: 12 | path = "cs224d/datasets/stanfordSentimentTreebank" 13 | 14 | self.path = path 15 | self.tablesize = tablesize 16 | 17 | def tokens(self): 18 | if hasattr(self, "_tokens") and self._tokens: 19 | return self._tokens 20 | 21 | tokens = dict() 22 | tokenfreq = dict() 23 | wordcount = 0 24 | revtokens = [] 25 | idx = 0 26 | 27 | for sentence in self.sentences(): 28 | for w in sentence: 29 | wordcount += 1 30 | if not w in tokens: 31 | tokens[w] = idx 32 | revtokens += [w] 33 | tokenfreq[w] = 1 34 | idx += 1 35 | else: 36 | tokenfreq[w] += 1 37 | 38 | tokens["UNK"] = idx 39 | 
revtokens += ["UNK"] 40 | tokenfreq["UNK"] = 1 41 | wordcount += 1 42 | 43 | self._tokens = tokens 44 | self._tokenfreq = tokenfreq 45 | self._wordcount = wordcount 46 | self._revtokens = revtokens 47 | return self._tokens 48 | 49 | def sentences(self): 50 | if hasattr(self, "_sentences") and self._sentences: 51 | return self._sentences 52 | 53 | sentences = [] 54 | with open(self.path + "/datasetSentences.txt", "r") as f: 55 | first = True 56 | for line in f: 57 | if first: 58 | first = False 59 | continue 60 | 61 | splitted = line.strip().split()[1:] 62 | # Deal with some peculiar encoding issues with this file 63 | sentences += [[w.lower().decode("utf-8").encode('latin1') for w in splitted]] 64 | 65 | self._sentences = sentences 66 | self._sentlengths = np.array([len(s) for s in sentences]) 67 | self._cumsentlen = np.cumsum(self._sentlengths) 68 | 69 | return self._sentences 70 | 71 | def numSentences(self): 72 | if hasattr(self, "_numSentences") and self._numSentences: 73 | return self._numSentences 74 | else: 75 | self._numSentences = len(self.sentences()) 76 | return self._numSentences 77 | 78 | def allSentences(self): 79 | if hasattr(self, "_allsentences") and self._allsentences: 80 | return self._allsentences 81 | 82 | sentences = self.sentences() 83 | rejectProb = self.rejectProb() 84 | tokens = self.tokens() 85 | allsentences = [[w for w in s 86 | if 0 >= rejectProb[tokens[w]] or random.random() >= rejectProb[tokens[w]]] 87 | for s in sentences * 30] 88 | 89 | allsentences = [s for s in allsentences if len(s) > 1] 90 | 91 | self._allsentences = allsentences 92 | 93 | return self._allsentences 94 | 95 | def getRandomContext(self, C=5): 96 | allsent = self.allSentences() 97 | sentID = random.randint(0, len(allsent) - 1) 98 | sent = allsent[sentID] 99 | wordID = random.randint(0, len(sent) - 1) 100 | 101 | context = sent[max(0, wordID - C):wordID] 102 | if wordID+1 < len(sent): 103 | context += sent[wordID+1:min(len(sent), wordID + C + 1)] 104 | 105 | centerword = sent[wordID] 106 | context = [w for w in context if w != centerword] 107 | 108 | if len(context) > 0: 109 | return centerword, context 110 | else: 111 | return self.getRandomContext(C) 112 | 113 | def sent_labels(self): 114 | if hasattr(self, "_sent_labels") and self._sent_labels: 115 | return self._sent_labels 116 | 117 | dictionary = dict() 118 | phrases = 0 119 | with open(self.path + "/dictionary.txt", "r") as f: 120 | for line in f: 121 | line = line.strip() 122 | if not line: continue 123 | splitted = line.split("|") 124 | dictionary[splitted[0].lower()] = int(splitted[1]) 125 | phrases += 1 126 | 127 | labels = [0.0] * phrases 128 | with open(self.path + "/sentiment_labels.txt", "r") as f: 129 | first = True 130 | for line in f: 131 | if first: 132 | first = False 133 | continue 134 | 135 | line = line.strip() 136 | if not line: continue 137 | splitted = line.split("|") 138 | labels[int(splitted[0])] = float(splitted[1]) 139 | 140 | sent_labels = [0.0] * self.numSentences() 141 | sentences = self.sentences() 142 | for i in xrange(self.numSentences()): 143 | sentence = sentences[i] 144 | full_sent = " ".join(sentence).replace('-lrb-', '(').replace('-rrb-', ')') 145 | sent_labels[i] = labels[dictionary[full_sent]] 146 | 147 | self._sent_labels = sent_labels 148 | return self._sent_labels 149 | 150 | def dataset_split(self): 151 | if hasattr(self, "_split") and self._split: 152 | return self._split 153 | 154 | split = [[] for i in xrange(3)] 155 | with open(self.path + "/datasetSplit.txt", "r") as f: 156 | first = True 
157 | for line in f: 158 | if first: 159 | first = False 160 | continue 161 | 162 | splitted = line.strip().split(",") 163 | split[int(splitted[1]) - 1] += [int(splitted[0]) - 1] 164 | 165 | self._split = split 166 | return self._split 167 | 168 | def getRandomTrainSentence(self): 169 | split = self.dataset_split() 170 | sentId = split[0][random.randint(0, len(split[0]) - 1)] 171 | return self.sentences()[sentId], self.categorify(self.sent_labels()[sentId]) 172 | 173 | def categorify(self, label): 174 | if label <= 0.2: 175 | return 0 176 | elif label <= 0.4: 177 | return 1 178 | elif label <= 0.6: 179 | return 2 180 | elif label <= 0.8: 181 | return 3 182 | else: 183 | return 4 184 | 185 | def getDevSentences(self): 186 | return self.getSplitSentences(2) 187 | 188 | def getTestSentences(self): 189 | return self.getSplitSentences(1) 190 | 191 | def getTrainSentences(self): 192 | return self.getSplitSentences(0) 193 | 194 | def getSplitSentences(self, split=0): 195 | ds_split = self.dataset_split() 196 | return [(self.sentences()[i], self.categorify(self.sent_labels()[i])) for i in ds_split[split]] 197 | 198 | def sampleTable(self): 199 | if hasattr(self, '_sampleTable') and self._sampleTable is not None: 200 | return self._sampleTable 201 | 202 | nTokens = len(self.tokens()) 203 | samplingFreq = np.zeros((nTokens,)) 204 | self.allSentences() 205 | i = 0 206 | for w in xrange(nTokens): 207 | w = self._revtokens[i] 208 | if w in self._tokenfreq: 209 | freq = 1.0 * self._tokenfreq[w] 210 | # Reweigh 211 | freq = freq ** 0.75 212 | else: 213 | freq = 0.0 214 | samplingFreq[i] = freq 215 | i += 1 216 | 217 | samplingFreq /= np.sum(samplingFreq) 218 | samplingFreq = np.cumsum(samplingFreq) * self.tablesize 219 | 220 | self._sampleTable = [0] * self.tablesize 221 | 222 | j = 0 223 | for i in xrange(self.tablesize): 224 | while i > samplingFreq[j]: 225 | j += 1 226 | self._sampleTable[i] = j 227 | 228 | return self._sampleTable 229 | 230 | def rejectProb(self): 231 | if hasattr(self, '_rejectProb') and self._rejectProb is not None: 232 | return self._rejectProb 233 | 234 | threshold = 1e-5 * self._wordcount 235 | 236 | nTokens = len(self.tokens()) 237 | rejectProb = np.zeros((nTokens,)) 238 | for i in xrange(nTokens): 239 | w = self._revtokens[i] 240 | freq = 1.0 * self._tokenfreq[w] 241 | # Reweigh 242 | rejectProb[i] = max(0, 1 - np.sqrt(threshold / freq)) 243 | 244 | self._rejectProb = rejectProb 245 | return self._rejectProb 246 | 247 | def sampleTokenIdx(self): 248 | return self.sampleTable()[random.randint(0, self.tablesize - 1)] -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/.DS_Store -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._README.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSentences.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSentences.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSplit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSplit.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._dictionary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._dictionary.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._original_rt_snippets.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._original_rt_snippets.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._sentiment_labels.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._sentiment_labels.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/stanfordSentimentTreebank/README.txt: -------------------------------------------------------------------------------- 1 | Stanford Sentiment Treebank V1.0 2 | 3 | This is the dataset of the paper: 4 | 5 | Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank 6 | Richard Socher, Alex Perelygin, Jean Wu, Jason Chuang, Christopher Manning, Andrew Ng and Christopher Potts 7 | Conference on Empirical Methods in Natural Language Processing (EMNLP 2013) 8 | 9 | If you use this dataset in your research, please cite the above paper. 10 | 11 | @incollection{SocherEtAl2013:RNTN, 12 | title = {{Parsing With Compositional Vector Grammars}}, 13 | author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts}, 14 | booktitle = {{EMNLP}}, 15 | year = {2013} 16 | } 17 | 18 | This file includes: 19 | 1. original_rt_snippets.txt contains 10,605 processed snippets from the original pool of Rotten Tomatoes HTML files. Please note that some snippet may contain multiple sentences. 20 | 21 | 2. 
dictionary.txt contains all phrases and their IDs, separated by a vertical line | 22 | 23 | 3. sentiment_labels.txt contains all phrase ids and the corresponding sentiment labels, separated by a vertical line. 24 | Note that you can recover the 5 classes by mapping the positivity probability using the following cut-offs: 25 | [0, 0.2], (0.2, 0.4], (0.4, 0.6], (0.6, 0.8], (0.8, 1.0] 26 | for very negative, negative, neutral, positive, very positive, respectively. 27 | Please note that phrase ids and sentence ids are not the same. 28 | 29 | 4. SOStr.txt and STree.txt encode the structure of the parse trees. 30 | STree encodes the trees in a parent pointer format. Each line corresponds to each sentence in the datasetSentences.txt file. The Matlab code of this paper will show you how to read this format if you are not familiar with it. 31 | 32 | 5. datasetSentences.txt contains the sentence index, followed by the sentence string separated by a tab. These are the sentences of the train/dev/test sets. 33 | 34 | 6. datasetSplit.txt contains the sentence index (corresponding to the index in datasetSentences.txt file) followed by the set label separated by a comma: 35 | 1 = train 36 | 2 = test 37 | 3 = dev 38 | 39 | Please note that the datasetSentences.txt file has more sentences/lines than the original_rt_snippet.txt. 40 | Each row in the latter represents a snippet as shown on RT, whereas the former is each sub sentence as determined by the Stanford parser. 41 | 42 | For comparing research and training models, please use the provided train/dev/test splits. 43 | 44 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 10, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "def softmax(x):\n", 24 | " \"\"\"\n", 25 | " Compute the softmax function for each row of the input x.\n", 26 | "\n", 27 | " It is crucial that this function is optimized for speed because\n", 28 | " it will be used frequently in later code.\n", 29 | " You might find numpy functions np.exp, np.sum, np.reshape,\n", 30 | " np.max, and numpy broadcasting useful for this task. (numpy\n", 31 | " broadcasting documentation:\n", 32 | " http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)\n", 33 | "\n", 34 | " You should also make sure that your code works for one\n", 35 | " dimensional inputs (treat the vector as a row), you might find\n", 36 | " it helpful for your later problems.\n", 37 | "\n", 38 | " You must implement the optimization in problem 1(a) of the \n", 39 | " written assignment!\n", 40 | " \"\"\"\n", 41 | "\n", 42 | " ### YOUR CODE HERE\n", 43 | " \n", 44 | " if x.ndim == 1:\n", 45 | " x -= np.min(x) # solving overflow problem\n", 46 | " x = np.exp(x)\n", 47 | " x /= np.sum(x)\n", 48 | " else:\n", 49 | " x -= np.min(x, axis=1, keepdims=True) # solving overflow problem\n", 50 | " x = np.exp(x)\n", 51 | " x /= np.sum(x, axis=1, keepdims=True)\n", 52 | "\n", 53 | " ### END YOUR CODE\n", 54 | " \n", 55 | " return x\n", 56 | "\n", 57 | "def test_softmax_basic():\n", 58 | " \"\"\"\n", 59 | " Some simple tests to get you started. 
\n", 60 | " Warning: these are not exhaustive.\n", 61 | " \"\"\"\n", 62 | " print \"Running basic tests...\"\n", 63 | " test1 = softmax(np.array([1,2]))\n", 64 | " print test1\n", 65 | " assert np.amax(np.fabs(test1 - np.array(\n", 66 | " [0.26894142, 0.73105858]))) <= 1e-6\n", 67 | "\n", 68 | " test2 = softmax(np.array([[1001,1002],[3,4]]))\n", 69 | " print test2\n", 70 | " assert np.amax(np.fabs(test2 - np.array(\n", 71 | " [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6\n", 72 | "\n", 73 | " test3 = softmax(np.array([[-1001,-1002]]))\n", 74 | " print test3\n", 75 | " assert np.amax(np.fabs(test3 - np.array(\n", 76 | " [0.73105858, 0.26894142]))) <= 1e-6\n", 77 | "\n", 78 | " print \"You should verify these results!\\n\"" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 11, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Running basic tests...\n", 93 | "[ 0.26894142 0.73105858]\n", 94 | "[[ 0.26894142 0.73105858]\n", 95 | " [ 0.26894142 0.73105858]]\n", 96 | "[[ 0.73105858 0.26894142]]\n", 97 | "You should verify these results!\n", 98 | "\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "test_softmax_basic()" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 2", 110 | "language": "python", 111 | "name": "python2" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 2 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython2", 123 | "version": "2.7.11" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 0 128 | } 129 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | def softmax(x): 5 | """ 6 | Compute the softmax function for each row of the input x. 7 | 8 | It is crucial that this function is optimized for speed because 9 | it will be used frequently in later code. 10 | You might find numpy functions np.exp, np.sum, np.reshape, 11 | np.max, and numpy broadcasting useful for this task. (numpy 12 | broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 14 | 15 | You should also make sure that your code works for one 16 | dimensional inputs (treat the vector as a row), you might find 17 | it helpful for your later problems. 18 | 19 | You must implement the optimization in problem 1(a) of the 20 | written assignment! 21 | """ 22 | 23 | ### YOUR CODE HERE 24 | 25 | if x.ndim == 1: 26 | x -= np.min(x) # solving overflow problem 27 | x = np.exp(x) 28 | x /= np.sum(x) 29 | else: 30 | x -= np.min(x, axis=1, keepdims=True) # solving overflow problem 31 | x = np.exp(x) 32 | x /= np.sum(x, axis=1, keepdims=True) 33 | 34 | ### END YOUR CODE 35 | 36 | return x 37 | 38 | def test_softmax_basic(): 39 | """ 40 | Some simple tests to get you started. 41 | Warning: these are not exhaustive. 42 | """ 43 | print "Running basic tests..." 
44 | test1 = softmax(np.array([1,2])) 45 | print test1 46 | assert np.amax(np.fabs(test1 - np.array( 47 | [0.26894142, 0.73105858]))) <= 1e-6 48 | 49 | test2 = softmax(np.array([[1001,1002],[3,4]])) 50 | print test2 51 | assert np.amax(np.fabs(test2 - np.array( 52 | [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6 53 | 54 | test3 = softmax(np.array([[-1001,-1002]])) 55 | print test3 56 | assert np.amax(np.fabs(test3 - np.array( 57 | [0.73105858, 0.26894142]))) <= 1e-6 58 | 59 | print "You should verify these results!\n" 60 | 61 | def test_softmax(): 62 | """ 63 | Use this space to test your softmax implementation by running: 64 | python q1_softmax.py 65 | This function will not be called by the autograder, nor will 66 | your tests be graded. 67 | """ 68 | print "Running your tests..." 69 | ### YOUR CODE HERE 70 | raise NotImplementedError 71 | ### END YOUR CODE 72 | 73 | if __name__ == "__main__": 74 | test_softmax_basic() 75 | test_softmax() -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 7, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# First implement a gradient checker by filling in the following functions\n", 24 | "def gradcheck_naive(f, x):\n", 25 | " \"\"\" \n", 26 | " Gradient check for a function f \n", 27 | " - f should be a function that takes a single argument and outputs the cost and its gradients\n", 28 | " - x is the point (numpy array) to check the gradient at\n", 29 | " \"\"\" \n", 30 | "\n", 31 | " rndstate = random.getstate()\n", 32 | " random.setstate(rndstate) \n", 33 | " fx, grad = f(x) # Evaluate function value at original point\n", 34 | " h = 1e-4\n", 35 | "\n", 36 | " # Iterate over all indexes in x\n", 37 | " it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])\n", 38 | " while not it.finished:\n", 39 | " ix = it.multi_index\n", 40 | "\n", 41 | " ### try modifying x[ix] with h defined above to compute numerical gradients\n", 42 | " ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it \n", 43 | " ### possible to test cost functions with built in randomness later\n", 44 | " ### YOUR CODE HERE:\n", 45 | " \n", 46 | " random.setstate(rndstate)\n", 47 | " tmp1 = np.copy(x) \n", 48 | " tmp1[ix] = tmp1[ix] + h\n", 49 | " f1, _ = f(tmp1)\n", 50 | " \n", 51 | " random.setstate(rndstate)\n", 52 | " tmp2 = np.copy(x) \n", 53 | " tmp2[ix] = tmp2[ix] - h\n", 54 | " f2, _ = f(tmp2)\n", 55 | " numgrad = (f1 - f2) / (2 * h)\n", 56 | " \n", 57 | " ### END YOUR CODE\n", 58 | "\n", 59 | " # Compare gradients\n", 60 | " reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))\n", 61 | " if reldiff > 1e-5:\n", 62 | " print \"Gradient check failed.\"\n", 63 | " print \"First gradient error found at index %s\" % str(ix)\n", 64 | " print \"Your gradient: %f \\t Numerical gradient: %f\" % (grad[ix], numgrad)\n", 65 | " return\n", 66 | " \n", 67 | " it.iternext() # Step to next dimension\n", 68 | "\n", 69 | " print \"Gradient check passed!\"\n", 70 | "\n", 71 | "def sanity_check():\n", 72 | " \"\"\"\n", 73 | " Some basic sanity 
checks.\n", 74 | " \"\"\"\n", 75 | " quad = lambda x: (np.sum(x ** 2), x * 2)\n", 76 | "\n", 77 | " print \"Running sanity checks...\"\n", 78 | " gradcheck_naive(quad, np.array(123.456)) # scalar test\n", 79 | " gradcheck_naive(quad, np.random.randn(3,)) # 1-D test\n", 80 | " gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test\n", 81 | " print \"\"" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 8, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Running sanity checks...\n", 96 | "Gradient check passed!\n", 97 | "Gradient check passed!\n", 98 | "Gradient check passed!\n", 99 | "\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "sanity_check()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 2", 120 | "language": "python", 121 | "name": "python2" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 2 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython2", 133 | "version": "2.7.11" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 0 138 | } 139 | -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # First implement a gradient checker by filling in the following functions 5 | def gradcheck_naive(f, x): 6 | """ 7 | Gradient check for a function f 8 | - f should be a function that takes a single argument and outputs the cost and its gradients 9 | - x is the point (numpy array) to check the gradient at 10 | """ 11 | 12 | rndstate = random.getstate() 13 | random.setstate(rndstate) 14 | fx, grad = f(x) # Evaluate function value at original point 15 | h = 1e-6 16 | 17 | # Iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | ix = it.multi_index 21 | 22 | ### try modifying x[ix] with h defined above to compute numerical gradients 23 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 24 | ### possible to test cost functions with built in randomness later 25 | ### YOUR CODE HERE: 26 | ''' 27 | x[ix] += h 28 | random.setstate(rndstate) 29 | plus_h_fx, plus_h_grad = f( x ) 30 | random.setstate(rndstate) 31 | x[ix] -= 2. * h 32 | minus_h_fx, minus_h_grad = f( x ) 33 | numgrad = (plus_h_fx - minus_h_fx) / 2. /h 34 | ''' 35 | 36 | params = np.copy(x) 37 | params[ix] = params[ix] + h 38 | random.setstate(rndstate) 39 | f1, _ = f(params) 40 | 41 | params[ix] = params[ix] - 2 * h 42 | random.setstate(rndstate) 43 | f2, _ = f(params) 44 | numgrad = (f1 - f2) / (2 * h) 45 | 46 | ### END YOUR CODE 47 | 48 | # Compare gradients 49 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 50 | if reldiff > 1e-5: 51 | print "Gradient check failed." 
52 | print "First gradient error found at index %s" % str(ix) 53 | print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad) 54 | return 55 | 56 | it.iternext() # Step to next dimension 57 | 58 | print "Gradient check passed!" 59 | 60 | def sanity_check(): 61 | """ 62 | Some basic sanity checks. 63 | """ 64 | quad = lambda x: (np.sum(x ** 2), x * 2) 65 | 66 | print "Running sanity checks..." 67 | gradcheck_naive(quad, np.array(123.456)) # scalar test 68 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 69 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 70 | print "" 71 | 72 | def your_sanity_checks(): 73 | """ 74 | Use this space add any additional sanity checks by running: 75 | python q2_gradcheck.py 76 | This function will not be called by the autograder, nor will 77 | your additional tests be graded. 78 | """ 79 | print "Running your sanity checks..." 80 | ### YOUR CODE HERE 81 | raise NotImplementedError 82 | ### END YOUR CODE 83 | 84 | if __name__ == "__main__": 85 | sanity_check() 86 | your_sanity_checks() 87 | -------------------------------------------------------------------------------- /assignment1/q2_neural.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random\n", 13 | "\n", 14 | "from q1_softmax import softmax\n", 15 | "from q2_sigmoid import sigmoid, sigmoid_grad\n", 16 | "from q2_gradcheck import gradcheck_naive" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "def forward_backward_prop(data, labels, params, dimensions):\n", 28 | " \"\"\" \n", 29 | " Forward and backward propagation for a two-layer sigmoidal network \n", 30 | " \n", 31 | " Compute the forward propagation and for the cross entropy cost,\n", 32 | " and backward propagation for the gradients for all parameters.\n", 33 | " \"\"\"\n", 34 | "\n", 35 | " ### Unpack network parameters (do not modify)\n", 36 | " ofs = 0\n", 37 | " Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) # (dim_x, dim_h, dim_y)\n", 38 | "\n", 39 | " W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) # (dim_x, dim_h)\n", 40 | " ofs += Dx * H\n", 41 | " b1 = np.reshape(params[ofs:ofs + H], (1, H)) # (1, dim_h)\n", 42 | " ofs += H\n", 43 | " W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) # (dim_h, dim_y)\n", 44 | " ofs += H * Dy\n", 45 | " b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) # (1, dim_y)\n", 46 | " \n", 47 | " \n", 48 | " ### YOUR CODE HERE: forward propagation\n", 49 | " \n", 50 | " h = sigmoid(np.dot(data, W1) + b1) \n", 51 | " pred = sigmoid(np.dot(h, W2) + b2) \n", 52 | " cost = (-1) * np.sum(labels * np.log(pred) + (1 - labels) * np.log(1 - pred))\n", 53 | " \n", 54 | " ### END YOUR CODE\n", 55 | " \n", 56 | " \n", 57 | " ### YOUR CODE HERE: backward propagation\n", 58 | " \n", 59 | " dout = pred - labels \n", 60 | " dh = np.dot(dout, W2.T) * sigmoid_grad(h) \n", 61 | " \n", 62 | " gradW2 = np.dot(h.T, dout) \n", 63 | " gradb2 = np.sum(dout, 0) \n", 64 | " gradW1 = np.dot(data.T, dh)\n", 65 | " gradb1 = np.sum(dh, 0)\n", 66 | " \n", 67 | " ### END YOUR CODE\n", 68 | "\n", 69 | " ### Stack gradients (do not modify)\n", 70 | " grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), \n", 71 | " gradW2.flatten(), 
gradb2.flatten()))\n", 72 | " \n", 73 | " return cost, grad\n", 74 | "\n", 75 | "def sanity_check():\n", 76 | " \"\"\"\n", 77 | " Set up fake data and parameters for the neural network, and test using \n", 78 | " gradcheck.\n", 79 | " \"\"\"\n", 80 | " print \"Running sanity check...\"\n", 81 | "\n", 82 | " N = 20\n", 83 | " dimensions = [10, 5, 10]\n", 84 | " data = np.random.randn(N, dimensions[0]) # each row will be a datum\n", 85 | " labels = np.zeros((N, dimensions[2]))\n", 86 | " for i in xrange(N):\n", 87 | " labels[i,random.randint(0,dimensions[2]-1)] = 1 # one-hot labels\n", 88 | " \n", 89 | " params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (\n", 90 | " dimensions[1] + 1) * dimensions[2], )\n", 91 | "\n", 92 | " gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,\n", 93 | " dimensions), params)\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Running sanity check...\n", 108 | "Gradient check passed!\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "sanity_check()" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 2", 120 | "language": "python", 121 | "name": "python2" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 2 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython2", 133 | "version": "2.7.11" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 0 138 | } 139 | -------------------------------------------------------------------------------- /assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_sigmoid import sigmoid, sigmoid_grad 6 | from q2_gradcheck import gradcheck_naive 7 | def forward_backward_prop(data, labels, params, dimensions): 8 | """ 9 | Forward and backward propagation for a two-layer sigmoidal network 10 | 11 | Compute the forward propagation and for the cross entropy cost, 12 | and backward propagation for the gradients for all parameters. 
13 | """ 14 | 15 | ### Unpack network parameters (do not modify) 16 | ofs = 0 17 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) # (dim_x, dim_h, dim_y) 18 | 19 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) # (dim_x, dim_h) 20 | ofs += Dx * H 21 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) # (1, dim_h) 22 | ofs += H 23 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) # (dim_h, dim_y) 24 | ofs += H * Dy 25 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) # (1, dim_y) 26 | 27 | 28 | ### YOUR CODE HERE: forward propagation 29 | 30 | h = sigmoid(np.dot(data, W1) + b1) 31 | pred = sigmoid(np.dot(h, W2) + b2) 32 | cost = (-1) * np.sum(labels * np.log(pred) + (1 - labels) * np.log(1 - pred)) # sigmoid 함수를 썼을 때 cost function 33 | 34 | ### END YOUR CODE 35 | 36 | 37 | ### YOUR CODE HERE: backward propagation 38 | 39 | dout = pred - labels 40 | dh = np.dot(dout, W2.T) * sigmoid_grad(h) 41 | 42 | gradW2 = np.dot(h.T, dout) 43 | gradb2 = np.sum(dout, 0) 44 | gradW1 = np.dot(data.T, dh) 45 | gradb1 = np.sum(dh, 0) 46 | 47 | ### END YOUR CODE 48 | 49 | ### Stack gradients (do not modify) 50 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 51 | gradW2.flatten(), gradb2.flatten())) 52 | 53 | return cost, grad 54 | 55 | def sanity_check(): 56 | """ 57 | Set up fake data and parameters for the neural network, and test using 58 | gradcheck. 59 | """ 60 | print "Running sanity check..." 61 | 62 | N = 20 63 | dimensions = [10, 5, 10] 64 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 65 | labels = np.zeros((N, dimensions[2])) 66 | for i in xrange(N): 67 | labels[i,random.randint(0,dimensions[2]-1)] = 1 # one-hot labels 68 | 69 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 70 | dimensions[1] + 1) * dimensions[2], ) 71 | 72 | gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, 73 | dimensions), params) 74 | 75 | 76 | def your_sanity_checks(): 77 | """ 78 | Use this space add any additional sanity checks by running: 79 | python q2_neural.py 80 | This function will not be called by the autograder, nor will 81 | your additional tests be graded. 82 | """ 83 | print "Running your sanity checks..." 84 | ### YOUR CODE HERE 85 | raise NotImplementedError 86 | ### END YOUR CODE 87 | 88 | if __name__ == "__main__": 89 | sanity_check() 90 | your_sanity_checks() -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "\n", 13 | "def sigmoid(x):\n", 14 | " \"\"\"\n", 15 | " Compute the sigmoid function for the input here.\n", 16 | " \"\"\"\n", 17 | " \n", 18 | " ### YOUR CODE HERE\n", 19 | " \n", 20 | " x = 1. / (1. + np.exp(-x))\n", 21 | " \n", 22 | " ### END YOUR CODE\n", 23 | " \n", 24 | " return x\n", 25 | "\n", 26 | "def sigmoid_grad(f):\n", 27 | " \"\"\"\n", 28 | " Compute the gradient for the sigmoid function here. Note that\n", 29 | " for this implementation, the input f should be the sigmoid\n", 30 | " function value of your original input x. 
\n", 31 | " \"\"\"\n", 32 | " \n", 33 | " ### YOUR CODE HERE\n", 34 | " \n", 35 | " f = f * (1 - f)\n", 36 | " \n", 37 | " ### END YOUR CODE\n", 38 | " \n", 39 | " return f\n", 40 | "\n", 41 | "def test_sigmoid_basic():\n", 42 | " \"\"\"\n", 43 | " Some simple tests to get you started. \n", 44 | " Warning: these are not exhaustive.\n", 45 | " \"\"\"\n", 46 | " print \"Running basic tests...\"\n", 47 | " x = np.array([[1, 2], [-1, -2]])\n", 48 | " f = sigmoid(x)\n", 49 | " g = sigmoid_grad(f)\n", 50 | " print f\n", 51 | " assert np.amax(f - np.array([[0.73105858, 0.88079708], \n", 52 | " [0.26894142, 0.11920292]])) <= 1e-6\n", 53 | " print g\n", 54 | " assert np.amax(g - np.array([[0.19661193, 0.10499359],\n", 55 | " [0.19661193, 0.10499359]])) <= 1e-6\n", 56 | " print \"You should verify these results!\\n\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 8, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "Running basic tests...\n", 71 | "[[ 0.73105858 0.88079708]\n", 72 | " [ 0.26894142 0.11920292]]\n", 73 | "[[ 0.19661193 0.10499359]\n", 74 | " [ 0.19661193 0.10499359]]\n", 75 | "You should verify these results!\n", 76 | "\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "test_sigmoid_basic()" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 2", 88 | "language": "python", 89 | "name": "python2" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 2 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython2", 101 | "version": "2.7.11" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 0 106 | } 107 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | 8 | ### YOUR CODE HERE 9 | 10 | x = 1. / (1. + np.exp(-x)) 11 | 12 | ### END YOUR CODE 13 | 14 | return x 15 | 16 | def sigmoid_grad(f): 17 | """ 18 | Compute the gradient for the sigmoid function here. Note that 19 | for this implementation, the input f should be the sigmoid 20 | function value of your original input x. 21 | """ 22 | 23 | ### YOUR CODE HERE 24 | 25 | f = f * (1 - f) 26 | 27 | ### END YOUR CODE 28 | 29 | return f 30 | 31 | def test_sigmoid_basic(): 32 | """ 33 | Some simple tests to get you started. 34 | Warning: these are not exhaustive. 35 | """ 36 | print "Running basic tests..." 37 | x = np.array([[1, 2], [-1, -2]]) 38 | f = sigmoid(x) 39 | g = sigmoid_grad(f) 40 | print f 41 | assert np.amax(f - np.array([[0.73105858, 0.88079708], 42 | [0.26894142, 0.11920292]])) <= 1e-6 43 | print g 44 | assert np.amax(g - np.array([[0.19661193, 0.10499359], 45 | [0.19661193, 0.10499359]])) <= 1e-6 46 | print "You should verify these results!\n" 47 | 48 | def test_sigmoid(): 49 | """ 50 | Use this space to test your sigmoid implementation by running: 51 | python q2_sigmoid.py 52 | This function will not be called by the autograder, nor will 53 | your tests be graded. 54 | """ 55 | print "Running your tests..." 
56 | ### YOUR CODE HERE 57 | raise NotImplementedError 58 | ### END YOUR CODE 59 | 60 | if __name__ == "__main__": 61 | test_sigmoid_basic(); 62 | test_sigmoid() 63 | -------------------------------------------------------------------------------- /assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from cs224d.data_utils import * 4 | import matplotlib.pyplot as plt 5 | 6 | from q3_word2vec import * 7 | from q3_sgd import * 8 | 9 | # Reset the random seed to make sure that everyone gets the same results 10 | random.seed(314) 11 | dataset = StanfordSentiment() 12 | tokens = dataset.tokens() 13 | nWords = len(tokens) 14 | 15 | # We are going to train 10-dimensional vectors for this assignment 16 | dimVectors = 10 17 | 18 | # Context size 19 | C = 5 20 | 21 | # Reset the random seed to make sure that everyone gets the same results 22 | random.seed(31415) 23 | np.random.seed(9265) 24 | wordVectors = np.concatenate(((np.random.rand(nWords, dimVectors) - .5) / \ 25 | dimVectors, np.zeros((nWords, dimVectors))), axis=0) 26 | wordVectors0 = sgd( 27 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 28 | negSamplingCostAndGradient), 29 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 30 | print "sanity check: cost at convergence should be around or below 10" 31 | 32 | # sum the input and output word vectors 33 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 34 | 35 | # Visualize the word vectors you trained 36 | _, wordVectors0, _ = load_saved_params() 37 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 38 | visualizeWords = ["the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 39 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 40 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 41 | "annoying"] 42 | visualizeIdx = [tokens[word] for word in visualizeWords] 43 | visualizeVecs = wordVectors[visualizeIdx, :] 44 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 45 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 46 | U,S,V = np.linalg.svd(covariance) 47 | coord = temp.dot(U[:,0:2]) 48 | 49 | for i in xrange(len(visualizeWords)): 50 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 51 | bbox=dict(facecolor='green', alpha=0.1)) 52 | 53 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 54 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 55 | 56 | plt.savefig('q3_word_vectors.png') 57 | plt.show() -------------------------------------------------------------------------------- /assignment1/q3_sgd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Save parameters every a few SGD iterations as fail-safe\n", 12 | "SAVE_PARAMS_EVERY = 1000\n", 13 | "\n", 14 | "import glob\n", 15 | "import random\n", 16 | "import numpy as np\n", 17 | "import os.path as op\n", 18 | "import cPickle as pickle" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "def load_saved_params():\n", 30 | " \"\"\" A helper function that loads previously saved parameters and resets iteration start \"\"\"\n", 31 | " st = 0\n", 32 | " for f in glob.glob(\"saved_params_*.npy\"):\n", 33 | " iter = 
int(op.splitext(op.basename(f))[0].split(\"_\")[2])\n", 34 | " if (iter > st):\n", 35 | " st = iter\n", 36 | " \n", 37 | " if st > 0:\n", 38 | " with open(\"saved_params_%d.npy\" % st, \"r\") as f:\n", 39 | " params = pickle.load(f)\n", 40 | " state = pickle.load(f)\n", 41 | " return st, params, state\n", 42 | " else:\n", 43 | " return st, None, None\n", 44 | " \n", 45 | "def save_params(iter, params):\n", 46 | " with open(\"saved_params_%d.npy\" % iter, \"w\") as f:\n", 47 | " pickle.dump(params, f)\n", 48 | " pickle.dump(random.getstate(), f)\n", 49 | "\n", 50 | "def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10):\n", 51 | " \"\"\" Stochastic Gradient Descent \"\"\"\n", 52 | " # Implement the stochastic gradient descent method in this \n", 53 | " # function. \n", 54 | " \n", 55 | " # Inputs: \n", 56 | " # - f: the function to optimize, it should take a single \n", 57 | " # argument and yield two outputs, a cost and the gradient \n", 58 | " # with respect to the arguments \n", 59 | " # - x0: the initial point to start SGD from \n", 60 | " # - step: the step size for SGD \n", 61 | " # - iterations: total iterations to run SGD for \n", 62 | " # - postprocessing: postprocessing function for the parameters \n", 63 | " # if necessary. In the case of word2vec we will need to \n", 64 | " # normalize the word vectors to have unit length. \n", 65 | " # - PRINT_EVERY: specifies every how many iterations to output \n", 66 | "\n", 67 | " # Output: \n", 68 | " # - x: the parameter value after SGD finishes \n", 69 | " \n", 70 | " # Anneal learning rate every several iterations\n", 71 | " ANNEAL_EVERY = 20000\n", 72 | " \n", 73 | " if useSaved:\n", 74 | " start_iter, oldx, state = load_saved_params()\n", 75 | " if start_iter > 0:\n", 76 | " x0 = oldx;\n", 77 | " step *= 0.5 ** (start_iter / ANNEAL_EVERY)\n", 78 | " \n", 79 | " if state:\n", 80 | " random.setstate(state)\n", 81 | " else:\n", 82 | " start_iter = 0\n", 83 | " \n", 84 | " x = x0\n", 85 | " \n", 86 | " if not postprocessing:\n", 87 | " postprocessing = lambda x: x\n", 88 | " \n", 89 | " expcost = None\n", 90 | " \n", 91 | " for iter in xrange(start_iter + 1, iterations + 1):\n", 92 | " ### Don't forget to apply the postprocessing after every iteration!\n", 93 | " ### You might want to print the progress every few iterations.\n", 94 | "\n", 95 | " cost = None\n", 96 | " ### YOUR CODE HERE\n", 97 | " \n", 98 | " cost, grad = f(x)\n", 99 | " x = x - step * grad\n", 100 | " \n", 101 | " \n", 102 | " ### END YOUR CODE\n", 103 | " \n", 104 | " if iter % PRINT_EVERY == 0:\n", 105 | " if not expcost:\n", 106 | " expcost = cost\n", 107 | " else:\n", 108 | " expcost = .95 * expcost + .05 * cost\n", 109 | " print \"iter %d: %f\" % (iter, expcost)\n", 110 | " \n", 111 | " if iter % SAVE_PARAMS_EVERY == 0 and useSaved:\n", 112 | " save_params(iter, x)\n", 113 | " \n", 114 | " if iter % ANNEAL_EVERY == 0:\n", 115 | " step *= 0.5\n", 116 | " \n", 117 | " return x\n", 118 | "\n", 119 | "def sanity_check():\n", 120 | " quad = lambda x: (np.sum(x ** 2), x * 2)\n", 121 | "\n", 122 | " print \"Running sanity checks...\"\n", 123 | " t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)\n", 124 | " print \"test 1 result:\", t1\n", 125 | " assert abs(t1) <= 1e-6\n", 126 | "\n", 127 | " t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)\n", 128 | " print \"test 2 result:\", t2\n", 129 | " assert abs(t2) <= 1e-6\n", 130 | "\n", 131 | " t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)\n", 132 | " print \"test 3 result:\", t3\n", 133 | 
" assert abs(t3) <= 1e-6\n", 134 | " \n", 135 | " print \"\"" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Running sanity checks...\n", 150 | "iter 100: 0.004578\n", 151 | "iter 200: 0.004353\n", 152 | "iter 300: 0.004136\n", 153 | "iter 400: 0.003929\n", 154 | "iter 500: 0.003733\n", 155 | "iter 600: 0.003546\n", 156 | "iter 700: 0.003369\n", 157 | "iter 800: 0.003200\n", 158 | "iter 900: 0.003040\n", 159 | "iter 1000: 0.002888\n", 160 | "test 1 result: 8.41483678608e-10\n", 161 | "iter 100: 0.000000\n", 162 | "iter 200: 0.000000\n", 163 | "iter 300: 0.000000\n", 164 | "iter 400: 0.000000\n", 165 | "iter 500: 0.000000\n", 166 | "iter 600: 0.000000\n", 167 | "iter 700: 0.000000\n", 168 | "iter 800: 0.000000\n", 169 | "iter 900: 0.000000\n", 170 | "iter 1000: 0.000000\n", 171 | "test 2 result: 0.0\n", 172 | "iter 100: 0.041205\n", 173 | "iter 200: 0.039181\n", 174 | "iter 300: 0.037222\n", 175 | "iter 400: 0.035361\n", 176 | "iter 500: 0.033593\n", 177 | "iter 600: 0.031913\n", 178 | "iter 700: 0.030318\n", 179 | "iter 800: 0.028802\n", 180 | "iter 900: 0.027362\n", 181 | "iter 1000: 0.025994\n", 182 | "test 3 result: -2.52445103582e-09\n", 183 | "\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "sanity_check()" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 2", 195 | "language": "python", 196 | "name": "python2" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 2 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython2", 208 | "version": "2.7.11" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 0 213 | } 214 | -------------------------------------------------------------------------------- /assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | # Save parameters every a few SGD iterations as fail-safe 2 | SAVE_PARAMS_EVERY = 1000 3 | 4 | import glob 5 | import random 6 | import numpy as np 7 | import os.path as op 8 | import cPickle as pickle 9 | 10 | def load_saved_params(): 11 | """ A helper function that loads previously saved parameters and resets iteration start """ 12 | st = 0 13 | for f in glob.glob("saved_params_*.npy"): 14 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 15 | if (iter > st): 16 | st = iter 17 | 18 | if st > 0: 19 | with open("saved_params_%d.npy" % st, "r") as f: 20 | params = pickle.load(f) 21 | state = pickle.load(f) 22 | return st, params, state 23 | else: 24 | return st, None, None 25 | 26 | def save_params(iter, params): 27 | with open("saved_params_%d.npy" % iter, "w") as f: 28 | pickle.dump(params, f) 29 | pickle.dump(random.getstate(), f) 30 | 31 | def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10): 32 | """ Stochastic Gradient Descent """ 33 | # Implement the stochastic gradient descent method in this 34 | # function. 
35 | 36 | # Inputs: 37 | # - f: the function to optimize, it should take a single 38 | # argument and yield two outputs, a cost and the gradient 39 | # with respect to the arguments 40 | # - x0: the initial point to start SGD from 41 | # - step: the step size for SGD 42 | # - iterations: total iterations to run SGD for 43 | # - postprocessing: postprocessing function for the parameters 44 | # if necessary. In the case of word2vec we will need to 45 | # normalize the word vectors to have unit length. 46 | # - PRINT_EVERY: specifies every how many iterations to output 47 | 48 | # Output: 49 | # - x: the parameter value after SGD finishes 50 | 51 | # Anneal learning rate every several iterations 52 | ANNEAL_EVERY = 20000 53 | 54 | if useSaved: 55 | start_iter, oldx, state = load_saved_params() 56 | if start_iter > 0: 57 | x0 = oldx; 58 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 59 | 60 | if state: 61 | random.setstate(state) 62 | else: 63 | start_iter = 0 64 | 65 | x = x0 66 | 67 | if not postprocessing: 68 | postprocessing = lambda x: x 69 | 70 | expcost = None 71 | 72 | for iter in xrange(start_iter + 1, iterations + 1): 73 | ### Don't forget to apply the postprocessing after every iteration! 74 | ### You might want to print the progress every few iterations. 75 | 76 | cost = None 77 | ### YOUR CODE HERE 78 | 79 | cost, grad = f(x) 80 | x = x - step * grad 81 | 82 | ### END YOUR CODE 83 | 84 | if iter % PRINT_EVERY == 0: 85 | if not expcost: 86 | expcost = cost 87 | else: 88 | expcost = .95 * expcost + .05 * cost 89 | print "iter %d: %f" % (iter, expcost) 90 | 91 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 92 | save_params(iter, x) 93 | 94 | if iter % ANNEAL_EVERY == 0: 95 | step *= 0.5 96 | 97 | return x 98 | 99 | def sanity_check(): 100 | quad = lambda x: (np.sum(x ** 2), x * 2) 101 | 102 | print "Running sanity checks..." 103 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100) 104 | print "test 1 result:", t1 105 | assert abs(t1) <= 1e-6 106 | 107 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 108 | print "test 2 result:", t2 109 | assert abs(t2) <= 1e-6 110 | 111 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 112 | print "test 3 result:", t3 113 | assert abs(t3) <= 1e-6 114 | 115 | print "" 116 | 117 | def your_sanity_checks(): 118 | """ 119 | Use this space add any additional sanity checks by running: 120 | python q3_sgd.py 121 | This function will not be called by the autograder, nor will 122 | your additional tests be graded. 123 | """ 124 | print "Running your sanity checks..." 
125 | ### YOUR CODE HERE 126 | raise NotImplementedError 127 | ### END YOUR CODE 128 | 129 | if __name__ == "__main__": 130 | sanity_check(); 131 | your_sanity_checks(); -------------------------------------------------------------------------------- /assignment1/q3_word2vec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random\n", 13 | "\n", 14 | "from q1_softmax import softmax\n", 15 | "from q2_gradcheck import gradcheck_naive\n", 16 | "from q2_sigmoid import sigmoid, sigmoid_grad" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "def normalizeRows(x):\n", 28 | " \"\"\" Row normalization function \"\"\"\n", 29 | " # Implement a function that normalizes each row of a matrix to have unit length\n", 30 | " \n", 31 | " ### YOUR CODE HERE\n", 32 | " \n", 33 | " x_sum = np.sqrt(np.sum(x**2, 1))\n", 34 | " x /= np.reshape(x_sum, (-1, 1)) + 1e-20\n", 35 | " \n", 36 | " ### END YOUR CODE\n", 37 | " \n", 38 | " return x\n", 39 | "\n", 40 | "def test_normalize_rows():\n", 41 | " print \"Testing normalizeRows...\"\n", 42 | " x = normalizeRows(np.array([[3.0,4.0],[1, 2]])) \n", 43 | " # the result should be [[0.6, 0.8], [0.4472, 0.8944]]\n", 44 | " print x\n", 45 | " assert (x.all() == np.array([[0.6, 0.8], [0.4472, 0.8944]]).all())\n", 46 | " print \"\"" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 65, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def softmaxCostAndGradient(predicted, target, outputVectors, dataset):\n", 58 | " \"\"\" Softmax cost function for word2vec models \"\"\"\n", 59 | " \n", 60 | " # Implement the cost and gradients for one predicted word vector \n", 61 | " # and one target word vector as a building block for word2vec \n", 62 | " # models, assuming the softmax prediction function and cross \n", 63 | " # entropy loss. \n", 64 | " \n", 65 | " # Inputs: \n", 66 | " # - predicted: shape with (1, dim_embed) numpy ndarray, predicted word vector (\\hat{v} in \n", 67 | " # the written component or \\hat{r} in an earlier version)\n", 68 | " # - target: integer, the index of the target word \n", 69 | " # - outputVectors: \"output\" vectors (as rows) for all tokens \n", 70 | " # - dataset: needed for negative sampling, unused here. \n", 71 | " \n", 72 | " # Outputs: \n", 73 | " # - cost: cross entropy cost for the softmax word prediction \n", 74 | " # - gradPred: the gradient with respect to the predicted word \n", 75 | " # vector \n", 76 | " # - grad: the gradient with respect to all the other word \n", 77 | " # vectors \n", 78 | " \n", 79 | " # We will not provide starter code for this function, but feel \n", 80 | " # free to reference the code you previously wrote for this \n", 81 | " # assignment! 
\n", 82 | " \n", 83 | " ### YOUR CODE HERE\n", 84 | " \n", 85 | " N = outputVectors.shape[0] # n_words: vocab size\n", 86 | " y = np.zeros(N)\n", 87 | " y[target] = 1 # (n_words)\n", 88 | " \n", 89 | " score = np.dot(predicted, outputVectors.T) # (1, n_words)\n", 90 | " out = softmax(score)\n", 91 | " \n", 92 | " cost = np.sum(-y * np.log(out)) \n", 93 | " \n", 94 | " dout = out - y # (1, n_words)\n", 95 | " gradPred = np.dot(dout, outputVectors) # (1, dim_embed)\n", 96 | " grad = np.dot(dout.T, predicted) # (n_words, dim_embed)\n", 97 | " \n", 98 | " ### END YOUR CODE\n", 99 | " \n", 100 | " return cost, gradPred, grad\n", 101 | "\n", 102 | "def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, \n", 103 | " K=10):\n", 104 | " \"\"\" Negative sampling cost function for word2vec models \"\"\"\n", 105 | "\n", 106 | " # Implement the cost and gradients for one predicted word vector \n", 107 | " # and one target word vector as a building block for word2vec \n", 108 | " # models, using the negative sampling technique. K is the sample \n", 109 | " # size. You might want to use dataset.sampleTokenIdx() to sample \n", 110 | " # a random word index. \n", 111 | " # \n", 112 | " # Note: See test_word2vec below for dataset's initialization.\n", 113 | " # \n", 114 | " # Input/Output Specifications: same as softmaxCostAndGradient \n", 115 | " # We will not provide starter code for this function, but feel \n", 116 | " # free to reference the code you previously wrote for this \n", 117 | " # assignment!\n", 118 | " \n", 119 | " \n", 120 | " cost = 0.0\n", 121 | " grad = np.zeros_like(outputVectors)\n", 122 | " gradPred = np.zeros_like(predicted)\n", 123 | " \n", 124 | " ### YOUR CODE HERE\n", 125 | " \n", 126 | " \n", 127 | " a_target = sigmoid(np.dot(predicted.reshape(-1), outputVectors[target].T))\n", 128 | " cost += -np.log(a_target) # cost for target value\n", 129 | " grad[target:target+1] = (a_target - 1) * predicted # gradient for target value\n", 130 | " gradPred += (a_target - 1) * outputVectors[target]\n", 131 | " \n", 132 | " neg_samples = [] \n", 133 | " \n", 134 | " for i in range(K):\n", 135 | " j = dataset.sampleTokenIdx()\n", 136 | " if j == target or (j in neg_samples):\n", 137 | " i -= 1 # if negative sample is same with target or already sampled, then resample.\n", 138 | " continue\n", 139 | " neg_samples.append(j)\n", 140 | " \n", 141 | " a_neg = sigmoid(-np.dot(predicted.reshape(-1), outputVectors[j].T))\n", 142 | " cost += -np.log(a_neg) # cost for negative sample\n", 143 | " grad[j:j+1] = (1 - a_neg) * predicted # gradient for negative sample\n", 144 | " gradPred += (1 - a_neg) * outputVectors[j] \n", 145 | " \n", 146 | "\n", 147 | " ### END YOUR CODE\n", 148 | " \n", 149 | " return cost, gradPred, grad\n", 150 | "\n", 151 | "\n", 152 | "def skipgram(currentWord, C, contextWords, tokens, inputVectors, outputVectors, \n", 153 | " dataset, word2vecCostAndGradient = softmaxCostAndGradient):\n", 154 | " \"\"\" Skip-gram model in word2vec \"\"\"\n", 155 | "\n", 156 | " # Implement the skip-gram model in this function.\n", 157 | "\n", 158 | " # Inputs: \n", 159 | " # - currrentWord: a string of the current center word \n", 160 | " # - C: integer, context size \n", 161 | " # - contextWords: list of no more than 2*C strings, the context words \n", 162 | " # - tokens: a dictionary that maps words to their indices in \n", 163 | " # the word vector list \n", 164 | " # - inputVectors: \"input\" word vectors (as rows) for all tokens \n", 165 | " # - outputVectors: \"output\" 
word vectors (as rows) for all tokens \n", 166 | " # - word2vecCostAndGradient: the cost and gradient function for \n", 167 | " # a prediction vector given the target word vectors, \n", 168 | " # could be one of the two cost functions you \n", 169 | " # implemented above\n", 170 | "\n", 171 | " # Outputs: \n", 172 | " # - cost: the cost function value for the skip-gram model \n", 173 | " # - grad: the gradient with respect to the word vectors \n", 174 | " # We will not provide starter code for this function, but feel \n", 175 | " # free to reference the code you previously wrote for this \n", 176 | " # assignment!\n", 177 | "\n", 178 | " cost = 0.0\n", 179 | " gradIn = np.zeros(inputVectors.shape) \n", 180 | " gradOut = np.zeros(outputVectors.shape)\n", 181 | " \n", 182 | " \n", 183 | " ### YOUR CODE HERE\n", 184 | " \n", 185 | " \n", 186 | " idx = tokens[currentWord] # index of the center word in the vocabulary\n", 187 | " input_vector = inputVectors[idx:idx+1] # (1, dim_embed) \n", 188 | " \n", 189 | " for context in contextWords:\n", 190 | " c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[context], outputVectors, dataset)\n", 191 | " cost += c\n", 192 | " gradIn[idx:idx+1, :] += g_in\n", 193 | " gradOut += g_out\n", 194 | "\n", 195 | " \n", 196 | " ### END YOUR CODE\n", 197 | " \n", 198 | " return cost, gradIn, gradOut\n", 199 | "\n", 200 | "def cbow(currentWord, C, contextWords, tokens, inputVectors, outputVectors, \n", 201 | " dataset, word2vecCostAndGradient = softmaxCostAndGradient):\n", 202 | " \"\"\" CBOW model in word2vec \"\"\"\n", 203 | "\n", 204 | " # Implement the continuous bag-of-words model in this function. \n", 205 | " # Input/Output specifications: same as the skip-gram model \n", 206 | " # We will not provide starter code for this function, but feel \n", 207 | " # free to reference the code you previously wrote for this \n", 208 | " # assignment!\n", 209 | "\n", 210 | " #################################################################\n", 211 | " # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRITTEN #\n", 212 | " # ASSIGNMENT ARE NOT! # \n", 213 | " #################################################################\n", 214 | " \n", 215 | " cost = 0\n", 216 | " gradIn = np.zeros(inputVectors.shape)\n", 217 | " gradOut = np.zeros(outputVectors.shape)\n", 218 | "\n", 219 | " ### YOUR CODE HERE\n", 220 | " \n", 221 | " \n", 222 | " for contextWord in contextWords:\n", 223 | " idx = tokens[contextWord] # index of the context word in the vocabulary\n", 224 | " input_vector = inputVectors[idx:idx+1] \n", 225 | " c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[currentWord], outputVectors, dataset)\n", 226 | " cost += c\n", 227 | " gradIn[idx:idx+1, :] += g_in\n", 228 | " gradOut += g_out\n", 229 | " \n", 230 | " \n", 231 | " ### END YOUR CODE\n", 232 | " \n", 233 | " return cost, gradIn, gradOut\n", 234 | "\n", 235 | "#############################################\n", 236 | "# Testing functions below. DO NOT MODIFY! 
#\n", 237 | "#############################################\n", 238 | "\n", 239 | "def word2vec_sgd_wrapper(word2vecModel, tokens, wordVectors, dataset, C, word2vecCostAndGradient = softmaxCostAndGradient):\n", 240 | " batchsize = 50\n", 241 | " cost = 0.0\n", 242 | " grad = np.zeros(wordVectors.shape)\n", 243 | " N = wordVectors.shape[0]\n", 244 | " inputVectors = wordVectors[:N/2,:]\n", 245 | " outputVectors = wordVectors[N/2:,:]\n", 246 | " for i in xrange(batchsize):\n", 247 | " C1 = random.randint(1,C) # window size (양 옆으로 )\n", 248 | " centerword, context = dataset.getRandomContext(C1)\n", 249 | " \n", 250 | " if word2vecModel == skipgram:\n", 251 | " denom = 1\n", 252 | " else:\n", 253 | " denom = 1\n", 254 | " \n", 255 | " c, gin, gout = word2vecModel(centerword, C1, context, tokens, inputVectors, outputVectors, dataset, word2vecCostAndGradient)\n", 256 | " cost += c / batchsize / denom\n", 257 | " grad[:N/2, :] += gin / batchsize / denom\n", 258 | " grad[N/2:, :] += gout / batchsize / denom\n", 259 | " \n", 260 | " return cost, grad\n", 261 | "\n", 262 | "def test_word2vec():\n", 263 | " # Interface to the dataset for negative sampling\n", 264 | " dataset = type('dummy', (), {})()\n", 265 | " def dummySampleTokenIdx():\n", 266 | " return random.randint(0, 4)\n", 267 | "\n", 268 | " def getRandomContext(C):\n", 269 | " tokens = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", 270 | " return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \\\n", 271 | " for i in xrange(2*C)]\n", 272 | " dataset.sampleTokenIdx = dummySampleTokenIdx\n", 273 | " dataset.getRandomContext = getRandomContext\n", 274 | "\n", 275 | " random.seed(10230)\n", 276 | " np.random.seed(9265)\n", 277 | " dummy_vectors = normalizeRows(np.random.randn(10,3))\n", 278 | " dummy_tokens = dict([(\"a\",0), (\"b\",1), (\"c\",2),(\"d\",3),(\"e\",4)])\n", 279 | " print \"==== Gradient check for skip-gram ====\"\n", 280 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5), dummy_vectors)\n", 281 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)\n", 282 | " print \"\\n==== Gradient check for CBOW ====\"\n", 283 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5), dummy_vectors)\n", 284 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)\n", 285 | "\n", 286 | " print \"\\n=== Results ===\"\n", 287 | " print skipgram(\"c\", 3, [\"a\", \"b\", \"e\", \"d\", \"b\", \"c\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)\n", 288 | " print skipgram(\"c\", 1, [\"a\", \"b\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)\n", 289 | " print cbow(\"a\", 2, [\"a\", \"b\", \"c\", \"a\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)\n", 290 | " print cbow(\"a\", 2, [\"a\", \"b\", \"a\", \"c\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 66, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [ 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | "==== Gradient check for skip-gram ====\n", 305 | "Gradient check passed!\n", 306 | "Gradient check passed!\n", 307 | "\n", 308 | "==== Gradient check for CBOW ====\n", 309 | "Gradient check 
passed!\n", 310 | "Gradient check passed!\n", 311 | "\n", 312 | "=== Results ===\n", 313 | "(11.166109001533979, array([[ 0. , 0. , 0. ],\n", 314 | " [ 0. , 0. , 0. ],\n", 315 | " [-1.26947339, -1.36873189, 2.45158957],\n", 316 | " [ 0. , 0. , 0. ],\n", 317 | " [ 0. , 0. , 0. ]]), array([[-0.41045956, 0.18834851, 1.43272264],\n", 318 | " [ 0.38202831, -0.17530219, -1.33348241],\n", 319 | " [ 0.07009355, -0.03216399, -0.24466386],\n", 320 | " [ 0.09472154, -0.04346509, -0.33062865],\n", 321 | " [-0.13638384, 0.06258276, 0.47605228]]))\n", 322 | "(6.4123666986130292, array([[ 0. , 0. , 0. ],\n", 323 | " [ 0. , 0. , 0. ],\n", 324 | " [-1.79237853, -1.61783916, 0.22229718],\n", 325 | " [ 0. , 0. , 0. ],\n", 326 | " [ 0. , 0. , 0. ]]), array([[-0.11265089, 0.05169237, 0.39321163],\n", 327 | " [ 0.17315617, -0.07945656, -0.60440731],\n", 328 | " [-0.22764219, 0.10445868, 0.79459256],\n", 329 | " [-0.21068407, 0.09667707, 0.73539969],\n", 330 | " [-0.32248118, 0.14797767, 1.1256312 ]]))\n", 331 | "(5.5798856283496789, array([[ 0.3741715 , -0.234476 , -1.36551259],\n", 332 | " [ 0.35927914, -0.11439876, -0.98756037],\n", 333 | " [ 0.17201142, -0.11892354, -0.53014219],\n", 334 | " [ 0. , 0. , 0. ],\n", 335 | " [ 0. , 0. , 0. ]]), array([[ 0.841774 , 0.39105083, -0.47861909],\n", 336 | " [-0.02845097, -0.1067265 , 0.02802426],\n", 337 | " [-0.31375535, -0.06447558, 0.1492707 ],\n", 338 | " [-0.10632801, -0.14957598, 0.03188348],\n", 339 | " [-0.39323966, -0.07027277, 0.26944066]]))\n", 340 | "(12.464842117519513, array([[-0.87034332, -0.94713331, -1.41428685],\n", 341 | " [ 0.12556491, 0.14811621, -1.34941464],\n", 342 | " [-0.42965887, -0.26805817, -0.6785951 ],\n", 343 | " [ 0. , 0. , 0. ],\n", 344 | " [ 0. , 0. , 0. ]]), array([[ 0.14837703, 0.31110522, -0.10079555],\n", 345 | " [-0.51929714, -0.22034123, 0.31252798],\n", 346 | " [-0.40797326, -0.15206494, 0.36152752],\n", 347 | " [-0.74542585, -0.2600954 , 0.36736716],\n", 348 | " [-1.41505916, 0.04569902, 0.89005586]]))\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "test_word2vec()" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 2", 360 | "language": "python", 361 | "name": "python2" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 2 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython2", 373 | "version": "2.7.11" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 0 378 | } 379 | -------------------------------------------------------------------------------- /assignment1/q3_word2vec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_gradcheck import gradcheck_naive 6 | from q2_sigmoid import sigmoid, sigmoid_grad 7 | 8 | def normalizeRows(x): 9 | """ Row normalization function """ 10 | # Implement a function that normalizes each row of a matrix to have unit length 11 | 12 | ### YOUR CODE HERE 13 | 14 | 15 | x_sum = np.sqrt(np.sum(x**2, 1)) 16 | x /= np.reshape(x_sum, (-1, 1)) + 1e-20 17 | 18 | 19 | ### END YOUR CODE 20 | 21 | return x 22 | 23 | def test_normalize_rows(): 24 | print "Testing normalizeRows..." 
25 | x = normalizeRows(np.array([[3.0,4.0],[1, 2]])) 26 | # the result should be [[0.6, 0.8], [0.4472, 0.8944]] 27 | print x 28 | assert np.allclose(x, np.array([[0.6, 0.8], [0.4472, 0.8944]]), atol=1e-4) 29 | print "" 30 | 31 | def softmaxCostAndGradient(predicted, target, outputVectors, dataset): 32 | """ Softmax cost function for word2vec models """ 33 | 34 | # Implement the cost and gradients for one predicted word vector 35 | # and one target word vector as a building block for word2vec 36 | # models, assuming the softmax prediction function and cross 37 | # entropy loss. 38 | 39 | # Inputs: 40 | # - predicted: numpy ndarray, predicted word vector (\hat{v} in 41 | # the written component or \hat{r} in an earlier version) 42 | # - target: integer, the index of the target word 43 | # - outputVectors: "output" vectors (as rows) for all tokens 44 | # - dataset: needed for negative sampling, unused here. 45 | 46 | # Outputs: 47 | # - cost: cross entropy cost for the softmax word prediction 48 | # - gradPred: the gradient with respect to the predicted word 49 | # vector 50 | # - grad: the gradient with respect to all the other word 51 | # vectors 52 | 53 | # We will not provide starter code for this function, but feel 54 | # free to reference the code you previously wrote for this 55 | # assignment! 56 | 57 | ### YOUR CODE HERE 58 | 59 | N = outputVectors.shape[0] # n_words: vocab size 60 | y = np.zeros(N) 61 | y[target] = 1 # (n_words) 62 | 63 | score = np.dot(predicted, outputVectors.T) # (1, n_words) 64 | out = softmax(score) 65 | 66 | cost = np.sum(-y * np.log(out)) 67 | 68 | dout = out - y # (1, n_words) 69 | gradPred = np.dot(dout, outputVectors) # (1, dim_embed) 70 | grad = np.dot(dout.T, predicted) # (n_words, dim_embed) 71 | 72 | 73 | ### END YOUR CODE 74 | 75 | return cost, gradPred, grad 76 | 77 | def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, 78 | K=10): 79 | """ Negative sampling cost function for word2vec models """ 80 | 81 | # Implement the cost and gradients for one predicted word vector 82 | # and one target word vector as a building block for word2vec 83 | # models, using the negative sampling technique. K is the sample 84 | # size. You might want to use dataset.sampleTokenIdx() to sample 85 | # a random word index. 86 | # 87 | # Note: See test_word2vec below for dataset's initialization. 88 | # 89 | # Input/Output Specifications: same as softmaxCostAndGradient 90 | # We will not provide starter code for this function, but feel 91 | # free to reference the code you previously wrote for this 92 | # assignment! 93 | 94 | cost = 0.0 95 | grad = np.zeros_like(outputVectors) 96 | gradPred = np.zeros_like(predicted) 97 | 98 | ### YOUR CODE HERE 99 | 100 | 101 | a_target = sigmoid(np.dot(predicted.reshape(-1), outputVectors[target].T)) 102 | cost += -np.log(a_target) # cost for target value 103 | grad[target:target+1] = (a_target - 1) * predicted # gradient for target value 104 | gradPred += (a_target - 1) * outputVectors[target] 105 | 106 | neg_samples = [] 107 | 108 | for i in range(K): 109 | j = dataset.sampleTokenIdx() 110 | if j == target or (j in neg_samples): 111 | # skip this draw if it equals the target word or was already sampled 
112 | continue 113 | neg_samples.append(j) 114 | 115 | a_neg = sigmoid(-np.dot(predicted.reshape(-1), outputVectors[j].T)) 116 | cost += -np.log(a_neg) # cost for negative sample 117 | grad[j:j+1] = (1 - a_neg) * predicted # gradient for negative sample 118 | gradPred += (1 - a_neg) * outputVectors[j] 119 | 120 | 121 | ### END YOUR CODE 122 | 123 | return cost, gradPred, grad 124 | 125 | 126 | def skipgram(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 127 | dataset, word2vecCostAndGradient = softmaxCostAndGradient): 128 | """ Skip-gram model in word2vec """ 129 | 130 | # Implement the skip-gram model in this function. 131 | 132 | # Inputs: 133 | # - currentWord: a string of the current center word 134 | # - C: integer, context size 135 | # - contextWords: list of no more than 2*C strings, the context words 136 | # - tokens: a dictionary that maps words to their indices in 137 | # the word vector list 138 | # - inputVectors: "input" word vectors (as rows) for all tokens 139 | # - outputVectors: "output" word vectors (as rows) for all tokens 140 | # - word2vecCostAndGradient: the cost and gradient function for 141 | # a prediction vector given the target word vectors, 142 | # could be one of the two cost functions you 143 | # implemented above 144 | 145 | # Outputs: 146 | # - cost: the cost function value for the skip-gram model 147 | # - grad: the gradient with respect to the word vectors 148 | # We will not provide starter code for this function, but feel 149 | # free to reference the code you previously wrote for this 150 | # assignment! 151 | 152 | cost = 0.0 153 | gradIn = np.zeros(inputVectors.shape) 154 | gradOut = np.zeros(outputVectors.shape) 155 | 156 | 157 | ### YOUR CODE HERE 158 | 159 | 160 | idx = tokens[currentWord] # index of the center word in the vocabulary 161 | input_vector = inputVectors[idx:idx+1] # (1, dim_embed) 162 | 163 | for context in contextWords: 164 | c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[context], outputVectors, dataset) 165 | cost += c 166 | gradIn[idx:idx+1, :] += g_in 167 | gradOut += g_out 168 | 169 | 170 | ### END YOUR CODE 171 | 172 | return cost, gradIn, gradOut 173 | 174 | def cbow(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 175 | dataset, word2vecCostAndGradient = softmaxCostAndGradient): 176 | """ CBOW model in word2vec """ 177 | 178 | # Implement the continuous bag-of-words model in this function. 179 | # Input/Output specifications: same as the skip-gram model 180 | # We will not provide starter code for this function, but feel 181 | # free to reference the code you previously wrote for this 182 | # assignment! 183 | 184 | ################################################################# 185 | # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRITTEN # 186 | # ASSIGNMENT ARE NOT! # 187 | ################################################################# 188 | 189 | cost = 0 190 | gradIn = np.zeros(inputVectors.shape) 191 | gradOut = np.zeros(outputVectors.shape) 192 | 193 | ### YOUR CODE HERE 194 | 195 | 196 | for contextWord in contextWords: 197 | idx = tokens[contextWord] # index of the context word in the vocabulary 198 | input_vector = inputVectors[idx:idx+1] 199 | c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[currentWord], outputVectors, dataset) 200 | cost += c 201 | gradIn[idx:idx+1, :] += g_in 202 | gradOut += g_out 203 | 204 | 205 | ### END YOUR CODE 206 | 207 | return cost, gradIn, gradOut 208 | 209 | ############################################# 210 | # Testing functions below. DO NOT MODIFY! 
# 211 | ############################################# 212 | 213 | def word2vec_sgd_wrapper(word2vecModel, tokens, wordVectors, dataset, C, word2vecCostAndGradient = softmaxCostAndGradient): 214 | batchsize = 50 215 | cost = 0.0 216 | grad = np.zeros(wordVectors.shape) 217 | N = wordVectors.shape[0] 218 | inputVectors = wordVectors[:N/2,:] 219 | outputVectors = wordVectors[N/2:,:] 220 | for i in xrange(batchsize): 221 | C1 = random.randint(1,C) 222 | centerword, context = dataset.getRandomContext(C1) 223 | 224 | if word2vecModel == skipgram: 225 | denom = 1 226 | else: 227 | denom = 1 228 | 229 | c, gin, gout = word2vecModel(centerword, C1, context, tokens, inputVectors, outputVectors, dataset, word2vecCostAndGradient) 230 | cost += c / batchsize / denom 231 | grad[:N/2, :] += gin / batchsize / denom 232 | grad[N/2:, :] += gout / batchsize / denom 233 | 234 | return cost, grad 235 | 236 | def test_word2vec(): 237 | # Interface to the dataset for negative sampling 238 | dataset = type('dummy', (), {})() 239 | def dummySampleTokenIdx(): 240 | return random.randint(0, 4) 241 | 242 | def getRandomContext(C): 243 | tokens = ["a", "b", "c", "d", "e"] 244 | return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \ 245 | for i in xrange(2*C)] 246 | dataset.sampleTokenIdx = dummySampleTokenIdx 247 | dataset.getRandomContext = getRandomContext 248 | 249 | random.seed(31415) 250 | np.random.seed(9265) 251 | dummy_vectors = normalizeRows(np.random.randn(10,3)) 252 | dummy_tokens = dict([("a",0), ("b",1), ("c",2),("d",3),("e",4)]) 253 | print "==== Gradient check for skip-gram ====" 254 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5), dummy_vectors) 255 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) 256 | print "\n==== Gradient check for CBOW ====" 257 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5), dummy_vectors) 258 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) 259 | 260 | print "\n=== Results ===" 261 | print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) 262 | print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient) 263 | print cbow("a", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) 264 | print cbow("a", 2, ["a", "b", "a", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient) 265 | 266 | if __name__ == "__main__": 267 | test_normalize_rows() 268 | test_word2vec() -------------------------------------------------------------------------------- /assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q3_sgd import load_saved_params, sgd 7 | from q4_softmaxreg import softmaxRegression, getSentenceFeature, accuracy, softmax_wrapper 8 | 9 | # Try different regularizations and pick the best! 10 | # NOTE: fill in one more "your code here" below before running! 
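# Illustrative only: one reasonable choice for the block below is a log-spaced sweep,
# e.g. REGULARIZATION = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]. These values are an
# assumption, not prescribed by the starter code, and should be tuned against the dev set.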
11 | REGULARIZATION = None # Assign a list of floats in the block below 12 | ### YOUR CODE HERE 13 | raise NotImplementedError 14 | ### END YOUR CODE 15 | 16 | # Load the dataset 17 | dataset = StanfordSentiment() 18 | tokens = dataset.tokens() 19 | nWords = len(tokens) 20 | 21 | # Load the word vectors we trained earlier 22 | _, wordVectors0, _ = load_saved_params() 23 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 24 | dimVectors = wordVectors.shape[1] 25 | 26 | # Load the train set 27 | trainset = dataset.getTrainSentences() 28 | nTrain = len(trainset) 29 | trainFeatures = np.zeros((nTrain, dimVectors)) 30 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 31 | for i in xrange(nTrain): 32 | words, trainLabels[i] = trainset[i] 33 | trainFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 34 | 35 | # Prepare dev set features 36 | devset = dataset.getDevSentences() 37 | nDev = len(devset) 38 | devFeatures = np.zeros((nDev, dimVectors)) 39 | devLabels = np.zeros((nDev,), dtype=np.int32) 40 | for i in xrange(nDev): 41 | words, devLabels[i] = devset[i] 42 | devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 43 | 44 | # Try our regularization parameters 45 | results = [] 46 | for regularization in REGULARIZATION: 47 | random.seed(3141) 48 | np.random.seed(59265) 49 | weights = np.random.randn(dimVectors, 5) 50 | print "Training for reg=%f" % regularization 51 | 52 | # We will do batch optimization 53 | weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 54 | weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100) 55 | 56 | # Test on train set 57 | _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) 58 | trainAccuracy = accuracy(trainLabels, pred) 59 | print "Train accuracy (%%): %f" % trainAccuracy 60 | 61 | # Test on dev set 62 | _, _, pred = softmaxRegression(devFeatures, devLabels, weights) 63 | devAccuracy = accuracy(devLabels, pred) 64 | print "Dev accuracy (%%): %f" % devAccuracy 65 | 66 | # Save the results and weights 67 | results.append({ 68 | "reg" : regularization, 69 | "weights" : weights, 70 | "train" : trainAccuracy, 71 | "dev" : devAccuracy}) 72 | 73 | # Print the accuracies 74 | print "" 75 | print "=== Recap ===" 76 | print "Reg\t\tTrain\t\tDev" 77 | for result in results: 78 | print "%E\t%f\t%f" % ( 79 | result["reg"], 80 | result["train"], 81 | result["dev"]) 82 | print "" 83 | 84 | # Pick the best regularization parameters 85 | BEST_REGULARIZATION = None 86 | BEST_WEIGHTS = None 87 | 88 | ### YOUR CODE HERE 89 | raise NotImplementedError 90 | ### END YOUR CODE 91 | 92 | # Test your findings on the test set 93 | testset = dataset.getTestSentences() 94 | nTest = len(testset) 95 | testFeatures = np.zeros((nTest, dimVectors)) 96 | testLabels = np.zeros((nTest,), dtype=np.int32) 97 | for i in xrange(nTest): 98 | words, testLabels[i] = testset[i] 99 | testFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 100 | 101 | _, _, pred = softmaxRegression(testFeatures, testLabels, BEST_WEIGHTS) 102 | print "Best regularization value: %E" % BEST_REGULARIZATION 103 | print "Test accuracy (%%): %f" % accuracy(testLabels, pred) 104 | 105 | # Make a plot of regularization vs accuracy 106 | plt.plot(REGULARIZATION, [x["train"] for x in results]) 107 | plt.plot(REGULARIZATION, [x["dev"] for x in results]) 108 | plt.xscale('log') 109 | plt.xlabel("regularization") 110 | plt.ylabel("accuracy") 111 | plt.legend(['train', 'dev'], loc='upper left') 112 | plt.savefig("q4_reg_v_acc.png") 113 | 
plt.show() 114 | 115 | -------------------------------------------------------------------------------- /assignment1/q4_softmaxreg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q1_softmax import softmax 7 | from q2_gradcheck import gradcheck_naive 8 | from q3_sgd import load_saved_params 9 | 10 | def getSentenceFeature(tokens, wordVectors, sentence): 11 | """ Obtain the sentence feature for sentiment analysis by averaging its word vectors """ 12 | # Implement computation for the sentence features given a sentence. 13 | 14 | # Inputs: 15 | # - tokens: a dictionary that maps words to their indices in 16 | # the word vector list 17 | # - wordVectors: word vectors (each row) for all tokens 18 | # - sentence: a list of words in the sentence of interest 19 | 20 | # Output: 21 | # - sentVector: feature vector for the sentence 22 | 23 | sentVector = np.zeros((wordVectors.shape[1],)) 24 | 25 | ### YOUR CODE HERE 26 | raise NotImplementedError 27 | ### END YOUR CODE 28 | 29 | return sentVector 30 | 31 | def softmaxRegression(features, labels, weights, regularization = 0.0, nopredictions = False): 32 | """ Softmax Regression """ 33 | # Implement softmax regression with weight regularization. 34 | 35 | # Inputs: 36 | # - features: feature vectors, each row is a feature vector 37 | # - labels: labels corresponding to the feature vectors 38 | # - weights: weights of the regressor 39 | # - regularization: L2 regularization constant 40 | 41 | # Output: 42 | # - cost: cost of the regressor 43 | # - grad: gradient of the regressor cost with respect to its 44 | # weights 45 | # - pred: label predictions of the regressor (you might find 46 | # np.argmax helpful) 47 | 48 | prob = softmax(features.dot(weights)) 49 | if len(features.shape) > 1: 50 | N = features.shape[0] 51 | else: 52 | N = 1 53 | # A vectorized implementation of 1/N * sum(cross_entropy(x_i, y_i)) + regularization/2 * |w|^2 54 | cost = np.sum(-np.log(prob[range(N), labels])) / N 55 | cost += 0.5 * regularization * np.sum(weights ** 2) 56 | 57 | ### YOUR CODE HERE: compute the gradients and predictions 58 | raise NotImplementedError 59 | ### END YOUR CODE 60 | 61 | if nopredictions: 62 | return cost, grad 63 | else: 64 | return cost, grad, pred 65 | 66 | def accuracy(y, yhat): 67 | """ Accuracy (in %) for the classifier """ 68 | assert(y.shape == yhat.shape) 69 | return np.sum(y == yhat) * 100.0 / y.size 70 | 71 | def softmax_wrapper(features, labels, weights, regularization = 0.0): 72 | cost, grad, _ = softmaxRegression(features, labels, weights, 73 | regularization) 74 | return cost, grad 75 | 76 | def sanity_check(): 77 | """ 78 | Run python q4_softmaxreg.py. 
79 | """ 80 | random.seed(314159) 81 | np.random.seed(265) 82 | 83 | dataset = StanfordSentiment() 84 | tokens = dataset.tokens() 85 | nWords = len(tokens) 86 | 87 | _, wordVectors0, _ = load_saved_params() 88 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 89 | dimVectors = wordVectors.shape[1] 90 | 91 | dummy_weights = 0.1 * np.random.randn(dimVectors, 5) 92 | dummy_features = np.zeros((10, dimVectors)) 93 | dummy_labels = np.zeros((10,), dtype=np.int32) 94 | for i in xrange(10): 95 | words, dummy_labels[i] = dataset.getRandomTrainSentence() 96 | dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) 97 | print "==== Gradient check for softmax regression ====" 98 | gradcheck_naive(lambda weights: softmaxRegression(dummy_features, 99 | dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) 100 | 101 | print "\n=== Results ===" 102 | print softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0) 103 | 104 | if __name__ == "__main__": 105 | sanity_check() -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | mock==1.0.1 7 | nose==1.3.4 8 | pyparsing==2.0.3 9 | python-dateutil==2.4.0 10 | pytz==2014.10 11 | pyzmq==14.4.1 12 | six==1.9.0 13 | tornado==4.0.2 14 | wsgiref==0.1.2 15 | --------------------------------------------------------------------------------