├── README.md └── assignment1 ├── .DS_Store ├── .gitignore ├── collectSubmission.sh ├── cs224d ├── .DS_Store ├── __init__.py ├── data_utils.py └── datasets │ ├── .DS_Store │ ├── __MACOSX │ └── stanfordSentimentTreebank │ │ ├── ._README.txt │ │ ├── ._datasetSentences.txt │ │ ├── ._datasetSplit.txt │ │ ├── ._dictionary.txt │ │ ├── ._original_rt_snippets.txt │ │ └── ._sentiment_labels.txt │ ├── get_datasets.sh │ └── stanfordSentimentTreebank │ ├── README.txt │ ├── SOStr.txt │ ├── STree.txt │ ├── datasetSentences.txt │ ├── datasetSplit.txt │ ├── dictionary.txt │ ├── original_rt_snippets.txt │ └── sentiment_labels.txt ├── q1_softmax.ipynb ├── q1_softmax.py ├── q2_gradcheck.ipynb ├── q2_gradcheck.py ├── q2_neural.ipynb ├── q2_neural.py ├── q2_sigmoid.ipynb ├── q2_sigmoid.py ├── q3_run.py ├── q3_sgd.ipynb ├── q3_sgd.py ├── q3_word2vec.ipynb ├── q3_word2vec.py ├── q4_sentiment.py ├── q4_softmaxreg.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | # CS224d 2 | Stanford cs224d 2016 assignment 3 | -------------------------------------------------------------------------------- /assignment1/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/.DS_Store -------------------------------------------------------------------------------- /assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .ipynb_checkpoints/* 4 | -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy 3 | -------------------------------------------------------------------------------- /assignment1/cs224d/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/.DS_Store -------------------------------------------------------------------------------- /assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /assignment1/cs224d/data_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import cPickle as pickle 5 | import numpy as np 6 | import os 7 | import random 8 | 9 | class StanfordSentiment: 10 | def __init__(self, path=None, tablesize = 1000000): 11 | if not path: 12 | path = "cs224d/datasets/stanfordSentimentTreebank" 13 | 14 | self.path = path 15 | self.tablesize = tablesize 16 | 17 | def tokens(self): 18 | if hasattr(self, "_tokens") and self._tokens: 19 | return self._tokens 20 | 21 | tokens = dict() 22 | tokenfreq = dict() 23 | wordcount = 0 24 | revtokens = [] 25 | idx = 0 26 | 27 | for sentence in self.sentences(): 28 | for w in sentence: 29 | wordcount += 1 30 | if not w in tokens: 31 | tokens[w] = idx 32 | revtokens += [w] 33 | tokenfreq[w] = 1 34 | idx += 1 35 | else: 36 | tokenfreq[w] += 1 37 | 38 | tokens["UNK"] = idx 39 | 
revtokens += ["UNK"] 40 | tokenfreq["UNK"] = 1 41 | wordcount += 1 42 | 43 | self._tokens = tokens 44 | self._tokenfreq = tokenfreq 45 | self._wordcount = wordcount 46 | self._revtokens = revtokens 47 | return self._tokens 48 | 49 | def sentences(self): 50 | if hasattr(self, "_sentences") and self._sentences: 51 | return self._sentences 52 | 53 | sentences = [] 54 | with open(self.path + "/datasetSentences.txt", "r") as f: 55 | first = True 56 | for line in f: 57 | if first: 58 | first = False 59 | continue 60 | 61 | splitted = line.strip().split()[1:] 62 | # Deal with some peculiar encoding issues with this file 63 | sentences += [[w.lower().decode("utf-8").encode('latin1') for w in splitted]] 64 | 65 | self._sentences = sentences 66 | self._sentlengths = np.array([len(s) for s in sentences]) 67 | self._cumsentlen = np.cumsum(self._sentlengths) 68 | 69 | return self._sentences 70 | 71 | def numSentences(self): 72 | if hasattr(self, "_numSentences") and self._numSentences: 73 | return self._numSentences 74 | else: 75 | self._numSentences = len(self.sentences()) 76 | return self._numSentences 77 | 78 | def allSentences(self): 79 | if hasattr(self, "_allsentences") and self._allsentences: 80 | return self._allsentences 81 | 82 | sentences = self.sentences() 83 | rejectProb = self.rejectProb() 84 | tokens = self.tokens() 85 | allsentences = [[w for w in s 86 | if 0 >= rejectProb[tokens[w]] or random.random() >= rejectProb[tokens[w]]] 87 | for s in sentences * 30] 88 | 89 | allsentences = [s for s in allsentences if len(s) > 1] 90 | 91 | self._allsentences = allsentences 92 | 93 | return self._allsentences 94 | 95 | def getRandomContext(self, C=5): 96 | allsent = self.allSentences() 97 | sentID = random.randint(0, len(allsent) - 1) 98 | sent = allsent[sentID] 99 | wordID = random.randint(0, len(sent) - 1) 100 | 101 | context = sent[max(0, wordID - C):wordID] 102 | if wordID+1 < len(sent): 103 | context += sent[wordID+1:min(len(sent), wordID + C + 1)] 104 | 105 | centerword = sent[wordID] 106 | context = [w for w in context if w != centerword] 107 | 108 | if len(context) > 0: 109 | return centerword, context 110 | else: 111 | return self.getRandomContext(C) 112 | 113 | def sent_labels(self): 114 | if hasattr(self, "_sent_labels") and self._sent_labels: 115 | return self._sent_labels 116 | 117 | dictionary = dict() 118 | phrases = 0 119 | with open(self.path + "/dictionary.txt", "r") as f: 120 | for line in f: 121 | line = line.strip() 122 | if not line: continue 123 | splitted = line.split("|") 124 | dictionary[splitted[0].lower()] = int(splitted[1]) 125 | phrases += 1 126 | 127 | labels = [0.0] * phrases 128 | with open(self.path + "/sentiment_labels.txt", "r") as f: 129 | first = True 130 | for line in f: 131 | if first: 132 | first = False 133 | continue 134 | 135 | line = line.strip() 136 | if not line: continue 137 | splitted = line.split("|") 138 | labels[int(splitted[0])] = float(splitted[1]) 139 | 140 | sent_labels = [0.0] * self.numSentences() 141 | sentences = self.sentences() 142 | for i in xrange(self.numSentences()): 143 | sentence = sentences[i] 144 | full_sent = " ".join(sentence).replace('-lrb-', '(').replace('-rrb-', ')') 145 | sent_labels[i] = labels[dictionary[full_sent]] 146 | 147 | self._sent_labels = sent_labels 148 | return self._sent_labels 149 | 150 | def dataset_split(self): 151 | if hasattr(self, "_split") and self._split: 152 | return self._split 153 | 154 | split = [[] for i in xrange(3)] 155 | with open(self.path + "/datasetSplit.txt", "r") as f: 156 | first = True 
157 | for line in f: 158 | if first: 159 | first = False 160 | continue 161 | 162 | splitted = line.strip().split(",") 163 | split[int(splitted[1]) - 1] += [int(splitted[0]) - 1] 164 | 165 | self._split = split 166 | return self._split 167 | 168 | def getRandomTrainSentence(self): 169 | split = self.dataset_split() 170 | sentId = split[0][random.randint(0, len(split[0]) - 1)] 171 | return self.sentences()[sentId], self.categorify(self.sent_labels()[sentId]) 172 | 173 | def categorify(self, label): 174 | if label <= 0.2: 175 | return 0 176 | elif label <= 0.4: 177 | return 1 178 | elif label <= 0.6: 179 | return 2 180 | elif label <= 0.8: 181 | return 3 182 | else: 183 | return 4 184 | 185 | def getDevSentences(self): 186 | return self.getSplitSentences(2) 187 | 188 | def getTestSentences(self): 189 | return self.getSplitSentences(1) 190 | 191 | def getTrainSentences(self): 192 | return self.getSplitSentences(0) 193 | 194 | def getSplitSentences(self, split=0): 195 | ds_split = self.dataset_split() 196 | return [(self.sentences()[i], self.categorify(self.sent_labels()[i])) for i in ds_split[split]] 197 | 198 | def sampleTable(self): 199 | if hasattr(self, '_sampleTable') and self._sampleTable is not None: 200 | return self._sampleTable 201 | 202 | nTokens = len(self.tokens()) 203 | samplingFreq = np.zeros((nTokens,)) 204 | self.allSentences() 205 | i = 0 206 | for w in xrange(nTokens): 207 | w = self._revtokens[i] 208 | if w in self._tokenfreq: 209 | freq = 1.0 * self._tokenfreq[w] 210 | # Reweigh 211 | freq = freq ** 0.75 212 | else: 213 | freq = 0.0 214 | samplingFreq[i] = freq 215 | i += 1 216 | 217 | samplingFreq /= np.sum(samplingFreq) 218 | samplingFreq = np.cumsum(samplingFreq) * self.tablesize 219 | 220 | self._sampleTable = [0] * self.tablesize 221 | 222 | j = 0 223 | for i in xrange(self.tablesize): 224 | while i > samplingFreq[j]: 225 | j += 1 226 | self._sampleTable[i] = j 227 | 228 | return self._sampleTable 229 | 230 | def rejectProb(self): 231 | if hasattr(self, '_rejectProb') and self._rejectProb is not None: 232 | return self._rejectProb 233 | 234 | threshold = 1e-5 * self._wordcount 235 | 236 | nTokens = len(self.tokens()) 237 | rejectProb = np.zeros((nTokens,)) 238 | for i in xrange(nTokens): 239 | w = self._revtokens[i] 240 | freq = 1.0 * self._tokenfreq[w] 241 | # Reweigh 242 | rejectProb[i] = max(0, 1 - np.sqrt(threshold / freq)) 243 | 244 | self._rejectProb = rejectProb 245 | return self._rejectProb 246 | 247 | def sampleTokenIdx(self): 248 | return self.sampleTable()[random.randint(0, self.tablesize - 1)] -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/.DS_Store -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._README.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSentences.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSentences.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSplit.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._datasetSplit.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._dictionary.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._dictionary.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._original_rt_snippets.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._original_rt_snippets.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._sentiment_labels.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yunjey/cs224d/0f9ba0c6e2bc21dff855eda4a2b1f9ec79b66da8/assignment1/cs224d/datasets/__MACOSX/stanfordSentimentTreebank/._sentiment_labels.txt -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/stanfordSentimentTreebank/README.txt: -------------------------------------------------------------------------------- 1 | Stanford Sentiment Treebank V1.0 2 | 3 | This is the dataset of the paper: 4 | 5 | Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank 6 | Richard Socher, Alex Perelygin, Jean Wu, Jason Chuang, Christopher Manning, Andrew Ng and Christopher Potts 7 | Conference on Empirical Methods in Natural Language Processing (EMNLP 2013) 8 | 9 | If you use this dataset in your research, please cite the above paper. 10 | 11 | @incollection{SocherEtAl2013:RNTN, 12 | title = {{Parsing With Compositional Vector Grammars}}, 13 | author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts}, 14 | booktitle = {{EMNLP}}, 15 | year = {2013} 16 | } 17 | 18 | This file includes: 19 | 1. original_rt_snippets.txt contains 10,605 processed snippets from the original pool of Rotten Tomatoes HTML files. Please note that some snippet may contain multiple sentences. 20 | 21 | 2. 
dictionary.txt contains all phrases and their IDs, separated by a vertical line | 22 | 23 | 3. sentiment_labels.txt contains all phrase ids and the corresponding sentiment labels, separated by a vertical line. 24 | Note that you can recover the 5 classes by mapping the positivity probability using the following cut-offs: 25 | [0, 0.2], (0.2, 0.4], (0.4, 0.6], (0.6, 0.8], (0.8, 1.0] 26 | for very negative, negative, neutral, positive, very positive, respectively. 27 | Please note that phrase ids and sentence ids are not the same. 28 | 29 | 4. SOStr.txt and STree.txt encode the structure of the parse trees. 30 | STree encodes the trees in a parent pointer format. Each line corresponds to each sentence in the datasetSentences.txt file. The Matlab code of this paper will show you how to read this format if you are not familiar with it. 31 | 32 | 5. datasetSentences.txt contains the sentence index, followed by the sentence string separated by a tab. These are the sentences of the train/dev/test sets. 33 | 34 | 6. datasetSplit.txt contains the sentence index (corresponding to the index in datasetSentences.txt file) followed by the set label separated by a comma: 35 | 1 = train 36 | 2 = test 37 | 3 = dev 38 | 39 | Please note that the datasetSentences.txt file has more sentences/lines than the original_rt_snippet.txt. 40 | Each row in the latter represents a snippet as shown on RT, whereas the former is each sub sentence as determined by the Stanford parser. 41 | 42 | For comparing research and training models, please use the provided train/dev/test splits. 43 | 44 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 10, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "def softmax(x):\n", 24 | " \"\"\"\n", 25 | " Compute the softmax function for each row of the input x.\n", 26 | "\n", 27 | " It is crucial that this function is optimized for speed because\n", 28 | " it will be used frequently in later code.\n", 29 | " You might find numpy functions np.exp, np.sum, np.reshape,\n", 30 | " np.max, and numpy broadcasting useful for this task. (numpy\n", 31 | " broadcasting documentation:\n", 32 | " http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)\n", 33 | "\n", 34 | " You should also make sure that your code works for one\n", 35 | " dimensional inputs (treat the vector as a row), you might find\n", 36 | " it helpful for your later problems.\n", 37 | "\n", 38 | " You must implement the optimization in problem 1(a) of the \n", 39 | " written assignment!\n", 40 | " \"\"\"\n", 41 | "\n", 42 | " ### YOUR CODE HERE\n", 43 | " \n", 44 | " if x.ndim == 1:\n", 45 | " x -= np.min(x) # solving overflow problem\n", 46 | " x = np.exp(x)\n", 47 | " x /= np.sum(x)\n", 48 | " else:\n", 49 | " x -= np.min(x, axis=1, keepdims=True) # solving overflow problem\n", 50 | " x = np.exp(x)\n", 51 | " x /= np.sum(x, axis=1, keepdims=True)\n", 52 | "\n", 53 | " ### END YOUR CODE\n", 54 | " \n", 55 | " return x\n", 56 | "\n", 57 | "def test_softmax_basic():\n", 58 | " \"\"\"\n", 59 | " Some simple tests to get you started. 
\n", 60 | " Warning: these are not exhaustive.\n", 61 | " \"\"\"\n", 62 | " print \"Running basic tests...\"\n", 63 | " test1 = softmax(np.array([1,2]))\n", 64 | " print test1\n", 65 | " assert np.amax(np.fabs(test1 - np.array(\n", 66 | " [0.26894142, 0.73105858]))) <= 1e-6\n", 67 | "\n", 68 | " test2 = softmax(np.array([[1001,1002],[3,4]]))\n", 69 | " print test2\n", 70 | " assert np.amax(np.fabs(test2 - np.array(\n", 71 | " [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6\n", 72 | "\n", 73 | " test3 = softmax(np.array([[-1001,-1002]]))\n", 74 | " print test3\n", 75 | " assert np.amax(np.fabs(test3 - np.array(\n", 76 | " [0.73105858, 0.26894142]))) <= 1e-6\n", 77 | "\n", 78 | " print \"You should verify these results!\\n\"" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 11, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | "Running basic tests...\n", 93 | "[ 0.26894142 0.73105858]\n", 94 | "[[ 0.26894142 0.73105858]\n", 95 | " [ 0.26894142 0.73105858]]\n", 96 | "[[ 0.73105858 0.26894142]]\n", 97 | "You should verify these results!\n", 98 | "\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "test_softmax_basic()" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 2", 110 | "language": "python", 111 | "name": "python2" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 2 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython2", 123 | "version": "2.7.11" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 0 128 | } 129 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | def softmax(x): 5 | """ 6 | Compute the softmax function for each row of the input x. 7 | 8 | It is crucial that this function is optimized for speed because 9 | it will be used frequently in later code. 10 | You might find numpy functions np.exp, np.sum, np.reshape, 11 | np.max, and numpy broadcasting useful for this task. (numpy 12 | broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 14 | 15 | You should also make sure that your code works for one 16 | dimensional inputs (treat the vector as a row), you might find 17 | it helpful for your later problems. 18 | 19 | You must implement the optimization in problem 1(a) of the 20 | written assignment! 21 | """ 22 | 23 | ### YOUR CODE HERE 24 | 25 | if x.ndim == 1: 26 | x -= np.min(x) # solving overflow problem 27 | x = np.exp(x) 28 | x /= np.sum(x) 29 | else: 30 | x -= np.min(x, axis=1, keepdims=True) # solving overflow problem 31 | x = np.exp(x) 32 | x /= np.sum(x, axis=1, keepdims=True) 33 | 34 | ### END YOUR CODE 35 | 36 | return x 37 | 38 | def test_softmax_basic(): 39 | """ 40 | Some simple tests to get you started. 41 | Warning: these are not exhaustive. 42 | """ 43 | print "Running basic tests..." 
44 | test1 = softmax(np.array([1,2])) 45 | print test1 46 | assert np.amax(np.fabs(test1 - np.array( 47 | [0.26894142, 0.73105858]))) <= 1e-6 48 | 49 | test2 = softmax(np.array([[1001,1002],[3,4]])) 50 | print test2 51 | assert np.amax(np.fabs(test2 - np.array( 52 | [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6 53 | 54 | test3 = softmax(np.array([[-1001,-1002]])) 55 | print test3 56 | assert np.amax(np.fabs(test3 - np.array( 57 | [0.73105858, 0.26894142]))) <= 1e-6 58 | 59 | print "You should verify these results!\n" 60 | 61 | def test_softmax(): 62 | """ 63 | Use this space to test your softmax implementation by running: 64 | python q1_softmax.py 65 | This function will not be called by the autograder, nor will 66 | your tests be graded. 67 | """ 68 | print "Running your tests..." 69 | ### YOUR CODE HERE 70 | raise NotImplementedError 71 | ### END YOUR CODE 72 | 73 | if __name__ == "__main__": 74 | test_softmax_basic() 75 | test_softmax() -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 7, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# First implement a gradient checker by filling in the following functions\n", 24 | "def gradcheck_naive(f, x):\n", 25 | " \"\"\" \n", 26 | " Gradient check for a function f \n", 27 | " - f should be a function that takes a single argument and outputs the cost and its gradients\n", 28 | " - x is the point (numpy array) to check the gradient at\n", 29 | " \"\"\" \n", 30 | "\n", 31 | " rndstate = random.getstate()\n", 32 | " random.setstate(rndstate) \n", 33 | " fx, grad = f(x) # Evaluate function value at original point\n", 34 | " h = 1e-4\n", 35 | "\n", 36 | " # Iterate over all indexes in x\n", 37 | " it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])\n", 38 | " while not it.finished:\n", 39 | " ix = it.multi_index\n", 40 | "\n", 41 | " ### try modifying x[ix] with h defined above to compute numerical gradients\n", 42 | " ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it \n", 43 | " ### possible to test cost functions with built in randomness later\n", 44 | " ### YOUR CODE HERE:\n", 45 | " \n", 46 | " random.setstate(rndstate)\n", 47 | " tmp1 = np.copy(x) \n", 48 | " tmp1[ix] = tmp1[ix] + h\n", 49 | " f1, _ = f(tmp1)\n", 50 | " \n", 51 | " random.setstate(rndstate)\n", 52 | " tmp2 = np.copy(x) \n", 53 | " tmp2[ix] = tmp2[ix] - h\n", 54 | " f2, _ = f(tmp2)\n", 55 | " numgrad = (f1 - f2) / (2 * h)\n", 56 | " \n", 57 | " ### END YOUR CODE\n", 58 | "\n", 59 | " # Compare gradients\n", 60 | " reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))\n", 61 | " if reldiff > 1e-5:\n", 62 | " print \"Gradient check failed.\"\n", 63 | " print \"First gradient error found at index %s\" % str(ix)\n", 64 | " print \"Your gradient: %f \\t Numerical gradient: %f\" % (grad[ix], numgrad)\n", 65 | " return\n", 66 | " \n", 67 | " it.iternext() # Step to next dimension\n", 68 | "\n", 69 | " print \"Gradient check passed!\"\n", 70 | "\n", 71 | "def sanity_check():\n", 72 | " \"\"\"\n", 73 | " Some basic sanity 
checks.\n", 74 | " \"\"\"\n", 75 | " quad = lambda x: (np.sum(x ** 2), x * 2)\n", 76 | "\n", 77 | " print \"Running sanity checks...\"\n", 78 | " gradcheck_naive(quad, np.array(123.456)) # scalar test\n", 79 | " gradcheck_naive(quad, np.random.randn(3,)) # 1-D test\n", 80 | " gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test\n", 81 | " print \"\"" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 8, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Running sanity checks...\n", 96 | "Gradient check passed!\n", 97 | "Gradient check passed!\n", 98 | "Gradient check passed!\n", 99 | "\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "sanity_check()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 2", 120 | "language": "python", 121 | "name": "python2" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 2 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython2", 133 | "version": "2.7.11" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 0 138 | } 139 | -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # First implement a gradient checker by filling in the following functions 5 | def gradcheck_naive(f, x): 6 | """ 7 | Gradient check for a function f 8 | - f should be a function that takes a single argument and outputs the cost and its gradients 9 | - x is the point (numpy array) to check the gradient at 10 | """ 11 | 12 | rndstate = random.getstate() 13 | random.setstate(rndstate) 14 | fx, grad = f(x) # Evaluate function value at original point 15 | h = 1e-6 16 | 17 | # Iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | ix = it.multi_index 21 | 22 | ### try modifying x[ix] with h defined above to compute numerical gradients 23 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 24 | ### possible to test cost functions with built in randomness later 25 | ### YOUR CODE HERE: 26 | ''' 27 | x[ix] += h 28 | random.setstate(rndstate) 29 | plus_h_fx, plus_h_grad = f( x ) 30 | random.setstate(rndstate) 31 | x[ix] -= 2. * h 32 | minus_h_fx, minus_h_grad = f( x ) 33 | numgrad = (plus_h_fx - minus_h_fx) / 2. /h 34 | ''' 35 | 36 | params = np.copy(x) 37 | params[ix] = params[ix] + h 38 | random.setstate(rndstate) 39 | f1, _ = f(params) 40 | 41 | params[ix] = params[ix] - 2 * h 42 | random.setstate(rndstate) 43 | f2, _ = f(params) 44 | numgrad = (f1 - f2) / (2 * h) 45 | 46 | ### END YOUR CODE 47 | 48 | # Compare gradients 49 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 50 | if reldiff > 1e-5: 51 | print "Gradient check failed." 
52 | print "First gradient error found at index %s" % str(ix) 53 | print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad) 54 | return 55 | 56 | it.iternext() # Step to next dimension 57 | 58 | print "Gradient check passed!" 59 | 60 | def sanity_check(): 61 | """ 62 | Some basic sanity checks. 63 | """ 64 | quad = lambda x: (np.sum(x ** 2), x * 2) 65 | 66 | print "Running sanity checks..." 67 | gradcheck_naive(quad, np.array(123.456)) # scalar test 68 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 69 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 70 | print "" 71 | 72 | def your_sanity_checks(): 73 | """ 74 | Use this space add any additional sanity checks by running: 75 | python q2_gradcheck.py 76 | This function will not be called by the autograder, nor will 77 | your additional tests be graded. 78 | """ 79 | print "Running your sanity checks..." 80 | ### YOUR CODE HERE 81 | raise NotImplementedError 82 | ### END YOUR CODE 83 | 84 | if __name__ == "__main__": 85 | sanity_check() 86 | your_sanity_checks() 87 | -------------------------------------------------------------------------------- /assignment1/q2_neural.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random\n", 13 | "\n", 14 | "from q1_softmax import softmax\n", 15 | "from q2_sigmoid import sigmoid, sigmoid_grad\n", 16 | "from q2_gradcheck import gradcheck_naive" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "def forward_backward_prop(data, labels, params, dimensions):\n", 28 | " \"\"\" \n", 29 | " Forward and backward propagation for a two-layer sigmoidal network \n", 30 | " \n", 31 | " Compute the forward propagation and for the cross entropy cost,\n", 32 | " and backward propagation for the gradients for all parameters.\n", 33 | " \"\"\"\n", 34 | "\n", 35 | " ### Unpack network parameters (do not modify)\n", 36 | " ofs = 0\n", 37 | " Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) # (dim_x, dim_h, dim_y)\n", 38 | "\n", 39 | " W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) # (dim_x, dim_h)\n", 40 | " ofs += Dx * H\n", 41 | " b1 = np.reshape(params[ofs:ofs + H], (1, H)) # (1, dim_h)\n", 42 | " ofs += H\n", 43 | " W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) # (dim_h, dim_y)\n", 44 | " ofs += H * Dy\n", 45 | " b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) # (1, dim_y)\n", 46 | " \n", 47 | " \n", 48 | " ### YOUR CODE HERE: forward propagation\n", 49 | " \n", 50 | " h = sigmoid(np.dot(data, W1) + b1) \n", 51 | " pred = sigmoid(np.dot(h, W2) + b2) \n", 52 | " cost = (-1) * np.sum(labels * np.log(pred) + (1 - labels) * np.log(1 - pred))\n", 53 | " \n", 54 | " ### END YOUR CODE\n", 55 | " \n", 56 | " \n", 57 | " ### YOUR CODE HERE: backward propagation\n", 58 | " \n", 59 | " dout = pred - labels \n", 60 | " dh = np.dot(dout, W2.T) * sigmoid_grad(h) \n", 61 | " \n", 62 | " gradW2 = np.dot(h.T, dout) \n", 63 | " gradb2 = np.sum(dout, 0) \n", 64 | " gradW1 = np.dot(data.T, dh)\n", 65 | " gradb1 = np.sum(dh, 0)\n", 66 | " \n", 67 | " ### END YOUR CODE\n", 68 | "\n", 69 | " ### Stack gradients (do not modify)\n", 70 | " grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), \n", 71 | " gradW2.flatten(), 
gradb2.flatten()))\n", 72 | " \n", 73 | " return cost, grad\n", 74 | "\n", 75 | "def sanity_check():\n", 76 | " \"\"\"\n", 77 | " Set up fake data and parameters for the neural network, and test using \n", 78 | " gradcheck.\n", 79 | " \"\"\"\n", 80 | " print \"Running sanity check...\"\n", 81 | "\n", 82 | " N = 20\n", 83 | " dimensions = [10, 5, 10]\n", 84 | " data = np.random.randn(N, dimensions[0]) # each row will be a datum\n", 85 | " labels = np.zeros((N, dimensions[2]))\n", 86 | " for i in xrange(N):\n", 87 | " labels[i,random.randint(0,dimensions[2]-1)] = 1 # one-hot labels\n", 88 | " \n", 89 | " params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (\n", 90 | " dimensions[1] + 1) * dimensions[2], )\n", 91 | "\n", 92 | " gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,\n", 93 | " dimensions), params)\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Running sanity check...\n", 108 | "Gradient check passed!\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "sanity_check()" 114 | ] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 2", 120 | "language": "python", 121 | "name": "python2" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 2 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython2", 133 | "version": "2.7.11" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 0 138 | } 139 | -------------------------------------------------------------------------------- /assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_sigmoid import sigmoid, sigmoid_grad 6 | from q2_gradcheck import gradcheck_naive 7 | def forward_backward_prop(data, labels, params, dimensions): 8 | """ 9 | Forward and backward propagation for a two-layer sigmoidal network 10 | 11 | Compute the forward propagation and for the cross entropy cost, 12 | and backward propagation for the gradients for all parameters. 
13 | """ 14 | 15 | ### Unpack network parameters (do not modify) 16 | ofs = 0 17 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) # (dim_x, dim_h, dim_y) 18 | 19 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) # (dim_x, dim_h) 20 | ofs += Dx * H 21 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) # (1, dim_h) 22 | ofs += H 23 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) # (dim_h, dim_y) 24 | ofs += H * Dy 25 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) # (1, dim_y) 26 | 27 | 28 | ### YOUR CODE HERE: forward propagation 29 | 30 | h = sigmoid(np.dot(data, W1) + b1) 31 | pred = sigmoid(np.dot(h, W2) + b2) 32 | cost = (-1) * np.sum(labels * np.log(pred) + (1 - labels) * np.log(1 - pred)) # sigmoid 함수를 썼을 때 cost function 33 | 34 | ### END YOUR CODE 35 | 36 | 37 | ### YOUR CODE HERE: backward propagation 38 | 39 | dout = pred - labels 40 | dh = np.dot(dout, W2.T) * sigmoid_grad(h) 41 | 42 | gradW2 = np.dot(h.T, dout) 43 | gradb2 = np.sum(dout, 0) 44 | gradW1 = np.dot(data.T, dh) 45 | gradb1 = np.sum(dh, 0) 46 | 47 | ### END YOUR CODE 48 | 49 | ### Stack gradients (do not modify) 50 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 51 | gradW2.flatten(), gradb2.flatten())) 52 | 53 | return cost, grad 54 | 55 | def sanity_check(): 56 | """ 57 | Set up fake data and parameters for the neural network, and test using 58 | gradcheck. 59 | """ 60 | print "Running sanity check..." 61 | 62 | N = 20 63 | dimensions = [10, 5, 10] 64 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 65 | labels = np.zeros((N, dimensions[2])) 66 | for i in xrange(N): 67 | labels[i,random.randint(0,dimensions[2]-1)] = 1 # one-hot labels 68 | 69 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 70 | dimensions[1] + 1) * dimensions[2], ) 71 | 72 | gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, 73 | dimensions), params) 74 | 75 | 76 | def your_sanity_checks(): 77 | """ 78 | Use this space add any additional sanity checks by running: 79 | python q2_neural.py 80 | This function will not be called by the autograder, nor will 81 | your additional tests be graded. 82 | """ 83 | print "Running your sanity checks..." 84 | ### YOUR CODE HERE 85 | raise NotImplementedError 86 | ### END YOUR CODE 87 | 88 | if __name__ == "__main__": 89 | sanity_check() 90 | your_sanity_checks() -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "\n", 13 | "def sigmoid(x):\n", 14 | " \"\"\"\n", 15 | " Compute the sigmoid function for the input here.\n", 16 | " \"\"\"\n", 17 | " \n", 18 | " ### YOUR CODE HERE\n", 19 | " \n", 20 | " x = 1. / (1. + np.exp(-x))\n", 21 | " \n", 22 | " ### END YOUR CODE\n", 23 | " \n", 24 | " return x\n", 25 | "\n", 26 | "def sigmoid_grad(f):\n", 27 | " \"\"\"\n", 28 | " Compute the gradient for the sigmoid function here. Note that\n", 29 | " for this implementation, the input f should be the sigmoid\n", 30 | " function value of your original input x. 
\n", 31 | " \"\"\"\n", 32 | " \n", 33 | " ### YOUR CODE HERE\n", 34 | " \n", 35 | " f = f * (1 - f)\n", 36 | " \n", 37 | " ### END YOUR CODE\n", 38 | " \n", 39 | " return f\n", 40 | "\n", 41 | "def test_sigmoid_basic():\n", 42 | " \"\"\"\n", 43 | " Some simple tests to get you started. \n", 44 | " Warning: these are not exhaustive.\n", 45 | " \"\"\"\n", 46 | " print \"Running basic tests...\"\n", 47 | " x = np.array([[1, 2], [-1, -2]])\n", 48 | " f = sigmoid(x)\n", 49 | " g = sigmoid_grad(f)\n", 50 | " print f\n", 51 | " assert np.amax(f - np.array([[0.73105858, 0.88079708], \n", 52 | " [0.26894142, 0.11920292]])) <= 1e-6\n", 53 | " print g\n", 54 | " assert np.amax(g - np.array([[0.19661193, 0.10499359],\n", 55 | " [0.19661193, 0.10499359]])) <= 1e-6\n", 56 | " print \"You should verify these results!\\n\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 8, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "Running basic tests...\n", 71 | "[[ 0.73105858 0.88079708]\n", 72 | " [ 0.26894142 0.11920292]]\n", 73 | "[[ 0.19661193 0.10499359]\n", 74 | " [ 0.19661193 0.10499359]]\n", 75 | "You should verify these results!\n", 76 | "\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "test_sigmoid_basic()" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 2", 88 | "language": "python", 89 | "name": "python2" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 2 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython2", 101 | "version": "2.7.11" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 0 106 | } 107 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | 8 | ### YOUR CODE HERE 9 | 10 | x = 1. / (1. + np.exp(-x)) 11 | 12 | ### END YOUR CODE 13 | 14 | return x 15 | 16 | def sigmoid_grad(f): 17 | """ 18 | Compute the gradient for the sigmoid function here. Note that 19 | for this implementation, the input f should be the sigmoid 20 | function value of your original input x. 21 | """ 22 | 23 | ### YOUR CODE HERE 24 | 25 | f = f * (1 - f) 26 | 27 | ### END YOUR CODE 28 | 29 | return f 30 | 31 | def test_sigmoid_basic(): 32 | """ 33 | Some simple tests to get you started. 34 | Warning: these are not exhaustive. 35 | """ 36 | print "Running basic tests..." 37 | x = np.array([[1, 2], [-1, -2]]) 38 | f = sigmoid(x) 39 | g = sigmoid_grad(f) 40 | print f 41 | assert np.amax(f - np.array([[0.73105858, 0.88079708], 42 | [0.26894142, 0.11920292]])) <= 1e-6 43 | print g 44 | assert np.amax(g - np.array([[0.19661193, 0.10499359], 45 | [0.19661193, 0.10499359]])) <= 1e-6 46 | print "You should verify these results!\n" 47 | 48 | def test_sigmoid(): 49 | """ 50 | Use this space to test your sigmoid implementation by running: 51 | python q2_sigmoid.py 52 | This function will not be called by the autograder, nor will 53 | your tests be graded. 54 | """ 55 | print "Running your tests..." 
56 | ### YOUR CODE HERE 57 | raise NotImplementedError 58 | ### END YOUR CODE 59 | 60 | if __name__ == "__main__": 61 | test_sigmoid_basic(); 62 | test_sigmoid() 63 | -------------------------------------------------------------------------------- /assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from cs224d.data_utils import * 4 | import matplotlib.pyplot as plt 5 | 6 | from q3_word2vec import * 7 | from q3_sgd import * 8 | 9 | # Reset the random seed to make sure that everyone gets the same results 10 | random.seed(314) 11 | dataset = StanfordSentiment() 12 | tokens = dataset.tokens() 13 | nWords = len(tokens) 14 | 15 | # We are going to train 10-dimensional vectors for this assignment 16 | dimVectors = 10 17 | 18 | # Context size 19 | C = 5 20 | 21 | # Reset the random seed to make sure that everyone gets the same results 22 | random.seed(31415) 23 | np.random.seed(9265) 24 | wordVectors = np.concatenate(((np.random.rand(nWords, dimVectors) - .5) / \ 25 | dimVectors, np.zeros((nWords, dimVectors))), axis=0) 26 | wordVectors0 = sgd( 27 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 28 | negSamplingCostAndGradient), 29 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 30 | print "sanity check: cost at convergence should be around or below 10" 31 | 32 | # sum the input and output word vectors 33 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 34 | 35 | # Visualize the word vectors you trained 36 | _, wordVectors0, _ = load_saved_params() 37 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 38 | visualizeWords = ["the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 39 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 40 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 41 | "annoying"] 42 | visualizeIdx = [tokens[word] for word in visualizeWords] 43 | visualizeVecs = wordVectors[visualizeIdx, :] 44 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 45 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 46 | U,S,V = np.linalg.svd(covariance) 47 | coord = temp.dot(U[:,0:2]) 48 | 49 | for i in xrange(len(visualizeWords)): 50 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 51 | bbox=dict(facecolor='green', alpha=0.1)) 52 | 53 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 54 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 55 | 56 | plt.savefig('q3_word_vectors.png') 57 | plt.show() -------------------------------------------------------------------------------- /assignment1/q3_sgd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Save parameters every a few SGD iterations as fail-safe\n", 12 | "SAVE_PARAMS_EVERY = 1000\n", 13 | "\n", 14 | "import glob\n", 15 | "import random\n", 16 | "import numpy as np\n", 17 | "import os.path as op\n", 18 | "import cPickle as pickle" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 7, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "def load_saved_params():\n", 30 | " \"\"\" A helper function that loads previously saved parameters and resets iteration start \"\"\"\n", 31 | " st = 0\n", 32 | " for f in glob.glob(\"saved_params_*.npy\"):\n", 33 | " iter = 
int(op.splitext(op.basename(f))[0].split(\"_\")[2])\n", 34 | " if (iter > st):\n", 35 | " st = iter\n", 36 | " \n", 37 | " if st > 0:\n", 38 | " with open(\"saved_params_%d.npy\" % st, \"r\") as f:\n", 39 | " params = pickle.load(f)\n", 40 | " state = pickle.load(f)\n", 41 | " return st, params, state\n", 42 | " else:\n", 43 | " return st, None, None\n", 44 | " \n", 45 | "def save_params(iter, params):\n", 46 | " with open(\"saved_params_%d.npy\" % iter, \"w\") as f:\n", 47 | " pickle.dump(params, f)\n", 48 | " pickle.dump(random.getstate(), f)\n", 49 | "\n", 50 | "def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10):\n", 51 | " \"\"\" Stochastic Gradient Descent \"\"\"\n", 52 | " # Implement the stochastic gradient descent method in this \n", 53 | " # function. \n", 54 | " \n", 55 | " # Inputs: \n", 56 | " # - f: the function to optimize, it should take a single \n", 57 | " # argument and yield two outputs, a cost and the gradient \n", 58 | " # with respect to the arguments \n", 59 | " # - x0: the initial point to start SGD from \n", 60 | " # - step: the step size for SGD \n", 61 | " # - iterations: total iterations to run SGD for \n", 62 | " # - postprocessing: postprocessing function for the parameters \n", 63 | " # if necessary. In the case of word2vec we will need to \n", 64 | " # normalize the word vectors to have unit length. \n", 65 | " # - PRINT_EVERY: specifies every how many iterations to output \n", 66 | "\n", 67 | " # Output: \n", 68 | " # - x: the parameter value after SGD finishes \n", 69 | " \n", 70 | " # Anneal learning rate every several iterations\n", 71 | " ANNEAL_EVERY = 20000\n", 72 | " \n", 73 | " if useSaved:\n", 74 | " start_iter, oldx, state = load_saved_params()\n", 75 | " if start_iter > 0:\n", 76 | " x0 = oldx;\n", 77 | " step *= 0.5 ** (start_iter / ANNEAL_EVERY)\n", 78 | " \n", 79 | " if state:\n", 80 | " random.setstate(state)\n", 81 | " else:\n", 82 | " start_iter = 0\n", 83 | " \n", 84 | " x = x0\n", 85 | " \n", 86 | " if not postprocessing:\n", 87 | " postprocessing = lambda x: x\n", 88 | " \n", 89 | " expcost = None\n", 90 | " \n", 91 | " for iter in xrange(start_iter + 1, iterations + 1):\n", 92 | " ### Don't forget to apply the postprocessing after every iteration!\n", 93 | " ### You might want to print the progress every few iterations.\n", 94 | "\n", 95 | " cost = None\n", 96 | " ### YOUR CODE HERE\n", 97 | " \n", 98 | " cost, grad = f(x)\n", 99 | " x = x - step * grad\n", 100 | " \n", 101 | " \n", 102 | " ### END YOUR CODE\n", 103 | " \n", 104 | " if iter % PRINT_EVERY == 0:\n", 105 | " if not expcost:\n", 106 | " expcost = cost\n", 107 | " else:\n", 108 | " expcost = .95 * expcost + .05 * cost\n", 109 | " print \"iter %d: %f\" % (iter, expcost)\n", 110 | " \n", 111 | " if iter % SAVE_PARAMS_EVERY == 0 and useSaved:\n", 112 | " save_params(iter, x)\n", 113 | " \n", 114 | " if iter % ANNEAL_EVERY == 0:\n", 115 | " step *= 0.5\n", 116 | " \n", 117 | " return x\n", 118 | "\n", 119 | "def sanity_check():\n", 120 | " quad = lambda x: (np.sum(x ** 2), x * 2)\n", 121 | "\n", 122 | " print \"Running sanity checks...\"\n", 123 | " t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)\n", 124 | " print \"test 1 result:\", t1\n", 125 | " assert abs(t1) <= 1e-6\n", 126 | "\n", 127 | " t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)\n", 128 | " print \"test 2 result:\", t2\n", 129 | " assert abs(t2) <= 1e-6\n", 130 | "\n", 131 | " t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)\n", 132 | " print \"test 3 result:\", t3\n", 133 | 
" assert abs(t3) <= 1e-6\n", 134 | " \n", 135 | " print \"\"" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Running sanity checks...\n", 150 | "iter 100: 0.004578\n", 151 | "iter 200: 0.004353\n", 152 | "iter 300: 0.004136\n", 153 | "iter 400: 0.003929\n", 154 | "iter 500: 0.003733\n", 155 | "iter 600: 0.003546\n", 156 | "iter 700: 0.003369\n", 157 | "iter 800: 0.003200\n", 158 | "iter 900: 0.003040\n", 159 | "iter 1000: 0.002888\n", 160 | "test 1 result: 8.41483678608e-10\n", 161 | "iter 100: 0.000000\n", 162 | "iter 200: 0.000000\n", 163 | "iter 300: 0.000000\n", 164 | "iter 400: 0.000000\n", 165 | "iter 500: 0.000000\n", 166 | "iter 600: 0.000000\n", 167 | "iter 700: 0.000000\n", 168 | "iter 800: 0.000000\n", 169 | "iter 900: 0.000000\n", 170 | "iter 1000: 0.000000\n", 171 | "test 2 result: 0.0\n", 172 | "iter 100: 0.041205\n", 173 | "iter 200: 0.039181\n", 174 | "iter 300: 0.037222\n", 175 | "iter 400: 0.035361\n", 176 | "iter 500: 0.033593\n", 177 | "iter 600: 0.031913\n", 178 | "iter 700: 0.030318\n", 179 | "iter 800: 0.028802\n", 180 | "iter 900: 0.027362\n", 181 | "iter 1000: 0.025994\n", 182 | "test 3 result: -2.52445103582e-09\n", 183 | "\n" 184 | ] 185 | } 186 | ], 187 | "source": [ 188 | "sanity_check()" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 2", 195 | "language": "python", 196 | "name": "python2" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 2 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython2", 208 | "version": "2.7.11" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 0 213 | } 214 | -------------------------------------------------------------------------------- /assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | # Save parameters every a few SGD iterations as fail-safe 2 | SAVE_PARAMS_EVERY = 1000 3 | 4 | import glob 5 | import random 6 | import numpy as np 7 | import os.path as op 8 | import cPickle as pickle 9 | 10 | def load_saved_params(): 11 | """ A helper function that loads previously saved parameters and resets iteration start """ 12 | st = 0 13 | for f in glob.glob("saved_params_*.npy"): 14 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 15 | if (iter > st): 16 | st = iter 17 | 18 | if st > 0: 19 | with open("saved_params_%d.npy" % st, "r") as f: 20 | params = pickle.load(f) 21 | state = pickle.load(f) 22 | return st, params, state 23 | else: 24 | return st, None, None 25 | 26 | def save_params(iter, params): 27 | with open("saved_params_%d.npy" % iter, "w") as f: 28 | pickle.dump(params, f) 29 | pickle.dump(random.getstate(), f) 30 | 31 | def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10): 32 | """ Stochastic Gradient Descent """ 33 | # Implement the stochastic gradient descent method in this 34 | # function. 
35 | 36 | # Inputs: 37 | # - f: the function to optimize, it should take a single 38 | # argument and yield two outputs, a cost and the gradient 39 | # with respect to the arguments 40 | # - x0: the initial point to start SGD from 41 | # - step: the step size for SGD 42 | # - iterations: total iterations to run SGD for 43 | # - postprocessing: postprocessing function for the parameters 44 | # if necessary. In the case of word2vec we will need to 45 | # normalize the word vectors to have unit length. 46 | # - PRINT_EVERY: specifies every how many iterations to output 47 | 48 | # Output: 49 | # - x: the parameter value after SGD finishes 50 | 51 | # Anneal learning rate every several iterations 52 | ANNEAL_EVERY = 20000 53 | 54 | if useSaved: 55 | start_iter, oldx, state = load_saved_params() 56 | if start_iter > 0: 57 | x0 = oldx; 58 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 59 | 60 | if state: 61 | random.setstate(state) 62 | else: 63 | start_iter = 0 64 | 65 | x = x0 66 | 67 | if not postprocessing: 68 | postprocessing = lambda x: x 69 | 70 | expcost = None 71 | 72 | for iter in xrange(start_iter + 1, iterations + 1): 73 | ### Don't forget to apply the postprocessing after every iteration! 74 | ### You might want to print the progress every few iterations. 75 | 76 | cost = None 77 | ### YOUR CODE HERE 78 | 79 | cost, grad = f(x) 80 | x = x - step * grad 81 | 82 | ### END YOUR CODE 83 | 84 | if iter % PRINT_EVERY == 0: 85 | if not expcost: 86 | expcost = cost 87 | else: 88 | expcost = .95 * expcost + .05 * cost 89 | print "iter %d: %f" % (iter, expcost) 90 | 91 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 92 | save_params(iter, x) 93 | 94 | if iter % ANNEAL_EVERY == 0: 95 | step *= 0.5 96 | 97 | return x 98 | 99 | def sanity_check(): 100 | quad = lambda x: (np.sum(x ** 2), x * 2) 101 | 102 | print "Running sanity checks..." 103 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100) 104 | print "test 1 result:", t1 105 | assert abs(t1) <= 1e-6 106 | 107 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 108 | print "test 2 result:", t2 109 | assert abs(t2) <= 1e-6 110 | 111 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 112 | print "test 3 result:", t3 113 | assert abs(t3) <= 1e-6 114 | 115 | print "" 116 | 117 | def your_sanity_checks(): 118 | """ 119 | Use this space add any additional sanity checks by running: 120 | python q3_sgd.py 121 | This function will not be called by the autograder, nor will 122 | your additional tests be graded. 123 | """ 124 | print "Running your sanity checks..." 
125 | ### YOUR CODE HERE 126 | raise NotImplementedError 127 | ### END YOUR CODE 128 | 129 | if __name__ == "__main__": 130 | sanity_check(); 131 | your_sanity_checks(); -------------------------------------------------------------------------------- /assignment1/q3_word2vec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import random\n", 13 | "\n", 14 | "from q1_softmax import softmax\n", 15 | "from q2_gradcheck import gradcheck_naive\n", 16 | "from q2_sigmoid import sigmoid, sigmoid_grad" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "def normalizeRows(x):\n", 28 | " \"\"\" Row normalization function \"\"\"\n", 29 | " # Implement a function that normalizes each row of a matrix to have unit length\n", 30 | " \n", 31 | " ### YOUR CODE HERE\n", 32 | " \n", 33 | " x_sum = np.sqrt(np.sum(x**2, 1))\n", 34 | " x /= np.reshape(x_sum, (-1, 1)) + 1e-20\n", 35 | " \n", 36 | " ### END YOUR CODE\n", 37 | " \n", 38 | " return x\n", 39 | "\n", 40 | "def test_normalize_rows():\n", 41 | " print \"Testing normalizeRows...\"\n", 42 | " x = normalizeRows(np.array([[3.0,4.0],[1, 2]])) \n", 43 | " # the result should be [[0.6, 0.8], [0.4472, 0.8944]]\n", 44 | " print x\n", 45 | " assert (x.all() == np.array([[0.6, 0.8], [0.4472, 0.8944]]).all())\n", 46 | " print \"\"" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 65, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def softmaxCostAndGradient(predicted, target, outputVectors, dataset):\n", 58 | " \"\"\" Softmax cost function for word2vec models \"\"\"\n", 59 | " \n", 60 | " # Implement the cost and gradients for one predicted word vector \n", 61 | " # and one target word vector as a building block for word2vec \n", 62 | " # models, assuming the softmax prediction function and cross \n", 63 | " # entropy loss. \n", 64 | " \n", 65 | " # Inputs: \n", 66 | " # - predicted: shape with (1, dim_embed) numpy ndarray, predicted word vector (\\hat{v} in \n", 67 | " # the written component or \\hat{r} in an earlier version)\n", 68 | " # - target: integer, the index of the target word \n", 69 | " # - outputVectors: \"output\" vectors (as rows) for all tokens \n", 70 | " # - dataset: needed for negative sampling, unused here. \n", 71 | " \n", 72 | " # Outputs: \n", 73 | " # - cost: cross entropy cost for the softmax word prediction \n", 74 | " # - gradPred: the gradient with respect to the predicted word \n", 75 | " # vector \n", 76 | " # - grad: the gradient with respect to all the other word \n", 77 | " # vectors \n", 78 | " \n", 79 | " # We will not provide starter code for this function, but feel \n", 80 | " # free to reference the code you previously wrote for this \n", 81 | " # assignment! 
\n", 82 | " \n", 83 | " ### YOUR CODE HERE\n", 84 | " \n", 85 | " N = outputVectors.shape[0] # n_words: vocab size\n", 86 | " y = np.zeros(N)\n", 87 | " y[target] = 1 # (n_words)\n", 88 | " \n", 89 | " score = np.dot(predicted, outputVectors.T) # (1, n_words)\n", 90 | " out = softmax(score)\n", 91 | " \n", 92 | " cost = np.sum(-y * np.log(out)) \n", 93 | " \n", 94 | " dout = out - y # (1, n_words)\n", 95 | " gradPred = np.dot(dout, outputVectors) # (1, dim_embed)\n", 96 | " grad = np.dot(dout.T, predicted) # (n_words, dim_embed)\n", 97 | " \n", 98 | " ### END YOUR CODE\n", 99 | " \n", 100 | " return cost, gradPred, grad\n", 101 | "\n", 102 | "def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, \n", 103 | " K=10):\n", 104 | " \"\"\" Negative sampling cost function for word2vec models \"\"\"\n", 105 | "\n", 106 | " # Implement the cost and gradients for one predicted word vector \n", 107 | " # and one target word vector as a building block for word2vec \n", 108 | " # models, using the negative sampling technique. K is the sample \n", 109 | " # size. You might want to use dataset.sampleTokenIdx() to sample \n", 110 | " # a random word index. \n", 111 | " # \n", 112 | " # Note: See test_word2vec below for dataset's initialization.\n", 113 | " # \n", 114 | " # Input/Output Specifications: same as softmaxCostAndGradient \n", 115 | " # We will not provide starter code for this function, but feel \n", 116 | " # free to reference the code you previously wrote for this \n", 117 | " # assignment!\n", 118 | " \n", 119 | " \n", 120 | " cost = 0.0\n", 121 | " grad = np.zeros_like(outputVectors)\n", 122 | " gradPred = np.zeros_like(predicted)\n", 123 | " \n", 124 | " ### YOUR CODE HERE\n", 125 | " \n", 126 | " \n", 127 | " a_target = sigmoid(np.dot(predicted.reshape(-1), outputVectors[target].T))\n", 128 | " cost += -np.log(a_target) # cost for target value\n", 129 | " grad[target:target+1] = (a_target - 1) * predicted # gradient for target value\n", 130 | " gradPred += (a_target - 1) * outputVectors[target]\n", 131 | " \n", 132 | " neg_samples = [] \n", 133 | " \n", 134 | " for i in range(K):\n", 135 | " j = dataset.sampleTokenIdx()\n", 136 | " if j == target or (j in neg_samples):\n", 137 | " i -= 1 # if negative sample is same with target or already sampled, then resample.\n", 138 | " continue\n", 139 | " neg_samples.append(j)\n", 140 | " \n", 141 | " a_neg = sigmoid(-np.dot(predicted.reshape(-1), outputVectors[j].T))\n", 142 | " cost += -np.log(a_neg) # cost for negative sample\n", 143 | " grad[j:j+1] = (1 - a_neg) * predicted # gradient for negative sample\n", 144 | " gradPred += (1 - a_neg) * outputVectors[j] \n", 145 | " \n", 146 | "\n", 147 | " ### END YOUR CODE\n", 148 | " \n", 149 | " return cost, gradPred, grad\n", 150 | "\n", 151 | "\n", 152 | "def skipgram(currentWord, C, contextWords, tokens, inputVectors, outputVectors, \n", 153 | " dataset, word2vecCostAndGradient = softmaxCostAndGradient):\n", 154 | " \"\"\" Skip-gram model in word2vec \"\"\"\n", 155 | "\n", 156 | " # Implement the skip-gram model in this function.\n", 157 | "\n", 158 | " # Inputs: \n", 159 | " # - currrentWord: a string of the current center word \n", 160 | " # - C: integer, context size \n", 161 | " # - contextWords: list of no more than 2*C strings, the context words \n", 162 | " # - tokens: a dictionary that maps words to their indices in \n", 163 | " # the word vector list \n", 164 | " # - inputVectors: \"input\" word vectors (as rows) for all tokens \n", 165 | " # - outputVectors: \"output\" 
word vectors (as rows) for all tokens \n", 166 | " # - word2vecCostAndGradient: the cost and gradient function for \n", 167 | " # a prediction vector given the target word vectors, \n", 168 | " # could be one of the two cost functions you \n", 169 | " # implemented above\n", 170 | "\n", 171 | " # Outputs: \n", 172 | " # - cost: the cost function value for the skip-gram model \n", 173 | " # - grad: the gradient with respect to the word vectors \n", 174 | " # We will not provide starter code for this function, but feel \n", 175 | " # free to reference the code you previously wrote for this \n", 176 | " # assignment!\n", 177 | "\n", 178 | " cost = 0.0\n", 179 | " gradIn = np.zeros(inputVectors.shape) \n", 180 | " gradOut = np.zeros(outputVectors.shape)\n", 181 | " \n", 182 | " \n", 183 | " ### YOUR CODE HERE\n", 184 | " \n", 185 | " \n", 186 | " idx = tokens[currentWord] # index of the center word in the vocabulary\n", 187 | " input_vector = inputVectors[idx:idx+1] # (1, dim_embed) \n", 188 | " \n", 189 | " for context in contextWords:\n", 190 | " c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[context], outputVectors, dataset)\n", 191 | " cost += c\n", 192 | " gradIn[idx:idx+1, :] += g_in\n", 193 | " gradOut += g_out\n", 194 | "\n", 195 | " \n", 196 | " ### END YOUR CODE\n", 197 | " \n", 198 | " return cost, gradIn, gradOut\n", 199 | "\n", 200 | "def cbow(currentWord, C, contextWords, tokens, inputVectors, outputVectors, \n", 201 | " dataset, word2vecCostAndGradient = softmaxCostAndGradient):\n", 202 | " \"\"\" CBOW model in word2vec \"\"\"\n", 203 | "\n", 204 | " # Implement the continuous bag-of-words model in this function. \n", 205 | " # Input/Output specifications: same as the skip-gram model \n", 206 | " # We will not provide starter code for this function, but feel \n", 207 | " # free to reference the code you previously wrote for this \n", 208 | " # assignment!\n", 209 | "\n", 210 | " #################################################################\n", 211 | " # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRITTEN #\n", 212 | " # ASSIGNMENT ARE NOT! # \n", 213 | " #################################################################\n", 214 | " \n", 215 | " cost = 0\n", 216 | " gradIn = np.zeros(inputVectors.shape)\n", 217 | " gradOut = np.zeros(outputVectors.shape)\n", 218 | "\n", 219 | " ### YOUR CODE HERE\n", 220 | " \n", 221 | " \n", 222 | " for contextWord in contextWords:\n", 223 | " idx = tokens[contextWord] # index of the context word in the vocabulary\n", 224 | " input_vector = inputVectors[idx:idx+1] \n", 225 | " c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[currentWord], outputVectors, dataset)\n", 226 | " cost += c\n", 227 | " gradIn[idx:idx+1, :] += g_in\n", 228 | " gradOut += g_out\n", 229 | " \n", 230 | " \n", 231 | " ### END YOUR CODE\n", 232 | " \n", 233 | " return cost, gradIn, gradOut\n", 234 | "\n", 235 | "#############################################\n", 236 | "# Testing functions below. DO NOT MODIFY! 
#\n", 237 | "#############################################\n", 238 | "\n", 239 | "def word2vec_sgd_wrapper(word2vecModel, tokens, wordVectors, dataset, C, word2vecCostAndGradient = softmaxCostAndGradient):\n", 240 | " batchsize = 50\n", 241 | " cost = 0.0\n", 242 | " grad = np.zeros(wordVectors.shape)\n", 243 | " N = wordVectors.shape[0]\n", 244 | " inputVectors = wordVectors[:N/2,:]\n", 245 | " outputVectors = wordVectors[N/2:,:]\n", 246 | " for i in xrange(batchsize):\n", 247 | " C1 = random.randint(1,C) # window size (양 옆으로 )\n", 248 | " centerword, context = dataset.getRandomContext(C1)\n", 249 | " \n", 250 | " if word2vecModel == skipgram:\n", 251 | " denom = 1\n", 252 | " else:\n", 253 | " denom = 1\n", 254 | " \n", 255 | " c, gin, gout = word2vecModel(centerword, C1, context, tokens, inputVectors, outputVectors, dataset, word2vecCostAndGradient)\n", 256 | " cost += c / batchsize / denom\n", 257 | " grad[:N/2, :] += gin / batchsize / denom\n", 258 | " grad[N/2:, :] += gout / batchsize / denom\n", 259 | " \n", 260 | " return cost, grad\n", 261 | "\n", 262 | "def test_word2vec():\n", 263 | " # Interface to the dataset for negative sampling\n", 264 | " dataset = type('dummy', (), {})()\n", 265 | " def dummySampleTokenIdx():\n", 266 | " return random.randint(0, 4)\n", 267 | "\n", 268 | " def getRandomContext(C):\n", 269 | " tokens = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n", 270 | " return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \\\n", 271 | " for i in xrange(2*C)]\n", 272 | " dataset.sampleTokenIdx = dummySampleTokenIdx\n", 273 | " dataset.getRandomContext = getRandomContext\n", 274 | "\n", 275 | " random.seed(10230)\n", 276 | " np.random.seed(9265)\n", 277 | " dummy_vectors = normalizeRows(np.random.randn(10,3))\n", 278 | " dummy_tokens = dict([(\"a\",0), (\"b\",1), (\"c\",2),(\"d\",3),(\"e\",4)])\n", 279 | " print \"==== Gradient check for skip-gram ====\"\n", 280 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5), dummy_vectors)\n", 281 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)\n", 282 | " print \"\\n==== Gradient check for CBOW ====\"\n", 283 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5), dummy_vectors)\n", 284 | " gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)\n", 285 | "\n", 286 | " print \"\\n=== Results ===\"\n", 287 | " print skipgram(\"c\", 3, [\"a\", \"b\", \"e\", \"d\", \"b\", \"c\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)\n", 288 | " print skipgram(\"c\", 1, [\"a\", \"b\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)\n", 289 | " print cbow(\"a\", 2, [\"a\", \"b\", \"c\", \"a\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)\n", 290 | " print cbow(\"a\", 2, [\"a\", \"b\", \"a\", \"c\"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 66, 296 | "metadata": { 297 | "collapsed": false 298 | }, 299 | "outputs": [ 300 | { 301 | "name": "stdout", 302 | "output_type": "stream", 303 | "text": [ 304 | "==== Gradient check for skip-gram ====\n", 305 | "Gradient check passed!\n", 306 | "Gradient check passed!\n", 307 | "\n", 308 | "==== Gradient check for CBOW ====\n", 309 | "Gradient check 
passed!\n", 310 | "Gradient check passed!\n", 311 | "\n", 312 | "=== Results ===\n", 313 | "(11.166109001533979, array([[ 0. , 0. , 0. ],\n", 314 | " [ 0. , 0. , 0. ],\n", 315 | " [-1.26947339, -1.36873189, 2.45158957],\n", 316 | " [ 0. , 0. , 0. ],\n", 317 | " [ 0. , 0. , 0. ]]), array([[-0.41045956, 0.18834851, 1.43272264],\n", 318 | " [ 0.38202831, -0.17530219, -1.33348241],\n", 319 | " [ 0.07009355, -0.03216399, -0.24466386],\n", 320 | " [ 0.09472154, -0.04346509, -0.33062865],\n", 321 | " [-0.13638384, 0.06258276, 0.47605228]]))\n", 322 | "(6.4123666986130292, array([[ 0. , 0. , 0. ],\n", 323 | " [ 0. , 0. , 0. ],\n", 324 | " [-1.79237853, -1.61783916, 0.22229718],\n", 325 | " [ 0. , 0. , 0. ],\n", 326 | " [ 0. , 0. , 0. ]]), array([[-0.11265089, 0.05169237, 0.39321163],\n", 327 | " [ 0.17315617, -0.07945656, -0.60440731],\n", 328 | " [-0.22764219, 0.10445868, 0.79459256],\n", 329 | " [-0.21068407, 0.09667707, 0.73539969],\n", 330 | " [-0.32248118, 0.14797767, 1.1256312 ]]))\n", 331 | "(5.5798856283496789, array([[ 0.3741715 , -0.234476 , -1.36551259],\n", 332 | " [ 0.35927914, -0.11439876, -0.98756037],\n", 333 | " [ 0.17201142, -0.11892354, -0.53014219],\n", 334 | " [ 0. , 0. , 0. ],\n", 335 | " [ 0. , 0. , 0. ]]), array([[ 0.841774 , 0.39105083, -0.47861909],\n", 336 | " [-0.02845097, -0.1067265 , 0.02802426],\n", 337 | " [-0.31375535, -0.06447558, 0.1492707 ],\n", 338 | " [-0.10632801, -0.14957598, 0.03188348],\n", 339 | " [-0.39323966, -0.07027277, 0.26944066]]))\n", 340 | "(12.464842117519513, array([[-0.87034332, -0.94713331, -1.41428685],\n", 341 | " [ 0.12556491, 0.14811621, -1.34941464],\n", 342 | " [-0.42965887, -0.26805817, -0.6785951 ],\n", 343 | " [ 0. , 0. , 0. ],\n", 344 | " [ 0. , 0. , 0. ]]), array([[ 0.14837703, 0.31110522, -0.10079555],\n", 345 | " [-0.51929714, -0.22034123, 0.31252798],\n", 346 | " [-0.40797326, -0.15206494, 0.36152752],\n", 347 | " [-0.74542585, -0.2600954 , 0.36736716],\n", 348 | " [-1.41505916, 0.04569902, 0.89005586]]))\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "test_word2vec()" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 2", 360 | "language": "python", 361 | "name": "python2" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 2 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython2", 373 | "version": "2.7.11" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 0 378 | } 379 | -------------------------------------------------------------------------------- /assignment1/q3_word2vec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_gradcheck import gradcheck_naive 6 | from q2_sigmoid import sigmoid, sigmoid_grad 7 | 8 | def normalizeRows(x): 9 | """ Row normalization function """ 10 | # Implement a function that normalizes each row of a matrix to have unit length 11 | 12 | ### YOUR CODE HERE 13 | 14 | 15 | x_sum = np.sqrt(np.sum(x**2, 1)) 16 | x /= np.reshape(x_sum, (-1, 1)) + 1e-20 17 | 18 | 19 | ### END YOUR CODE 20 | 21 | return x 22 | 23 | def test_normalize_rows(): 24 | print "Testing normalizeRows..." 
25 | x = normalizeRows(np.array([[3.0,4.0],[1, 2]])) 26 | # the result should be [[0.6, 0.8], [0.4472, 0.8944]] 27 | print x 28 | assert np.allclose(x, np.array([[0.6, 0.8], [0.4472, 0.8944]]), atol=1e-4) 29 | print "" 30 | 31 | def softmaxCostAndGradient(predicted, target, outputVectors, dataset): 32 | """ Softmax cost function for word2vec models """ 33 | 34 | # Implement the cost and gradients for one predicted word vector 35 | # and one target word vector as a building block for word2vec 36 | # models, assuming the softmax prediction function and cross 37 | # entropy loss. 38 | 39 | # Inputs: 40 | # - predicted: numpy ndarray, predicted word vector (\hat{v} in 41 | # the written component or \hat{r} in an earlier version) 42 | # - target: integer, the index of the target word 43 | # - outputVectors: "output" vectors (as rows) for all tokens 44 | # - dataset: needed for negative sampling, unused here. 45 | 46 | # Outputs: 47 | # - cost: cross entropy cost for the softmax word prediction 48 | # - gradPred: the gradient with respect to the predicted word 49 | # vector 50 | # - grad: the gradient with respect to all the other word 51 | # vectors 52 | 53 | # We will not provide starter code for this function, but feel 54 | # free to reference the code you previously wrote for this 55 | # assignment! 56 | 57 | ### YOUR CODE HERE 58 | 59 | N = outputVectors.shape[0] # n_words: vocab size 60 | y = np.zeros(N) 61 | y[target] = 1 # (n_words) 62 | 63 | score = np.dot(predicted, outputVectors.T) # (1, n_words) 64 | out = softmax(score) 65 | 66 | cost = np.sum(-y * np.log(out)) 67 | 68 | dout = out - y # (1, n_words) 69 | gradPred = np.dot(dout, outputVectors) # (1, dim_embed) 70 | grad = np.dot(dout.T, predicted) # (n_words, dim_embed) 71 | 72 | 73 | ### END YOUR CODE 74 | 75 | return cost, gradPred, grad 76 | 77 | def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, 78 | K=10): 79 | """ Negative sampling cost function for word2vec models """ 80 | 81 | # Implement the cost and gradients for one predicted word vector 82 | # and one target word vector as a building block for word2vec 83 | # models, using the negative sampling technique. K is the sample 84 | # size. You might want to use dataset.sampleTokenIdx() to sample 85 | # a random word index. 86 | # 87 | # Note: See test_word2vec below for dataset's initialization. 88 | # 89 | # Input/Output Specifications: same as softmaxCostAndGradient 90 | # We will not provide starter code for this function, but feel 91 | # free to reference the code you previously wrote for this 92 | # assignment! 93 | 94 | cost = 0.0 95 | grad = np.zeros_like(outputVectors) 96 | gradPred = np.zeros_like(predicted) 97 | 98 | ### YOUR CODE HERE 99 | 100 | 101 | a_target = sigmoid(np.dot(predicted.reshape(-1), outputVectors[target].T)) 102 | cost += -np.log(a_target) # cost for target value 103 | grad[target:target+1] = (a_target - 1) * predicted # gradient for target value 104 | gradPred += (a_target - 1) * outputVectors[target] 105 | 106 | neg_samples = [] 107 | 108 | for i in range(K): 109 | j = dataset.sampleTokenIdx() 110 | if j == target or (j in neg_samples): 111 | # skip this draw if it equals the target word or was already sampled 
112 | continue 113 | neg_samples.append(j) 114 | 115 | a_neg = sigmoid(-np.dot(predicted.reshape(-1), outputVectors[j].T)) 116 | cost += -np.log(a_neg) # cost for negative sample 117 | grad[j:j+1] = (1 - a_neg) * predicted # gradient for negative sample 118 | gradPred += (1 - a_neg) * outputVectors[j] 119 | 120 | 121 | ### END YOUR CODE 122 | 123 | return cost, gradPred, grad 124 | 125 | 126 | def skipgram(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 127 | dataset, word2vecCostAndGradient = softmaxCostAndGradient): 128 | """ Skip-gram model in word2vec """ 129 | 130 | # Implement the skip-gram model in this function. 131 | 132 | # Inputs: 133 | # - currentWord: a string of the current center word 134 | # - C: integer, context size 135 | # - contextWords: list of no more than 2*C strings, the context words 136 | # - tokens: a dictionary that maps words to their indices in 137 | # the word vector list 138 | # - inputVectors: "input" word vectors (as rows) for all tokens 139 | # - outputVectors: "output" word vectors (as rows) for all tokens 140 | # - word2vecCostAndGradient: the cost and gradient function for 141 | # a prediction vector given the target word vectors, 142 | # could be one of the two cost functions you 143 | # implemented above 144 | 145 | # Outputs: 146 | # - cost: the cost function value for the skip-gram model 147 | # - grad: the gradient with respect to the word vectors 148 | # We will not provide starter code for this function, but feel 149 | # free to reference the code you previously wrote for this 150 | # assignment! 151 | 152 | cost = 0.0 153 | gradIn = np.zeros(inputVectors.shape) 154 | gradOut = np.zeros(outputVectors.shape) 155 | 156 | 157 | ### YOUR CODE HERE 158 | 159 | 160 | idx = tokens[currentWord] # index of the center word in the vocabulary 161 | input_vector = inputVectors[idx:idx+1] # (1, dim_embed) 162 | 163 | for context in contextWords: 164 | c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[context], outputVectors, dataset) 165 | cost += c 166 | gradIn[idx:idx+1, :] += g_in 167 | gradOut += g_out 168 | 169 | 170 | ### END YOUR CODE 171 | 172 | return cost, gradIn, gradOut 173 | 174 | def cbow(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 175 | dataset, word2vecCostAndGradient = softmaxCostAndGradient): 176 | """ CBOW model in word2vec """ 177 | 178 | # Implement the continuous bag-of-words model in this function. 179 | # Input/Output specifications: same as the skip-gram model 180 | # We will not provide starter code for this function, but feel 181 | # free to reference the code you previously wrote for this 182 | # assignment! 183 | 184 | ################################################################# 185 | # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRITTEN # 186 | # ASSIGNMENT ARE NOT! # 187 | ################################################################# 188 | 189 | cost = 0 190 | gradIn = np.zeros(inputVectors.shape) 191 | gradOut = np.zeros(outputVectors.shape) 192 | 193 | ### YOUR CODE HERE 194 | 195 | 196 | for contextWord in contextWords: 197 | idx = tokens[contextWord] # index of the context word in the vocabulary 198 | input_vector = inputVectors[idx:idx+1] 199 | c, g_in, g_out = word2vecCostAndGradient(input_vector, tokens[currentWord], outputVectors, dataset) 200 | cost += c 201 | gradIn[idx:idx+1, :] += g_in 202 | gradOut += g_out 203 | 204 | 205 | ### END YOUR CODE 206 | 207 | return cost, gradIn, gradOut 208 | 209 | ############################################# 210 | # Testing functions below. DO NOT MODIFY! 
# 211 | ############################################# 212 | 213 | def word2vec_sgd_wrapper(word2vecModel, tokens, wordVectors, dataset, C, word2vecCostAndGradient = softmaxCostAndGradient): 214 | batchsize = 50 215 | cost = 0.0 216 | grad = np.zeros(wordVectors.shape) 217 | N = wordVectors.shape[0] 218 | inputVectors = wordVectors[:N/2,:] 219 | outputVectors = wordVectors[N/2:,:] 220 | for i in xrange(batchsize): 221 | C1 = random.randint(1,C) 222 | centerword, context = dataset.getRandomContext(C1) 223 | 224 | if word2vecModel == skipgram: 225 | denom = 1 226 | else: 227 | denom = 1 228 | 229 | c, gin, gout = word2vecModel(centerword, C1, context, tokens, inputVectors, outputVectors, dataset, word2vecCostAndGradient) 230 | cost += c / batchsize / denom 231 | grad[:N/2, :] += gin / batchsize / denom 232 | grad[N/2:, :] += gout / batchsize / denom 233 | 234 | return cost, grad 235 | 236 | def test_word2vec(): 237 | # Interface to the dataset for negative sampling 238 | dataset = type('dummy', (), {})() 239 | def dummySampleTokenIdx(): 240 | return random.randint(0, 4) 241 | 242 | def getRandomContext(C): 243 | tokens = ["a", "b", "c", "d", "e"] 244 | return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \ 245 | for i in xrange(2*C)] 246 | dataset.sampleTokenIdx = dummySampleTokenIdx 247 | dataset.getRandomContext = getRandomContext 248 | 249 | random.seed(31415) 250 | np.random.seed(9265) 251 | dummy_vectors = normalizeRows(np.random.randn(10,3)) 252 | dummy_tokens = dict([("a",0), ("b",1), ("c",2),("d",3),("e",4)]) 253 | print "==== Gradient check for skip-gram ====" 254 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5), dummy_vectors) 255 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) 256 | print "\n==== Gradient check for CBOW ====" 257 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5), dummy_vectors) 258 | gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) 259 | 260 | print "\n=== Results ===" 261 | print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) 262 | print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient) 263 | print cbow("a", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) 264 | print cbow("a", 2, ["a", "b", "a", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient) 265 | 266 | if __name__ == "__main__": 267 | test_normalize_rows() 268 | test_word2vec() -------------------------------------------------------------------------------- /assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q3_sgd import load_saved_params, sgd 7 | from q4_softmaxreg import softmaxRegression, getSentenceFeature, accuracy, softmax_wrapper 8 | 9 | # Try different regularizations and pick the best! 10 | # NOTE: fill in one more "your code here" below before running! 
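# Illustrative only: one reasonable choice for the block below is a log-spaced sweep,
# e.g. REGULARIZATION = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1.0]. These values are an
# assumption, not prescribed by the starter code, and should be tuned against the dev set.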
11 | REGULARIZATION = None # Assign a list of floats in the block below 12 | ### YOUR CODE HERE 13 | raise NotImplementedError 14 | ### END YOUR CODE 15 | 16 | # Load the dataset 17 | dataset = StanfordSentiment() 18 | tokens = dataset.tokens() 19 | nWords = len(tokens) 20 | 21 | # Load the word vectors we trained earlier 22 | _, wordVectors0, _ = load_saved_params() 23 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 24 | dimVectors = wordVectors.shape[1] 25 | 26 | # Load the train set 27 | trainset = dataset.getTrainSentences() 28 | nTrain = len(trainset) 29 | trainFeatures = np.zeros((nTrain, dimVectors)) 30 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 31 | for i in xrange(nTrain): 32 | words, trainLabels[i] = trainset[i] 33 | trainFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 34 | 35 | # Prepare dev set features 36 | devset = dataset.getDevSentences() 37 | nDev = len(devset) 38 | devFeatures = np.zeros((nDev, dimVectors)) 39 | devLabels = np.zeros((nDev,), dtype=np.int32) 40 | for i in xrange(nDev): 41 | words, devLabels[i] = devset[i] 42 | devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 43 | 44 | # Try our regularization parameters 45 | results = [] 46 | for regularization in REGULARIZATION: 47 | random.seed(3141) 48 | np.random.seed(59265) 49 | weights = np.random.randn(dimVectors, 5) 50 | print "Training for reg=%f" % regularization 51 | 52 | # We will do batch optimization 53 | weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 54 | weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100) 55 | 56 | # Test on train set 57 | _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) 58 | trainAccuracy = accuracy(trainLabels, pred) 59 | print "Train accuracy (%%): %f" % trainAccuracy 60 | 61 | # Test on dev set 62 | _, _, pred = softmaxRegression(devFeatures, devLabels, weights) 63 | devAccuracy = accuracy(devLabels, pred) 64 | print "Dev accuracy (%%): %f" % devAccuracy 65 | 66 | # Save the results and weights 67 | results.append({ 68 | "reg" : regularization, 69 | "weights" : weights, 70 | "train" : trainAccuracy, 71 | "dev" : devAccuracy}) 72 | 73 | # Print the accuracies 74 | print "" 75 | print "=== Recap ===" 76 | print "Reg\t\tTrain\t\tDev" 77 | for result in results: 78 | print "%E\t%f\t%f" % ( 79 | result["reg"], 80 | result["train"], 81 | result["dev"]) 82 | print "" 83 | 84 | # Pick the best regularization parameters 85 | BEST_REGULARIZATION = None 86 | BEST_WEIGHTS = None 87 | 88 | ### YOUR CODE HERE 89 | raise NotImplementedError 90 | ### END YOUR CODE 91 | 92 | # Test your findings on the test set 93 | testset = dataset.getTestSentences() 94 | nTest = len(testset) 95 | testFeatures = np.zeros((nTest, dimVectors)) 96 | testLabels = np.zeros((nTest,), dtype=np.int32) 97 | for i in xrange(nTest): 98 | words, testLabels[i] = testset[i] 99 | testFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 100 | 101 | _, _, pred = softmaxRegression(testFeatures, testLabels, BEST_WEIGHTS) 102 | print "Best regularization value: %E" % BEST_REGULARIZATION 103 | print "Test accuracy (%%): %f" % accuracy(testLabels, pred) 104 | 105 | # Make a plot of regularization vs accuracy 106 | plt.plot(REGULARIZATION, [x["train"] for x in results]) 107 | plt.plot(REGULARIZATION, [x["dev"] for x in results]) 108 | plt.xscale('log') 109 | plt.xlabel("regularization") 110 | plt.ylabel("accuracy") 111 | plt.legend(['train', 'dev'], loc='upper left') 112 | plt.savefig("q4_reg_v_acc.png") 113 | 
plt.show() 114 | 115 | -------------------------------------------------------------------------------- /assignment1/q4_softmaxreg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q1_softmax import softmax 7 | from q2_gradcheck import gradcheck_naive 8 | from q3_sgd import load_saved_params 9 | 10 | def getSentenceFeature(tokens, wordVectors, sentence): 11 | """ Obtain the sentence feature for sentiment analysis by averaging its word vectors """ 12 | # Implement computation for the sentence features given a sentence. 13 | 14 | # Inputs: 15 | # - tokens: a dictionary that maps words to their indices in 16 | # the word vector list 17 | # - wordVectors: word vectors (each row) for all tokens 18 | # - sentence: a list of words in the sentence of interest 19 | 20 | # Output: 21 | # - sentVector: feature vector for the sentence 22 | 23 | sentVector = np.zeros((wordVectors.shape[1],)) 24 | 25 | ### YOUR CODE HERE 26 | raise NotImplementedError 27 | ### END YOUR CODE 28 | 29 | return sentVector 30 | 31 | def softmaxRegression(features, labels, weights, regularization = 0.0, nopredictions = False): 32 | """ Softmax Regression """ 33 | # Implement softmax regression with weight regularization. 34 | 35 | # Inputs: 36 | # - features: feature vectors, each row is a feature vector 37 | # - labels: labels corresponding to the feature vectors 38 | # - weights: weights of the regressor 39 | # - regularization: L2 regularization constant 40 | 41 | # Output: 42 | # - cost: cost of the regressor 43 | # - grad: gradient of the regressor cost with respect to its 44 | # weights 45 | # - pred: label predictions of the regressor (you might find 46 | # np.argmax helpful) 47 | 48 | prob = softmax(features.dot(weights)) 49 | if len(features.shape) > 1: 50 | N = features.shape[0] 51 | else: 52 | N = 1 53 | # A vectorized implementation of 1/N * sum(cross_entropy(x_i, y_i)) + regularization/2 * |w|^2 54 | cost = np.sum(-np.log(prob[range(N), labels])) / N 55 | cost += 0.5 * regularization * np.sum(weights ** 2) 56 | 57 | ### YOUR CODE HERE: compute the gradients and predictions 58 | raise NotImplementedError 59 | ### END YOUR CODE 60 | 61 | if nopredictions: 62 | return cost, grad 63 | else: 64 | return cost, grad, pred 65 | 66 | def accuracy(y, yhat): 67 | """ Accuracy (in %) for the classifier """ 68 | assert(y.shape == yhat.shape) 69 | return np.sum(y == yhat) * 100.0 / y.size 70 | 71 | def softmax_wrapper(features, labels, weights, regularization = 0.0): 72 | cost, grad, _ = softmaxRegression(features, labels, weights, 73 | regularization) 74 | return cost, grad 75 | 76 | def sanity_check(): 77 | """ 78 | Run python q4_softmaxreg.py. 
79 | """ 80 | random.seed(314159) 81 | np.random.seed(265) 82 | 83 | dataset = StanfordSentiment() 84 | tokens = dataset.tokens() 85 | nWords = len(tokens) 86 | 87 | _, wordVectors0, _ = load_saved_params() 88 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 89 | dimVectors = wordVectors.shape[1] 90 | 91 | dummy_weights = 0.1 * np.random.randn(dimVectors, 5) 92 | dummy_features = np.zeros((10, dimVectors)) 93 | dummy_labels = np.zeros((10,), dtype=np.int32) 94 | for i in xrange(10): 95 | words, dummy_labels[i] = dataset.getRandomTrainSentence() 96 | dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) 97 | print "==== Gradient check for softmax regression ====" 98 | gradcheck_naive(lambda weights: softmaxRegression(dummy_features, 99 | dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) 100 | 101 | print "\n=== Results ===" 102 | print softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0) 103 | 104 | if __name__ == "__main__": 105 | sanity_check() -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | mock==1.0.1 7 | nose==1.3.4 8 | pyparsing==2.0.3 9 | python-dateutil==2.4.0 10 | pytz==2014.10 11 | pyzmq==14.4.1 12 | six==1.9.0 13 | tornado==4.0.2 14 | wsgiref==0.1.2 15 | --------------------------------------------------------------------------------