├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── assignment1 ├── README.md ├── assignment1.pdf ├── collectSubmission.sh ├── cs224d │ ├── __init__.py │ ├── data_utils.py │ └── datasets │ │ └── get_datasets.sh ├── q1_softmax.py ├── q1_softmax_sol.py ├── q2_gradcheck.py ├── q2_neural.py ├── q2_neural_sol.py ├── q2_sigmoid.py ├── q2_sigmoid_sol.py ├── q3_run.py ├── q3_sgd.py ├── q3_word2vec.py ├── q3_word2vec_sol.py ├── q3_word_vectors.png ├── q4_reg_v_acc.png ├── q4_sentiment.py ├── q4_softmaxreg.py ├── requirements.txt ├── solutions │ ├── .gitignore │ └── assignment1_solutions.tex ├── tensorflow_word2vec.py └── tests │ ├── test_gradcheck.py │ ├── test_neural.py │ ├── test_neural_to_solutions.py │ ├── test_normalize.py │ ├── test_sgd.py │ ├── test_sigmoid.py │ ├── test_sigmoid_to_solutions.py │ ├── test_softmax.py │ ├── test_softmax_regression.py │ ├── test_softmax_to_solutions.py │ └── test_word2vec_to_solutions.py ├── assignment2 ├── README.md ├── assignment2.pdf ├── data │ ├── ner │ │ ├── dev │ │ ├── test.masked │ │ ├── train │ │ ├── vocab.txt │ │ └── wordVectors.txt │ └── ptb │ │ ├── ptb.test.txt │ │ ├── ptb.train.txt │ │ ├── ptb.valid.txt │ │ └── vocab.ptb.txt ├── data_utils │ ├── __init__.py │ ├── ner.py │ └── utils.py ├── model.py ├── q1_classifier.py ├── q1_softmax.py ├── q2_NER.py ├── q2_initialization.py ├── q3_RNNLM.py ├── solutions │ ├── .gitignore │ └── assignment2_solutions.tex ├── test_confusion.py ├── tests │ └── test_softmax.py └── utils.py ├── assignment3 ├── README.md ├── assignment3_2016.pdf ├── codebase_release │ ├── loss_history.png │ ├── prepare_submission.sh │ ├── rnn.py │ ├── rnn_pytorch.py │ ├── rnn_tensorarray.py │ ├── rnn_while_loop_storage.py │ ├── setup.sh │ ├── tree.py │ └── utils.py └── recursive.png ├── class_notebooks ├── tensorflow_scan.ipynb └── vanishing_grad_example.ipynb └── old_assignments ├── assignment1.pdf ├── assignment1 ├── README.md ├── collectSubmission.sh ├── cs224d │ ├── __init__.py │ ├── data_utils.py │ └── datasets │ │ └── get_datasets.sh ├── requirements.txt ├── solutions │ ├── .gitignore │ └── assignment1_solutions.tex ├── updateAssignment.sh └── wordvec_sentiment.ipynb ├── assignment2.pdf ├── assignment2 ├── README.md ├── collectSubmission.py ├── data_utils │ ├── __init__.py │ ├── ner.py │ └── utils.py ├── misc.py ├── nerwindow.py ├── nn │ ├── __init__.py │ ├── base.py │ └── math.py ├── part0-XOR.ipynb ├── part1-NER.ipynb ├── part11probing.py ├── part2-RNNLM.ipynb ├── requirements.txt ├── rnnlm.py └── softmax_example.py ├── assignment3.pdf └── assignment3 └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | #numpy weight arrays 7 | *.npy 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | *.zip 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | addons: 4 | apt: 5 | packages: 6 | - python3-scipy 7 | - python3-numpy 8 | - python-sklearn 9 | # Whitelisting master 10 | branches: 11 | only: 12 | - master 13 | git: 14 | depth: 10 15 | python: 16 | - "3.5" 17 | before_install: 18 | - echo "before_install" 19 | - echo $VIRTUAL_ENV 20 | - df -h 21 | - date 22 | - pwd 23 | - uname -m 24 | - python -V 25 | - which python2 26 | - which python3 27 | - git --version 28 | - git tag 29 | - pip install pytest-cov pylint unidecode 30 | - apt-cache show python3-numpy python3-scipy 31 | install: 32 | - echo "install start" 33 | # - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 34 | # - bash miniconda.sh -b -p ${HOME}/miniconda 35 | # - export PATH="$HOME/miniconda/bin:$PATH" 36 | # - conda config --set always_yes yes --set changeps1 no 37 | # - conda update -q conda 38 | # - conda info -a 39 | # - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib seaborn pytest pytest-cov 40 | # - source activate test-environment 41 | before_script: 42 | - echo "before_script" 43 | script: 44 | - PYTHONPATH=assignment1 python -m py.test --cov=assignment1 --cov-report term assignment1/tests/ -s --durations=10 45 | ##PYTHONPATH=assignment1 py.test --cov=assignment1 --cov-report term assignment1/tests/ -s --durations=10 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Gregory King 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stanford CS224D: Deep Learning for Natural Language Processing 2 | CS224D Assignments; 3 | [`Assignments Page`](http://cs224d.stanford.edu/assignments.html) 4 | 5 | Notes 6 | ----- 7 | * Certain features may require a compiler to be installed: Visual Studio C++, GCC, or clang 8 | * The code base is stored in separate assignment directories; these may contain IPython notebooks used for running and displaying results 9 | * Each assignment contains a small number of Python modules (and possibly test scripts) 10 | * Assignment directories might have a dataset directory (with either a script to download the datasets, or at least details on how to get them) 11 | 12 | 13 | [`Assignment 1`](https://github.com/kingtaurus/cs224d/blob/master/assignment1/assignment1.pdf)[![Build Status](https://travis-ci.com/kingtaurus/cs224d.svg?token=S5K3fgjLh8cmmfpF6ZLy&branch=master)](https://travis-ci.com/kingtaurus/cs224d) 14 | ------------ 15 | See [`Assignment1 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment1/README.md) 16 | * **Softmax** 17 | * **Neural Network Basics** 18 | * **`word2vec`** 19 | * **Sentiment Analysis** 20 | 21 | [`Assignment 2`](https://github.com/kingtaurus/cs224d/blob/master/assignment2/assignment2.pdf) 22 | ------------- 23 | See [`Assignment2 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment2/README.md) 24 | * **TensorFlow Softmax (coding)** 25 | * **TensorFlow NER Window Model (coding and theory)** 26 | * **TensorFlow RNN Language Model (coding and theory)** 27 | 28 | [`Assignment 3`](https://github.com/kingtaurus/cs224d/blob/master/assignment3/assignment3_2016.pdf) 29 | ------------- 30 | See [`Assignment3 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment3/README.md) 31 | * **TensorFlow Recursive Neural Network (RNN) and Sentiment Analysis** 32 | 33 | Old Assignments 34 | =============== 35 | Assignments from 2015.
36 | 37 | [`Assignment 1`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment1.pdf) 38 | -------------- 39 | See [`Assignment1 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment1/README.md) 40 | * **Softmax** 41 | * **Neural Network Basics** 42 | * **`word2vec`** 43 | * **Sentiment Analysis** 44 | 45 | [`Assignment 2`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment2.pdf) 46 | -------------- 47 | See [`Assignment2 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment2/README.md) 48 | * **Boolean Logic** 49 | * **Deep Network (for Named Entity Recognition)** 50 | * **Recurrent Neural Networks (Language Modeling)** 51 | 52 | [`Assignment 3`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment3.pdf) 53 | -------------- 54 | See [`Assignment3 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment3/README.md) 55 | * **Recursive Neural Network** 56 | * **2-Layer Deep RNN** 57 | * **Recursive Neural Tensor Networks (Extra Credit)** 58 | -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | [`CS224d: Deep Learning for Natural Language Processing`](http://cs224d.stanford.edu/) 2 | ====================================================================================== 3 | [![Build Status](https://travis-ci.com/kingtaurus/cs224d.svg?token=S5K3fgjLh8cmmfpF6ZLy&branch=master)](https://travis-ci.com/kingtaurus/cs224d) 4 | 5 | **Due Date: 4/19/2016 (Thursday) 11:59 PM PST. Hard deadline: 4/22 (Sun) 11:59 PM PST with 3 late days** 6 | 7 | In this assignment we will familiarize you with basic concepts of neural networks, word vectors, and their application to sentiment analysis. 8 | 9 | Setup 10 | ----- 11 | 12 | **Note:** Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help. 13 | 14 | Get the code: [Download the starter code here](http://cs224d.stanford.edu/assignment1/assignment1.zip) and the [complementary written problems here](http://cs224d.stanford.edu/assignment1/assignment1.pdf). 15 | 16 | **[Optional] virtual environment:** Once you have unzipped the starter code, you might want to create a [`virtual environment`](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed on your machine. To set up a virtual environment, run the following: 17 | 18 | ```bash 19 | cd assignment1 20 | sudo pip install virtualenv # This may already be installed 21 | virtualenv .env # Create a virtual environment 22 | source .env/bin/activate # Activate the virtual environment 23 | pip install -r requirements.txt # Install dependencies 24 | # Work on the assignment for a while ... 
25 | deactivate # Exit the virtual environment 26 | ``` 27 | 28 | **Install requirements (without a virtual environment):** To install the required packages locally without setting up a virtual environment, run the following: 29 | 30 | ```bash 31 | cd assignment1 32 | pip install -r requirements.txt # Install dependencies 33 | ``` 34 | 35 | **Download data:** Once you have the starter code, you will need to download the Stanford Sentiment Treebank dataset. Run the following from the assignment1 directory: 36 | 37 | ```bash 38 | cd cs224d/datasets 39 | ./get_datasets.sh 40 | ``` 41 | 42 | Submitting your work 43 | -------------------- 44 | 45 | Once you are done working, put the written part in the same directory as your IPython notebook file, and run the `collectSubmission.sh` script; this will produce a file called `assignment1.zip`. Rename this file to `<your-sunetid>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be 46 | 47 | ```bash 48 | jdoe.zip 49 | ``` 50 | 51 | 52 | Stay tuned for a submission link, which will be posted here and on Piazza. 53 | For the written component, please upload a PDF file of your solutions to Gradescope. If you are enrolled in the class you should have been signed up automatically. If you added the class late or are not signed up, post privately to Piazza and we will add you to the roster. When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. This is crucial so that we can provide accurate feedback. If a question has no written component (completely programmatic), map it on the same page as the previous section or next section. 54 | 55 | Tasks 56 | ----- 57 | 58 | There will be four parts to this assignment. Each part has written and code components. The assignment is designed to be completed in order as later sections will leverage solutions to earlier parts. We recommend reading the assignment carefully and starting early as some parts may take significant time to run.
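Each part also ships with a small self-contained sanity check that can be run directly from the `assignment1` directory while you work (a convenience for development, not a grading script), for example:

```bash
python q1_softmax.py     # basic softmax tests
python q2_gradcheck.py   # gradient-checker sanity checks
python q2_neural.py      # two-layer network gradient check
python q3_sgd.py         # SGD sanity checks
```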
59 | 60 | Q1: Softmax (10 points) 61 | ----------------------- 62 | 63 | Q2: Neural Network Basics (30 points) 64 | ------------------------------------- 65 | 66 | Q3: word2vec (40 points + 5 bonus) 67 | ---------------------------------- 68 | 69 | Q4: Sentiment Analysis (20 points) 70 | ---------------------------------- 71 | -------------------------------------------------------------------------------- /assignment1/assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/assignment1.pdf -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy 3 | -------------------------------------------------------------------------------- /assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | """Solution to the coding part for question (1) of CS224D. 2 | """ 3 | 4 | import numpy as np 5 | 6 | def softmax(x): 7 | """ 8 | Compute the softmax function for each row of the input x. 9 | 10 | It is crucial that this function is optimized for speed because 11 | it will be used frequently in later code. 12 | You might find numpy functions np.exp, np.sum, np.reshape, 13 | np.max, and numpy broadcasting useful for this task. (numpy 14 | broadcasting documentation: 15 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 16 | 17 | You should also make sure that your code works for one 18 | dimensional inputs (treat the vector as a row), you might find 19 | it helpful for your later problems. 20 | 21 | You must implement the optimization in problem 1(a) of the 22 | written assignment! 23 | """ 24 | ### YOUR CODE HERE 25 | log_c = np.max(x, axis=x.ndim - 1, keepdims=True) 26 | #for numerical stability 27 | y = np.sum(np.exp(x - log_c), axis=x.ndim - 1, keepdims=True) 28 | x = np.exp(x - log_c)/y 29 | ### END YOUR CODE 30 | return x 31 | 32 | def test_softmax_basic(): 33 | """ 34 | Some simple tests to get you started. 35 | Warning: these are not exhaustive. 
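A useful property when reading the expected values below: softmax is
invariant to adding a constant to every entry of a row, so
softmax([1001, 1002]) and softmax([1, 2]) should both be approximately
[0.26894142, 0.73105858], which is why the large-magnitude tests reuse
the same expected numbers as the small ones.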
36 | """ 37 | print("Running basic tests...") 38 | test1 = softmax(np.array([1, 2])) 39 | print(test1) 40 | assert np.amax(np.fabs(test1 - np.array( 41 | [0.26894142, 0.73105858]))) <= 1e-6 42 | 43 | test2 = softmax(np.array([[1001, 1002], [3, 4]])) 44 | print(test2) 45 | assert np.amax(np.fabs(test2 - np.array( 46 | [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6 47 | 48 | test3 = softmax(np.array([[-1001, -1002]])) 49 | print(test3) 50 | assert np.amax(np.fabs(test3 - np.array( 51 | [0.73105858, 0.26894142]))) <= 1e-6 52 | 53 | print("You should verify these results!\n") 54 | 55 | def test_softmax(): 56 | """ 57 | Use this space to test your softmax implementation by running: 58 | python q1_softmax.py 59 | This function will not be called by the autograder, nor will 60 | your tests be graded. 61 | """ 62 | print("Running your tests...") 63 | ### YOUR CODE HERE 64 | ### END YOUR CODE 65 | 66 | if __name__ == "__main__": 67 | test_softmax_basic() 68 | test_softmax() 69 | -------------------------------------------------------------------------------- /assignment1/q1_softmax_sol.py: -------------------------------------------------------------------------------- 1 | """Solution to the coding part for question (1) of CS224D. 2 | """ 3 | 4 | import numpy as np 5 | 6 | def softmax_sol(x): 7 | """ 8 | Compute the softmax function for each row of the input x. 9 | 10 | It is crucial that this function is optimized for speed because 11 | it will be used frequently in later code. 12 | You might find numpy functions np.exp, np.sum, np.reshape, 13 | np.max, and numpy broadcasting useful for this task. (numpy 14 | broadcasting documentation: 15 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 16 | 17 | You should also make sure that your code works for one 18 | dimensional inputs (treat the vector as a row), you might find 19 | it helpful for your later problems. 20 | 21 | You must implement the optimization in problem 1(a) of the 22 | written assignment! 
23 | """ 24 | 25 | ### YOUR CODE HERE 26 | if len(x.shape) > 1: 27 | tmp = np.max(x, axis = 1) 28 | x -= tmp.reshape((x.shape[0], 1)) 29 | x = np.exp(x) 30 | tmp = np.sum(x, axis = 1) 31 | x /= tmp.reshape((x.shape[0], 1)) 32 | else: 33 | tmp = np.max(x) 34 | x -= tmp 35 | x = np.exp(x) 36 | tmp = np.sum(x) 37 | x /= tmp 38 | ### END YOUR CODE 39 | return x 40 | -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # First implement a gradient checker by filling in the following functions 5 | def gradcheck_naive(f, x): 6 | """ 7 | Gradient check for a function f 8 | - f should be a function that takes a single argument and outputs the cost 9 | and its gradients 10 | - x is the point (numpy array) to check the gradient at 11 | """ 12 | 13 | rndstate = random.getstate() 14 | random.setstate(rndstate) 15 | fx, grad = f(x) # Evaluate function value at original point 16 | h = 1e-4 17 | 18 | # Iterate over all indexes in x 19 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 20 | while not it.finished: 21 | ix = it.multi_index 22 | 23 | ### try modifying x[ix] with h defined above to compute numerical gradients 24 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 25 | ### possible to test cost functions with built in randomness later 26 | ### YOUR CODE HERE: 27 | old_xix = x[ix] 28 | x[ix] = old_xix + h 29 | random.setstate(rndstate) 30 | fp = f(x)[0] 31 | x[ix] = old_xix - h 32 | random.setstate(rndstate) 33 | fm = f(x)[0] 34 | x[ix] = old_xix 35 | 36 | numgrad = (fp - fm)/(2* h) 37 | ### END YOUR CODE 38 | 39 | # Compare gradients 40 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 41 | if reldiff > 1e-5: 42 | print("Gradient check failed.") 43 | print("First gradient error found at index %s" % str(ix)) 44 | print("Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)) 45 | return 46 | 47 | it.iternext() # Step to next dimension 48 | 49 | print("Gradient check passed!") 50 | 51 | def grad_numerical(f, x, h=1e-4): 52 | """ 53 | Gradient check for a function f 54 | - f should be a function that takes a single argument and outputs the cost 55 | and its gradients 56 | - x is the point (numpy array) to check the gradient at 57 | - h is the size of the shift for all dimensions 58 | """ 59 | 60 | rndstate = random.getstate() 61 | random.setstate(rndstate) 62 | fx, grad = f(x) # Evaluate function value at original point 63 | num_grad = np.zeros(x.shape) 64 | 65 | # Iterate over all indexes in x 66 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 67 | while not it.finished: 68 | ix = it.multi_index 69 | 70 | ### try modifying x[ix] with h defined above to compute numerical gradients 71 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 72 | ### possible to test cost functions with built in randomness later 73 | ### YOUR CODE HERE: 74 | old_xix = x[ix] 75 | x[ix] += 0.5 * h 76 | random.setstate(rndstate) 77 | fp = f(x)[0] 78 | x[ix] -= h 79 | random.setstate(rndstate) 80 | fm = f(x)[0] 81 | x[ix] = old_xix 82 | 83 | num_grad += (fp - fm)/h 84 | ### END YOUR CODE 85 | it.iternext() # Step to next dimension 86 | return num_grad 87 | 88 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 89 | """ 90 | Evaluate a numeric gradient for a function that 
accepts a numpy 91 | array and returns a numpy array. 92 | """ 93 | grad = np.zeros_like(x) 94 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 95 | while not it.finished: 96 | ix = it.multi_index 97 | 98 | oldval = x[ix] 99 | x[ix] = oldval + h 100 | pos = f(x).copy() 101 | x[ix] = oldval - h 102 | neg = f(x).copy() 103 | x[ix] = oldval 104 | 105 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 106 | it.iternext() 107 | return grad 108 | 109 | def sanity_check(): 110 | """ 111 | Some basic sanity checks. 112 | """ 113 | quad = lambda x: (np.sum(x ** 2), x * 2) 114 | 115 | print("Running sanity checks...") 116 | gradcheck_naive(quad, np.array(123.456)) # scalar test 117 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 118 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 119 | print("") 120 | 121 | def your_sanity_checks(): 122 | """ 123 | Use this space add any additional sanity checks by running: 124 | python q2_gradcheck.py 125 | This function will not be called by the autograder, nor will 126 | your additional tests be graded. 127 | """ 128 | print("Running your sanity checks...") 129 | ### YOUR CODE HERE 130 | print("") 131 | ### END YOUR CODE 132 | 133 | if __name__ == "__main__": 134 | sanity_check() 135 | your_sanity_checks() 136 | -------------------------------------------------------------------------------- /assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_sigmoid import sigmoid, sigmoid_grad 6 | from q2_gradcheck import gradcheck_naive 7 | 8 | def affine_forward(x, w, b): 9 | """ 10 | Computes the forward pass for an affine (fully-connected) layer. 11 | 12 | The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N 13 | examples, where each example x[i] has shape (d_1, ..., d_k). We will 14 | reshape each input into a vector of dimension D = d_1 * ... * d_k, and 15 | then transform it to an output vector of dimension M. 16 | 17 | Inputs: 18 | - x: A numpy array containing input data, of shape (N, d_1, ..., d_k) 19 | - w: A numpy array of weights, of shape (D, M) 20 | - b: A numpy array of biases, of shape (M,) 21 | 22 | Returns a tuple of: 23 | - out: output, of shape (N, M) 24 | - cache: (x, w, b) 25 | """ 26 | out = None 27 | N = x.shape[0] 28 | D = np.prod(x.shape[1:]) 29 | M = b.shape[1] 30 | out = np.dot(x.reshape(N, D), w.reshape(D, M)) + b.reshape(1, M) 31 | return out, (x,w,b) 32 | 33 | def affine_backward(dout, cache): 34 | """ 35 | Computes the backward pass for an affine layer. 36 | 37 | Inputs: 38 | - dout: Upstream derivative, of shape (N, M) 39 | - cache: Tuple of: 40 | - x: Input data, of shape (N, d_1, ... d_k) 41 | - w: Weights, of shape (D, M) 42 | 43 | Returns a tuple of: 44 | - dx: Gradient with respect to x, of shape (N, d1, ..., d_k) 45 | - dw: Gradient with respect to w, of shape (D, M) 46 | - db: Gradient with respect to b, of shape (M,) 47 | """ 48 | x, w, b = cache 49 | dx, dw, db = None, None, None 50 | N = x.shape[0] 51 | D = np.prod(x.shape[1:]) 52 | M = b.shape[1] 53 | 54 | dx = np.dot(dout, w.reshape(D, M).T).reshape(x.shape) 55 | dw = np.dot(x.reshape(N, D).T, dout).reshape(w.shape) 56 | db = np.sum(dout, axis=0) 57 | 58 | return dx, dw, db 59 | 60 | def sigmoid_forward(x): 61 | """ 62 | Computes the forward pass for a sigmoid activation. 
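(For example, sigmoid_forward(np.array([0.0, 2.0])) returns roughly
[0.5, 0.88079708] together with an identical cache, which
sigmoid_backward later passes to sigmoid_grad.)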
63 | 64 | Inputs: 65 | - x: Input data, numpy array of arbitary shape; 66 | 67 | Returns a tuple (out, cache) 68 | - out: output of the same shape as x 69 | - cache: identical to out; required for backpropagation 70 | """ 71 | return sigmoid(x), sigmoid(x) 72 | 73 | def sigmoid_backward(dout, cache): 74 | """ 75 | Computes the backward pass for an sigmoid layer. 76 | 77 | Inputs: 78 | - dout: Upstream derivative, same shape as the input 79 | to the sigmoid layer (x) 80 | - cache: sigmoid(x) 81 | Returns a tuple of: 82 | - dx: back propagated gradient with respect to x 83 | """ 84 | x = cache 85 | return sigmoid_grad(x) * dout 86 | 87 | def forward_backward_prop(data, labels, params, dimensions): 88 | """ 89 | Forward and backward propagation for a two-layer sigmoidal network 90 | 91 | Compute the forward propagation and for the cross entropy cost, 92 | and backward propagation for the gradients for all parameters. 93 | """ 94 | 95 | ### Unpack network parameters (do not modify) 96 | ofs = 0 97 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 98 | N = data.shape[0] 99 | 100 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 101 | ofs += Dx * H 102 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 103 | ofs += H 104 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 105 | ofs += H * Dy 106 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 107 | 108 | ### YOUR CODE HERE: forward propagation 109 | hidden = np.dot(data,W1) + b1 110 | layer1_a = sigmoid(hidden) 111 | layer2 = np.dot(layer1_a, W2) + b2 112 | # need to calculate the softmax loss 113 | probs = softmax(layer2) 114 | cost = - np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)])) 115 | ### END YOUR CODE 116 | 117 | ### YOUR CODE HERE: backward propagation 118 | #There is no regularization :/ 119 | # dx -> sigmoid -> W2 * layer1_a + b -> sigmoid -> W1 * data + b1 -> .. 120 | dx = probs.copy() 121 | dx -= labels 122 | 123 | dlayer2 = np.zeros_like(dx) 124 | gradW2 = np.zeros_like(W2) 125 | gradW1 = np.zeros_like(W1) 126 | gradb2 = np.zeros_like(b2) 127 | gradb1 = np.zeros_like(b1) 128 | 129 | gradW2 = np.dot(layer1_a.T, dx) 130 | gradb2 = np.sum(dx, axis=0) 131 | dlayer2 = np.dot(dx, W2.T) 132 | dlayer1 = sigmoid_grad(layer1_a) * dlayer2 133 | gradW1 = np.dot(data.T, dlayer1) 134 | gradb1 = np.sum(dlayer1, axis=0) 135 | 136 | # Decided to implement affine (forward and backward function) 137 | # sigmoid (forward and backward function) 138 | # These should work properly; 139 | # scores, cache_1 = affine_forward(data, W1, b1) 140 | # scores, cache_s1 = sigmoid_forward(scores) 141 | # scores, cache_2 = affine_forward(scores, W2, b2) 142 | 143 | # # need to calculate the softmax loss 144 | # probs = softmax(scores) 145 | # cost = -np.sum(np.log(probs[np.arange(N), np.argmax(labels)] + 1e-12)) / N 146 | # softmax_dx = probs.copy() 147 | # softmax_dx[np.arange(N), np.argmax(labels,axis=1)] -= 1 148 | # softmax_dx /= N 149 | 150 | # grads = {} 151 | 152 | # dlayer2, grads['W2'], grads['b2'] = affine_backward(softmax_dx, cache_2) 153 | # dlayer1s = sigmoid_backward(dlayer2, cache_s1) 154 | # dlayer1, grads['W1'], grads['b1'] = affine_backward(dlayer1s, cache_1) 155 | #softmax_dx is the gradient of the loss w.r.t. 
y_{est} 156 | ### END YOUR CODE 157 | 158 | ### Stack gradients (do not modify) 159 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 160 | gradW2.flatten(), gradb2.flatten())) 161 | 162 | return cost, grad 163 | 164 | def sanity_check(): 165 | """ 166 | Set up fake data and parameters for the neural network, and test using 167 | gradcheck. 168 | """ 169 | print("Running sanity check...") 170 | 171 | N = 300 172 | dimensions = [10, 5, 10] 173 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 174 | labels = np.zeros((N, dimensions[2])) 175 | for i in range(N): 176 | labels[i,random.randint(0,dimensions[2]-1)] = 1 177 | 178 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 179 | dimensions[1] + 1) * dimensions[2], ) 180 | 181 | #cost, _ = forward_backward_prop(data, labels, params, dimensions) 182 | # # expect to get 1 in 10 correct 183 | #print(np.exp(-cost)) 184 | # #cost is roughly correct 185 | 186 | gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, 187 | dimensions), params) 188 | 189 | def your_sanity_checks(): 190 | """ 191 | Use this space add any additional sanity checks by running: 192 | python q2_neural.py 193 | This function will not be called by the autograder, nor will 194 | your additional tests be graded. 195 | """ 196 | print("Running your sanity checks...") 197 | ### YOUR CODE HERE 198 | #raise NotImplementedError 199 | ### END YOUR CODE 200 | 201 | if __name__ == "__main__": 202 | sanity_check() 203 | your_sanity_checks() 204 | -------------------------------------------------------------------------------- /assignment1/q2_neural_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax_sol import softmax_sol as softmax 5 | from q2_sigmoid_sol import sigmoid_sol as sigmoid 6 | from q2_sigmoid_sol import sigmoid_grad_sol as sigmoid_grad 7 | from q2_gradcheck import gradcheck_naive 8 | 9 | def forward_backward_prop_sol(data, labels, params, dimensions): 10 | """ 11 | Forward and backward propagation for a two-layer sigmoidal network 12 | 13 | Compute the forward propagation and for the cross entropy cost, 14 | and backward propagation for the gradients for all parameters. 
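In outline (matching the code below): the forward pass is
h = sigmoid(data.dot(W1) + b1) and yhat = softmax(h.dot(W2) + b2),
with cost CE = -sum(labels * log(yhat)); the backward pass uses
delta2 = yhat - labels, gradW2 = h.T.dot(delta2), gradb2 = column sums
of delta2, then delta1 = delta2.dot(W2.T) * h * (1 - h),
gradW1 = data.T.dot(delta1), and gradb1 = column sums of delta1.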
15 | """ 16 | 17 | ### Unpack network parameters (do not modify) 18 | ofs = 0 19 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 20 | 21 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 22 | ofs += Dx * H 23 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 24 | ofs += H 25 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 26 | ofs += H * Dy 27 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 28 | 29 | ### YOUR CODE HERE: forward propagation 30 | hidden = sigmoid(data.dot(W1) + b1) 31 | prediction = softmax(hidden.dot(W2) + b2) 32 | cost = -np.sum(np.log(prediction) * labels) 33 | ### END YOUR CODE 34 | 35 | ### YOUR CODE HERE: backward propagation 36 | delta = prediction - labels 37 | gradW2 = hidden.T.dot(delta) 38 | gradb2 = np.sum(delta, axis = 0) 39 | delta = delta.dot(W2.T) * sigmoid_grad(hidden) 40 | gradW1 = data.T.dot(delta) 41 | gradb1 = np.sum(delta, axis = 0) 42 | ### END YOUR CODE 43 | 44 | ### Stack gradients (do not modify) 45 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 46 | gradW2.flatten(), gradb2.flatten())) 47 | 48 | return cost, grad 49 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | 8 | ### YOUR CODE HERE 9 | x = 1 / (1 + np.exp(-x)) 10 | ### END YOUR CODE 11 | 12 | return x 13 | 14 | def sigmoid_grad(f): 15 | """ 16 | Compute the gradient for the sigmoid function here. Note that 17 | for this implementation, the input f should be the sigmoid 18 | function value of your original input x. 19 | """ 20 | 21 | ### YOUR CODE HERE 22 | f *= (1-f) 23 | ### END YOUR CODE 24 | 25 | return f 26 | 27 | def test_sigmoid_basic(): 28 | """ 29 | Some simple tests to get you started. 30 | Warning: these are not exhaustive. 31 | """ 32 | print("Running basic tests...") 33 | x = np.array([[1, 2], [-1, -2]]) 34 | f = sigmoid(x) 35 | g = sigmoid_grad(f) 36 | print(f) 37 | assert np.amax(f - np.array([[0.73105858, 0.88079708], 38 | [0.26894142, 0.11920292]])) <= 1e-6 39 | print(g) 40 | assert np.amax(g - np.array([[0.19661193, 0.10499359], 41 | [0.19661193, 0.10499359]])) <= 1e-6 42 | print("You should verify these results!\n") 43 | 44 | def test_sigmoid(): 45 | """ 46 | Use this space to test your sigmoid implementation by running: 47 | python q2_sigmoid.py 48 | This function will not be called by the autograder, nor will 49 | your tests be graded. 50 | """ 51 | print("Running your tests...") 52 | ### YOUR CODE HERE 53 | #raise NotImplementedError 54 | ### END YOUR CODE 55 | 56 | if __name__ == "__main__": 57 | test_sigmoid_basic(); 58 | test_sigmoid() 59 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid_sol(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | ### YOUR CODE HERE 8 | x = 1. / (1 + np.exp(-x)) 9 | ### END YOUR CODE 10 | return x 11 | 12 | def sigmoid_grad_sol(f): 13 | """ 14 | Compute the gradient for the sigmoid function here. Note that 15 | for this implementation, the input f should be the sigmoid 16 | function value of your original input x. 
17 | """ 18 | ### YOUR CODE HERE 19 | f = f * (1-f) 20 | ### END YOUR CODE 21 | return f 22 | 23 | -------------------------------------------------------------------------------- /assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from cs224d.data_utils import * 4 | import matplotlib.pyplot as plt 5 | 6 | from q3_word2vec import * 7 | from q3_sgd import * 8 | 9 | import seaborn as sns 10 | sns.set(style='whitegrid', context='talk') 11 | 12 | # Reset the random seed to make sure that everyone gets the same results 13 | random.seed(314) 14 | dataset = StanfordSentiment() 15 | tokens = dataset.tokens() 16 | nWords = len(tokens) 17 | # We are going to train 10-dimensional vectors for this assignment 18 | dimVectors = 10 19 | 20 | # Context size 21 | C = 5 22 | 23 | # Reset the random seed to make sure that everyone gets the same results 24 | random.seed(31415) 25 | np.random.seed(9265) 26 | print("creating initial word vectors") 27 | wordVectors = np.concatenate(((np.random.rand(nWords, dimVectors) - .5) / \ 28 | dimVectors, np.zeros((nWords, dimVectors))), axis=0) 29 | wordVectors0 = sgd( 30 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 31 | negSamplingCostAndGradient), 32 | wordVectors, 0.30, 40000, None, True, PRINT_EVERY=10) 33 | print("sanity check: cost at convergence should be around or below 10") 34 | 35 | # sum the input and output word vectors 36 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 37 | 38 | # Visualize the word vectors you trained 39 | _, wordVectors0, _ = load_saved_params() 40 | print(wordVectors0.shape) 41 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 42 | visualizeWords = ["the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 43 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 44 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 45 | "annoying"] 46 | visualizeIdx = [tokens[word] for word in visualizeWords] 47 | visualizeVecs = wordVectors[visualizeIdx, :] 48 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 49 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 50 | U,S,V = np.linalg.svd(covariance) 51 | coord = temp.dot(U[:,0:2]) 52 | 53 | plt.figure(figsize=(12,12)) 54 | for i in range(len(visualizeWords)): 55 | plt.scatter(x=coord[i,0], y=coord[i,1]) 56 | plt.text(coord[i,0]+0.01, coord[i,1]+0.01, visualizeWords[i], 57 | bbox=dict(facecolor='green', alpha=0.1)) 58 | plt.xlim((np.min(coord[:,0])-0.1, np.max(coord[:,0])+0.1)) 59 | plt.ylim((np.min(coord[:,1])-0.1, np.max(coord[:,1])+0.1)) 60 | plt.xlabel("SVD[0]") 61 | plt.ylabel("SVD[1]") 62 | 63 | plt.savefig('q3_word_vectors.png') 64 | plt.show() 65 | -------------------------------------------------------------------------------- /assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | # Save parameters every a few SGD iterations as fail-safe 2 | SAVE_PARAMS_EVERY = 1000 3 | 4 | import glob 5 | import random 6 | import numpy as np 7 | import os.path as op 8 | import pickle as pickle 9 | 10 | def load_saved_params(): 11 | """ A helper function that loads previously saved parameters and resets iteration start """ 12 | st = 0 13 | for f in glob.glob("saved_params_*.npy"): 14 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 15 | if (iter > st): 16 | st = iter 17 | 18 | if st > 0: 19 | with open("saved_params_%d.npy" % st, "rb") as f: 20 | 
params = pickle.load(f) 21 | state = pickle.load(f) 22 | return st, params, state 23 | else: 24 | return st, None, None 25 | 26 | def save_params(iter, params): 27 | with open("saved_params_%d.npy" % iter, "wb") as f: 28 | pickle.dump(params, f) 29 | pickle.dump(random.getstate(), f) 30 | 31 | def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10): 32 | """ Stochastic Gradient Descent """ 33 | # Implement the stochastic gradient descent method in this 34 | # function. 35 | 36 | # Inputs: 37 | # - f: the function to optimize, it should take a single 38 | # argument and yield two outputs, a cost and the gradient 39 | # with respect to the arguments 40 | # - x0: the initial point to start SGD from 41 | # - step: the step size for SGD 42 | # - iterations: total iterations to run SGD for 43 | # - postprocessing: postprocessing function for the parameters 44 | # if necessary. In the case of word2vec we will need to 45 | # normalize the word vectors to have unit length. 46 | # - PRINT_EVERY: specifies every how many iterations to output 47 | 48 | # Output: 49 | # - x: the parameter value after SGD finishes 50 | 51 | # Anneal learning rate every several iterations 52 | ANNEAL_EVERY = 20000 53 | 54 | if useSaved: 55 | start_iter, oldx, state = load_saved_params() 56 | if start_iter > 0: 57 | x0 = oldx; 58 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 59 | 60 | if state: 61 | random.setstate(state) 62 | else: 63 | start_iter = 0 64 | 65 | x = x0 66 | 67 | if not postprocessing: 68 | postprocessing = lambda x: x 69 | 70 | expcost = None 71 | 72 | for iter in range(start_iter + 1, iterations + 1): 73 | ### Don't forget to apply the postprocessing after every iteration! 74 | ### You might want to print the progress every few iterations. 75 | 76 | cost = None 77 | ### YOUR CODE HERE 78 | cost, grad = f(x) 79 | x -= step * grad 80 | 81 | x = postprocessing(x) 82 | ### END YOUR CODE 83 | 84 | if PRINT_EVERY is not None and iter % PRINT_EVERY == 0: 85 | if not expcost: 86 | expcost = cost 87 | else: 88 | expcost = .95 * expcost + .05 * cost 89 | print("iter %d: %f" % (iter, expcost)) 90 | 91 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 92 | save_params(iter, x) 93 | 94 | if iter % ANNEAL_EVERY == 0: 95 | step *= 0.5 96 | 97 | return x 98 | 99 | def sanity_check(): 100 | quad = lambda x: (np.sum(x ** 2), x * 2) 101 | 102 | print("Running sanity checks...") 103 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None) 104 | print("test 1 result:", t1) 105 | assert abs(t1) <= 1e-6 106 | 107 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None) 108 | print("test 2 result:", t2) 109 | assert abs(t2) <= 1e-6 110 | 111 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None) 112 | print("test 3 result:", t3) 113 | assert abs(t3) <= 1e-6 114 | 115 | print("") 116 | 117 | def your_sanity_checks(): 118 | """ 119 | Use this space add any additional sanity checks by running: 120 | python q3_sgd.py 121 | This function will not be called by the autograder, nor will 122 | your additional tests be graded. 
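One possible extra check (an illustration only, not required): run the
same quadratic used in sanity_check with a much smaller step size, e.g.
sgd(lambda x: (np.sum(x ** 2), x * 2), 10.0, 0.001, 5000,
PRINT_EVERY=None), and confirm the result has decayed to roughly
10 * (1 - 2 * 0.001) ** 5000, i.e. a few times 1e-4.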
123 | """ 124 | print("Running your sanity checks...") 125 | ### YOUR CODE HERE 126 | #raise NotImplementedError 127 | ### END YOUR CODE 128 | 129 | if __name__ == "__main__": 130 | sanity_check(); 131 | your_sanity_checks(); -------------------------------------------------------------------------------- /assignment1/q3_word2vec_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax_sol import softmax_sol as softmax 5 | from q2_gradcheck import gradcheck_naive 6 | from q2_sigmoid_sol import sigmoid_sol as sigmoid 7 | from q2_sigmoid_sol import sigmoid_grad_sol as sigmoid_grad 8 | 9 | def normalizeRows_sol(x): 10 | """ Row normalization function """ 11 | # Implement a function that normalizes each row of a matrix to have unit length 12 | ### YOUR CODE HERE 13 | N = x.shape[0] 14 | x /= np.sqrt(np.sum(x**2, axis=1)).reshape((N,1)) + 1e-30 15 | ### END YOUR CODE 16 | return x 17 | 18 | def softmaxCostAndGradient_sol(predicted, target, outputVectors, dataset): 19 | """ Softmax cost function for word2vec models """ 20 | 21 | # Implement the cost and gradients for one predicted word vector 22 | # and one target word vector as a building block for word2vec 23 | # models, assuming the softmax prediction function and cross 24 | # entropy loss. 25 | # Inputs: 26 | # - predicted: numpy ndarray, predicted word vector (\hat{v} in 27 | # the written component or \hat{r} in an earlier version) 28 | # - target: integer, the index of the target word 29 | # - outputVectors: "output" vectors (as rows) for all tokens 30 | # - dataset: needed for negative sampling, unused here. 31 | # Outputs: 32 | # - cost: cross entropy cost for the softmax word prediction 33 | # - gradPred: the gradient with respect to the predicted word 34 | # vector 35 | # - grad: the gradient with respect to all the other word 36 | # vectors 37 | # We will not provide starter code for this function, but feel 38 | # free to reference the code you previously wrote for this 39 | # assignment! 40 | ### YOUR CODE HERE 41 | probabilities = softmax(predicted.dot(outputVectors.T)) 42 | cost = -np.log(probabilities[target]) 43 | delta = probabilities 44 | delta[target] -= 1 45 | N = delta.shape[0] 46 | D = predicted.shape[0] 47 | grad = delta.reshape((N,1)) * predicted.reshape((1,D)) 48 | gradPred = (delta.reshape((1,N)).dot(outputVectors)).flatten() 49 | ### END YOUR CODE 50 | 51 | return cost, gradPred, grad 52 | 53 | def negSamplingCostAndGradient_sol(predicted, target, outputVectors, dataset, 54 | K=10): 55 | """ Negative sampling cost function for word2vec models """ 56 | # Implement the cost and gradients for one predicted word vector 57 | # and one target word vector as a building block for word2vec 58 | # models, using the negative sampling technique. K is the sample 59 | # size. You might want to use dataset.sampleTokenIdx() to sample 60 | # a random word index. 61 | # 62 | # Note: See test_word2vec below for dataset's initialization. 63 | # 64 | # Input/Output Specifications: same as softmaxCostAndGradient 65 | # We will not provide starter code for this function, but feel 66 | # free to reference the code you previously wrote for this 67 | # assignment! 
68 | ### YOUR CODE HERE 69 | grad = np.zeros(outputVectors.shape) 70 | gradPred = np.zeros(predicted.shape) 71 | 72 | indices = [target] 73 | for k in range(K): 74 | newidx = dataset.sampleTokenIdx() 75 | while newidx == target: 76 | newidx = dataset.sampleTokenIdx() 77 | indices += [newidx] 78 | 79 | labels = np.array([1] + [-1 for k in range(K)]) 80 | vecs = outputVectors[indices,:] 81 | 82 | t = sigmoid(vecs.dot(predicted) * labels) 83 | cost = -np.sum(np.log(t)) 84 | 85 | delta = labels * (t - 1) 86 | gradPred = delta.reshape((1,K+1)).dot(vecs).flatten() 87 | gradtemp = delta.reshape((K+1,1)).dot(predicted.reshape( 88 | (1,predicted.shape[0]))) 89 | for k in range(K+1): 90 | grad[indices[k]] += gradtemp[k,:] 91 | # t = sigmoid(predicted.dot(outputVectors[target,:])) 92 | # cost = -np.log(t) 93 | # delta = t - 1 94 | # gradPred += delta * outputVectors[target, :] 95 | # grad[target, :] += delta * predicted 96 | # for k in range(K): 97 | # idx = dataset.sampleTokenIdx() 98 | # t = sigmoid(-predicted.dot(outputVectors[idx,:])) 99 | # cost += -np.log(t) 100 | # delta = 1 - t 101 | # gradPred += delta * outputVectors[idx, :] 102 | # grad[idx, :] += delta * predicted 103 | ### END YOUR CODE 104 | 105 | return cost, gradPred, grad 106 | 107 | 108 | def skipgram_sol(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 109 | dataset, word2vecCostAndGradient = softmaxCostAndGradient_sol): 110 | """ Skip-gram model in word2vec """ 111 | # Implement the skip-gram model in this function. 112 | # Inputs: 113 | # - currrentWord: a string of the current center word 114 | # - C: integer, context size 115 | # - contextWords: list of no more than 2*C strings, the context words 116 | # - tokens: a dictionary that maps words to their indices in 117 | # the word vector list 118 | # - inputVectors: "input" word vectors (as rows) for all tokens 119 | # - outputVectors: "output" word vectors (as rows) for all tokens 120 | # - word2vecCostAndGradient: the cost and gradient function for 121 | # a prediction vector given the target word vectors, 122 | # could be one of the two cost functions you 123 | # implemented above 124 | # Outputs: 125 | # - cost: the cost function value for the skip-gram model 126 | # - grad: the gradient with respect to the word vectors 127 | # We will not provide starter code for this function, but feel 128 | # free to reference the code you previously wrote for this 129 | # assignment! 130 | ### YOUR CODE HERE 131 | currentI = tokens[currentWord] 132 | predicted = inputVectors[currentI, :] 133 | 134 | cost = 0.0 135 | gradIn = np.zeros(inputVectors.shape) 136 | gradOut = np.zeros(outputVectors.shape) 137 | for cwd in contextWords: 138 | idx = tokens[cwd] 139 | cc, gp, gg = word2vecCostAndGradient(predicted, idx, outputVectors, dataset) 140 | cost += cc 141 | gradOut += gg 142 | gradIn[currentI, :] += gp 143 | ### END YOUR CODE 144 | 145 | return cost, gradIn, gradOut 146 | 147 | def cbow_sol(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 148 | dataset, word2vecCostAndGradient = softmaxCostAndGradient_sol): 149 | """ CBOW model in word2vec """ 150 | # Implement the continuous bag-of-words model in this function. 151 | # Input/Output specifications: same as the skip-gram model 152 | # We will not provide starter code for this function, but feel 153 | # free to reference the code you previously wrote for this 154 | # assignment! 
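    # In outline (what the code below does): the predicted vector is the sum of
    # the context words' input vectors; cost, gradPred and gradOut come from a
    # single call to word2vecCostAndGradient with the current (center) word as
    # the target, and gradPred is then accumulated into gradIn at every context
    # word's row.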
155 | ################################################################# 156 | # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRIITEN # 157 | # ASSIGNMENT ARE NOT! # 158 | ################################################################# 159 | cost = 0 160 | gradIn = np.zeros(inputVectors.shape) 161 | gradOut = np.zeros(outputVectors.shape) 162 | ### YOUR CODE HERE 163 | D = inputVectors.shape[1] 164 | predicted = np.zeros((D,)) 165 | 166 | indices = [tokens[cwd] for cwd in contextWords] 167 | for idx in indices: 168 | predicted += inputVectors[idx, :] 169 | 170 | cost, gp, gradOut = word2vecCostAndGradient(predicted, tokens[currentWord], outputVectors, dataset) 171 | gradIn = np.zeros(inputVectors.shape) 172 | for idx in indices: 173 | gradIn[idx, :] += gp 174 | ### END YOUR CODE 175 | 176 | return cost, gradIn, gradOut 177 | -------------------------------------------------------------------------------- /assignment1/q3_word_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/q3_word_vectors.png -------------------------------------------------------------------------------- /assignment1/q4_reg_v_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/q4_reg_v_acc.png -------------------------------------------------------------------------------- /assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q3_sgd import load_saved_params, sgd 7 | from q4_softmaxreg import softmaxRegression, getSentenceFeature, accuracy, softmax_wrapper 8 | 9 | import seaborn as sns 10 | sns.set(style='whitegrid', context='talk') 11 | 12 | # Try different regularizations and pick the best! 13 | # NOTE: fill in one more "your code here" below before running! 
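# A rough sketch of what this script does (matching the code below): each
# sentence is featurized as the average of its word vectors
# (getSentenceFeature), a softmax regression classifier is trained with SGD for
# every value in REGULARIZATION, the value with the best dev accuracy is
# selected, and train/dev accuracy versus regularization is plotted along with
# the resulting test accuracy.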
14 | REGULARIZATION = None # Assign a list of floats in the block below 15 | ### YOUR CODE HERE 16 | REGULARIZATION = np.logspace(-6,0.1,21) 17 | REGULARIZATION = np.hstack([0,REGULARIZATION]) 18 | ### END YOUR CODE 19 | 20 | # Load the dataset 21 | dataset = StanfordSentiment() 22 | tokens = dataset.tokens() 23 | nWords = len(tokens) 24 | 25 | # Load the word vectors we trained earlier 26 | _, wordVectors0, _ = load_saved_params() 27 | N = wordVectors0.shape[0]//2 28 | #assert nWords == N 29 | wordVectors = (wordVectors0[:N,:] + wordVectors0[N:,:]) 30 | dimVectors = wordVectors.shape[1] 31 | 32 | # Load the train set 33 | trainset = dataset.getTrainSentences() 34 | nTrain = len(trainset) 35 | trainFeatures = np.zeros((nTrain, dimVectors)) 36 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 37 | for i in range(nTrain): 38 | words, trainLabels[i] = trainset[i] 39 | trainFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 40 | 41 | # Prepare dev set features 42 | devset = dataset.getDevSentences() 43 | nDev = len(devset) 44 | devFeatures = np.zeros((nDev, dimVectors)) 45 | devLabels = np.zeros((nDev,), dtype=np.int32) 46 | for i in range(nDev): 47 | words, devLabels[i] = devset[i] 48 | devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 49 | 50 | # Try our regularization parameters 51 | results = [] 52 | for regularization in REGULARIZATION: 53 | random.seed(3141) 54 | np.random.seed(59265) 55 | weights = np.random.randn(dimVectors, 5) 56 | print("Training for reg=%f" % regularization) 57 | 58 | # We will do batch optimization 59 | weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 60 | weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100) 61 | 62 | # Test on train set 63 | _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) 64 | trainAccuracy = accuracy(trainLabels, pred) 65 | print("Train accuracy (%%): %f" % trainAccuracy) 66 | 67 | # Test on dev set 68 | _, _, pred = softmaxRegression(devFeatures, devLabels, weights) 69 | devAccuracy = accuracy(devLabels, pred) 70 | print("Dev accuracy (%%): %f" % devAccuracy) 71 | 72 | # Save the results and weights 73 | results.append({ 74 | "reg" : regularization, 75 | "weights" : weights, 76 | "train" : trainAccuracy, 77 | "dev" : devAccuracy}) 78 | 79 | # Print the accuracies 80 | print("") 81 | print("=== Recap ===") 82 | print("Reg\t\tTrain\t\tDev") 83 | for result in results: 84 | print("%E\t%0.4g\t%0.4g" % ( 85 | result["reg"], 86 | result["train"], 87 | result["dev"])) 88 | for result in results: 89 | print("%0.2e & %0.4g & %0.4g \\\\" % ( 90 | result["reg"], 91 | result["train"], 92 | result["dev"])) 93 | print("") 94 | 95 | # Pick the best regularization parameters 96 | BEST_REGULARIZATION = None 97 | BEST_WEIGHTS = None 98 | 99 | ### YOUR CODE HERE 100 | sorted_results = sorted(results, key=lambda x: x['dev'],reverse=True) 101 | BEST_REGULARIZATION = sorted_results[0]['reg'] 102 | BEST_WEIGHTS = sorted_results[0]['weights'] 103 | ### END YOUR CODE 104 | 105 | # Test your findings on the test set 106 | testset = dataset.getTestSentences() 107 | nTest = len(testset) 108 | testFeatures = np.zeros((nTest, dimVectors)) 109 | testLabels = np.zeros((nTest,), dtype=np.int32) 110 | for i in range(nTest): 111 | words, testLabels[i] = testset[i] 112 | testFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 113 | 114 | _, _, pred = softmaxRegression(testFeatures, testLabels, BEST_WEIGHTS) 115 | print("Best regularization value: %E" % BEST_REGULARIZATION) 116 | 
print("Test accuracy (%%): %f" % accuracy(testLabels, pred)) 117 | 118 | # Make a plot of regularization vs accuracy 119 | plt.plot(REGULARIZATION, [x["train"] for x in results]) 120 | plt.plot(REGULARIZATION, [x["dev"] for x in results]) 121 | plt.xscale('log') 122 | plt.xlabel("regularization") 123 | plt.ylabel("accuracy") 124 | plt.legend(['train', 'dev'], loc='upper right') 125 | plt.savefig("q4_reg_v_acc.png") 126 | plt.show() 127 | 128 | -------------------------------------------------------------------------------- /assignment1/q4_softmaxreg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q1_softmax import softmax 7 | from q2_gradcheck import gradcheck_naive 8 | from q3_sgd import load_saved_params 9 | 10 | def getSentenceFeature(tokens, wordVectors, sentence): 11 | """ Obtain the sentence feature for sentiment analysis by averaging its word vectors """ 12 | # Implement computation for the sentence features given a sentence. 13 | 14 | # Inputs: 15 | # - tokens: a dictionary that maps words to their indices in 16 | # the word vector list 17 | # - wordVectors: word vectors (each row) for all tokens 18 | # - sentence: a list of words in the sentence of interest 19 | 20 | # Output: 21 | # - sentVector: feature vector for the sentence 22 | 23 | sentVector = np.zeros((wordVectors.shape[1],)) 24 | 25 | ### YOUR CODE HERE 26 | array = np.fromiter( (tokens[word] for word in sentence), dtype='int') 27 | sentVector = np.mean(wordVectors[array], axis=0) 28 | ### END YOUR CODE 29 | 30 | return sentVector 31 | 32 | def softmaxRegression(features, labels, weights, regularization = 0.0, nopredictions = False): 33 | """ Softmax Regression """ 34 | # Implement softmax regression with weight regularization. 
35 | 36 | # Inputs: 37 | # - features: feature vectors, each row is a feature vector 38 | # - labels: labels corresponding to the feature vectors 39 | # - weights: weights of the regressor 40 | # - regularization: L2 regularization constant 41 | 42 | # Output: 43 | # - cost: cost of the regressor 44 | # - grad: gradient of the regressor cost with respect to its 45 | # weights 46 | # - pred: label predictions of the regressor (you might find 47 | # np.argmax helpful) 48 | 49 | prob = softmax(features.dot(weights)) 50 | if len(features.shape) > 1: 51 | N = features.shape[0] 52 | else: 53 | N = 1 54 | 55 | # A vectorized implementation of 1/N * sum(cross_entropy(x_i, y_i)) + 1/2*|w|^2 56 | cost = np.sum(-np.log(prob[np.arange(N), labels] + 1e-12)) / N 57 | cost += 0.5 * regularization * np.sum(weights ** 2) 58 | 59 | ### YOUR CODE HERE: compute the gradients and predictions 60 | pred = np.argmax(prob, axis=1) 61 | dx = prob 62 | dx[np.arange(N), labels] -= 1 63 | dx /= N 64 | # dx is the gradient associated with the loss (softmax layer only) 65 | grad = np.dot(features.T, dx) 66 | #backprop the weights 67 | grad += regularization * weights 68 | #adding the regularization to the gradient 69 | ### END YOUR CODE 70 | 71 | if nopredictions: 72 | return cost, grad 73 | else: 74 | return cost, grad, pred 75 | 76 | def accuracy(y, yhat): 77 | """ Precision for classifier """ 78 | assert(y.shape == yhat.shape) 79 | return np.sum(y == yhat) * 100.0 / y.size 80 | 81 | def softmax_wrapper(features, labels, weights, regularization = 0.0): 82 | cost, grad, _ = softmaxRegression(features, labels, weights, 83 | regularization) 84 | return cost, grad 85 | 86 | def sanity_check(): 87 | """ 88 | Run python q4_softmaxreg.py. 89 | """ 90 | random.seed(314159) 91 | np.random.seed(265) 92 | 93 | dataset = StanfordSentiment() 94 | tokens = dataset.tokens() 95 | nWords = len(tokens) 96 | 97 | _, wordVectors0, _ = load_saved_params() 98 | N = wordVectors0.shape[0]//2 99 | #assert N == nWords 100 | wordVectors = (wordVectors0[:N,:] + wordVectors0[N:,:]) 101 | dimVectors = wordVectors.shape[1] 102 | 103 | dummy_weights = 0.1 * np.random.randn(dimVectors, 5) 104 | dummy_features = np.zeros((10, dimVectors)) 105 | dummy_labels = np.zeros((10,), dtype=np.int32) 106 | for i in range(10): 107 | words, dummy_labels[i] = dataset.getRandomTrainSentence() 108 | dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) 109 | print("==== Gradient check for softmax regression ====") 110 | gradcheck_naive(lambda weights: softmaxRegression(dummy_features, 111 | dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) 112 | 113 | print("\n=== Results ===") 114 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0)) 115 | 116 | dummy_weights = 0.1 * np.random.randn(40, 10) + 1.0 117 | dummy_features = np.random.randn(2000, 40) 118 | dummy_labels = np.argmax(np.random.randn(2000, 10), axis=1) 119 | 120 | print(-np.log(0.1))#expected correct classification (random) = 1 in 10; 121 | #cost then becomes -np.log(0.1) 122 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 123 | 124 | dummy_weights = 0.1 * np.random.randn(40, 80) + 1.0 125 | dummy_features = np.random.randn(2000, 40) 126 | dummy_labels = np.argmax(np.random.randn(2000, 80), axis=1) 127 | 128 | print(-np.log(1./80))#expected correct classification (random) = 1 in 80; 129 | #cost then becomes -np.log(1./80) 130 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 131 | 132 | 
dummy_weights = 0.1 * np.random.randn(40, 1000) + 1.0 133 | dummy_features = np.random.randn(40000, 40) 134 | dummy_labels = np.argmax(np.random.randn(40000, 1000), axis=1) 135 | 136 | print(-np.log(1./1000))#expected correct classification (random) = 1 in 80; 137 | #cost then becomes -np.log(1./80) 138 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 139 | print(np.exp(-softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) 140 | 141 | 142 | if __name__ == "__main__": 143 | sanity_check() 144 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==2.3.1 7 | matplotlib==1.4.2 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.1 11 | pyparsing==2.0.3 12 | python-dateutil==2.4.0 13 | pytz==2014.10 14 | pyzmq==14.4.1 15 | scipy==0.14.1 16 | six==1.9.0 17 | tornado==4.0.2 18 | wsgiref==0.1.2 19 | -------------------------------------------------------------------------------- /assignment1/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | 
*.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | .texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /assignment1/tensorflow_word2vec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import random 4 | import collections 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import cs224d.data_utils as data_utils 10 | from tensorflow.models.embedding import gen_word2vec as word2vec 11 | 12 | class Options(object): 13 | def __init__(self): 14 | #Model Options 15 | self.emb_dim = 20 16 | self.train_data = None 17 | self.num_samples = 20 18 | self.learning_rate = 1.0 19 | 20 | self.epochs_to_train = 5 21 | self.batch_size = 64 22 | self.window_size = 5 23 | self.min_count = 3 24 | 25 | class Word2Vec(object): 26 | """Word2Vec model (skipgram) """ 27 | def __init__(self, options, session): 28 | self._options = options 29 | self._session = session 30 | self._word2id = {} 31 | self._id2word = [] 32 | self.build_graph() 33 | self.build_eval_graph() 34 | self.save_vocab() 35 | self._read_dataset() 36 | 37 | def _read_dataset(self): 38 | # dataset = data_utils.StanfordSentiment() 39 | # #print(dataset.sent_labels()[0:100]) 40 | # #print(dataset.getSplitSentences(0)[0:100]) 41 | # #this is the labels vector :) 42 | 43 | # #sentences = np.from_iter(dataset.sentences(), dtype="int32") 44 | # self._word2id = dataset.tokens() 45 | # print(self._word2id["UNK"]) 46 | # ids = [self._word2id.get(w) for w in self._word2id.keys()] 47 | # print(ids) 48 | pass 49 | def forward(self, examples, labels): 50 | return None,None 51 | 52 | def nce_loss(self, true_logits, sampled_logits): 53 | opts = self._options 54 | true_xent = tf.nn.sigmoid_cross_entropy_with_logits(true_logits, tf.ones_like(true_logits)) 55 | sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(sampled_logits, tf.zeros_like(sampled_logits)) 56 | nce_loss_tensor = (tf.reduce_sum(true_xent) + 57 | tf.reduce_sum(sampled_xent)) / opts.batch_size 58 | return nce_loss_tensor 59 | 60 | def build_graph(self): 61 | opts = self._options 62 | (words, counts, words_per_epoch, self._epoch, self._words, examples, 63 | labels) = word2vec.skipgram(filename="text8", 64 | batch_size=opt.batch_size, 65 | window_size=opt.window_size, 66 | min_count=opt.min_count, 67 | subsample=0) 68 | (opts.vocab_words, opts.vocab_counts, 69 | opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch]) 70 | opts.vocab_size = len(opts.vocab_words) 71 | print("Data file: ", opts.train_data) 72 | print("Vocab size: ", opts.vocab_size - 1, " + UNK") 73 | print("Words per epoch: ", opts.words_per_epoch) 74 | self._examples = examples 75 | self._labels = labels 76 | self._id2word = opts.vocab_words 77 | for i, w in enumerate(self._id2word): 78 | self._word2id[w] = i 79 | true_logits, sampled_logits = self.forward(examples, labels) 80 | loss = self.nce_loss(true_logits, sampled_logits) 81 | tf.scalar_summary("NCE loss", loss) 82 | self._loss = loss 83 | self.optimize(loss) 84 | 85 | def build_eval_graph(self): 86 | pass 87 | def save_vocab(self): 88 | pass 89 | 90 | if __name__ == "__main__": 91 | opt = 
Options() 92 | session = tf.Session() 93 | model = Word2Vec(opt, session) 94 | -------------------------------------------------------------------------------- /assignment1/tests/test_gradcheck.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_gradcheck.py -vv -s -q 4 | python -m py.test tests/test_gradcheck.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_gradcheck.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical 22 | 23 | def rel_error(x,y): 24 | """ returns relative error """ 25 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 26 | 27 | quad = lambda x: (x**2, 2*x) 28 | 29 | def test_gradcheck_naive_1(): 30 | """ Original sigmoid test defined in q2_sigmoid.py; """ 31 | x = np.array(123.45) 32 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 33 | 34 | def test_gradcheck_naive_2(): 35 | """ Original sigmoid test defined in q2_sigmoid.py; """ 36 | x = np.random.normal(loc=10., scale=30., size=20) 37 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 38 | 39 | def test_gradcheck_naive_3(): 40 | """ Original sigmoid test defined in q2_sigmoid.py; """ 41 | x = np.random.normal(loc=10., scale=30., size=(20,20)) 42 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /assignment1/tests/test_neural.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_neural.py -vv -s -q 4 | python -m py.test tests/test_neural.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_neural.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 22 | from q2_neural import forward_backward_prop 23 | from q2_neural import affine_forward, affine_backward, sigmoid_forward, sigmoid_backward 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 28 | 29 | @pytest.fixture(scope='module') 30 | def construct_toy_model(N=100, D1=10, H=20, D2=10): 31 | dim = [D1, H, D2] 32 | data = np.random.randn(N, dim[0]) 33 | labels = np.zeros((N,dim[2])) 34 | for i in range(N): 35 | labels[i, np.random.randint(0, dim[2]-1)] = 0 36 | 37 | params = np.random.randn((dim[0] + 1) * dim[1] + (dim[1] + 1) * dim[2], ) 38 | return data,labels,params,dim 39 | 40 | def test_affine_forward(): 41 | num_inputs = 2 42 | input_shape = (4, 5, 6) 43 | output_dim = 3 44 | 45 | input_size = num_inputs * np.prod(input_shape) 46 | weight_size = output_dim * 
np.prod(input_shape) 47 | 48 | x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape) 49 | w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim) 50 | b = np.linspace(-0.3, 0.1, num=output_dim).reshape((1,output_dim)) 51 | 52 | out, _ = affine_forward(x, w, b) 53 | correct_out = np.array([[ 1.49834967, 1.70660132, 1.91485297], 54 | [ 3.25553199, 3.5141327, 3.77273342]]) 55 | 56 | # Compare your output with ours. The error should be around 1e-9. 57 | assert out.shape == correct_out.shape 58 | assert rel_error(out, correct_out) < 5e-7 59 | 60 | 61 | def test_affine_backward(): 62 | x = np.random.randn(10, 2, 3) 63 | w = np.random.randn(6, 5) 64 | b = np.random.randn(5).reshape((1,5)) 65 | dout = np.random.randn(10, 5) 66 | 67 | #use eval_numerical_gradient_array for backprop from an output layer: 68 | # input -> layer -> output -> ... -> final_layer_loss 69 | # backprop becomes: 70 | # final_layer_loss -> gradient_of_loss (g.o.l) 71 | # g.o.l -> .. -> g.o.l backproped -> output -> layer -> g.o.l @ input 72 | dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout) 73 | dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout) 74 | db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout) 75 | 76 | _, cache = affine_forward(x, w, b) 77 | dx, dw, db = affine_backward(dout, cache) 78 | 79 | assert dx.shape == dx.shape 80 | assert dw.shape == dw.shape 81 | assert db.shape == db.shape 82 | 83 | assert rel_error(dx_num,dx) < 5e-7 84 | assert rel_error(dw_num,dw) < 5e-7 85 | assert rel_error(db_num,db) < 5e-7 86 | -------------------------------------------------------------------------------- /assignment1/tests/test_neural_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_neural_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_neural_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_neural_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 22 | from q2_neural import forward_backward_prop 23 | from q2_neural import affine_forward, affine_backward, sigmoid_forward, sigmoid_backward 24 | 25 | from q2_neural_sol import forward_backward_prop_sol 26 | 27 | def rel_error(x,y): 28 | """ returns relative error """ 29 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 30 | 31 | @pytest.fixture(scope='module') 32 | def construct_toy_model(D1=10, H=20, D2=10, N=100): 33 | dim = [D1, H, D2] 34 | data = np.random.randn(N, dim[0]) 35 | labels = np.zeros((N,dim[2])) 36 | for i in range(N): 37 | labels[i, np.random.randint(0, dim[2]-1)] = 0 38 | 39 | params = np.random.randn((dim[0] + 1) * dim[1] + (dim[1] + 1) * dim[2], ) 40 | return data,labels,params,dim 41 | 42 | def test_affine_forward(): 43 | num_inputs = 2 44 | input_shape = (4, 5, 6) 45 | output_dim = 3 46 | 47 | input_size = num_inputs * np.prod(input_shape) 48 | weight_size = output_dim 
* np.prod(input_shape) 49 | 50 | x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape) 51 | w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim) 52 | b = np.linspace(-0.3, 0.1, num=output_dim).reshape((1,output_dim)) 53 | 54 | out, _ = affine_forward(x, w, b) 55 | correct_out = np.array([[ 1.49834967, 1.70660132, 1.91485297], 56 | [ 3.25553199, 3.5141327, 3.77273342]]) 57 | 58 | # Compare your output with ours. The error should be around 1e-9. 59 | assert out.shape == correct_out.shape 60 | assert rel_error(out, correct_out) < 5e-7 61 | 62 | def test_affine_backward(): 63 | x = np.random.randn(10, 2, 3) 64 | w = np.random.randn(6, 5) 65 | b = np.random.randn(5).reshape((1,5)) 66 | dout = np.random.randn(10, 5) 67 | 68 | #use eval_numerical_gradient_array for backprop from an output layer: 69 | # input -> layer -> output -> ... -> final_layer_loss 70 | # backprop becomes: 71 | # final_layer_loss -> gradient_of_loss (g.o.l) 72 | # g.o.l -> .. -> g.o.l backproped -> output -> layer -> g.o.l @ input 73 | dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout) 74 | dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout) 75 | db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout) 76 | 77 | _, cache = affine_forward(x, w, b) 78 | dx, dw, db = affine_backward(dout, cache) 79 | 80 | assert dx.shape == dx.shape 81 | assert dw.shape == dw.shape 82 | assert db.shape == db.shape 83 | 84 | assert rel_error(dx_num,dx) < 5e-7 85 | assert rel_error(dw_num,dw) < 5e-7 86 | assert rel_error(db_num,db) < 5e-7 87 | 88 | @pytest.mark.parametrize("dim1", list(range(2,10))) 89 | @pytest.mark.parametrize("dim2", list(range(2,10))) 90 | @pytest.mark.parametrize("dim3", list(range(2,10))) 91 | def test_neural_vs_neural_sol(dim1, dim2, dim3, N=300): 92 | dimensions = [ dim1, dim2, dim3 ] 93 | data = np.random.randn(N, dim1) 94 | labels = np.zeros((N, dim3)) 95 | for i in range(N): 96 | labels[i, random.randint(0,dim3 -1)] = 1. 97 | 98 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 99 | dimensions[1] + 1) * dimensions[2], ) 100 | 101 | cost, grad = forward_backward_prop(data, labels, params, dimensions) 102 | cost_sol, grad_sol = forward_backward_prop_sol(data, labels, params, dimensions) 103 | assert rel_error(cost, cost_sol) < 1e-7 104 | 105 | @pytest.mark.parametrize("dim1", list(range(2,10))) 106 | @pytest.mark.parametrize("dim2", list(range(2,10))) 107 | @pytest.mark.parametrize("dim3", list(range(2,10))) 108 | def test_neural_vs_neural_sol_gradient(dim1, dim2, dim3, N=300): 109 | dimensions = [ dim1, dim2, dim3 ] 110 | data = np.random.randn(N, dim1) 111 | labels = np.zeros((N, dim3)) 112 | for i in range(N): 113 | labels[i, random.randint(0,dim3 -1)] = 1. 
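# Note on the params vector built below: its length is
#   (dim1 + 1) * dim2 + (dim2 + 1) * dim3,
# which presumably packs W1 (dim1 x dim2), b1 (dim2), W2 (dim2 x dim3) and b2 (dim3)
# for the one-hidden-layer network; the exact unpacking lives in
# forward_backward_prop (q2_neural.py), which is not shown here.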
114 | 115 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 116 | dimensions[1] + 1) * dimensions[2], ) 117 | 118 | cost, grad = forward_backward_prop(data, labels, params, dimensions) 119 | cost_sol, grad_sol = forward_backward_prop_sol(data, labels, params, dimensions) 120 | assert rel_error(grad, grad_sol) < 1e-8 121 | 122 | -------------------------------------------------------------------------------- /assignment1/tests/test_normalize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_normalize.py -vv -s -q 4 | python -m py.test tests/test_normalize.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs231n/ tests/test_normalize.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | import random 18 | 19 | from collections import defaultdict, OrderedDict, Counter 20 | from q3_word2vec import normalizeRows, l1_normalize_rows, l2_normalize_rows 21 | 22 | def rel_error(x,y): 23 | """ returns relative error """ 24 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 25 | 26 | def test_normalize(): 27 | """ Original normalization test defined in q3_word2vec.py; """ 28 | x = np.array([[3.0,4.0],[1, 2]]) 29 | norm_x = normalizeRows(x) 30 | y = np.array([[0.6, 0.8], [0.4472, 0.8944]]) 31 | assert rel_error(norm_x, y) <= 1e-4 32 | 33 | def test_l2_normalize(): 34 | x = np.array([[3.0,4.0],[1, 2]]) 35 | norm_x = l2_normalize_rows(x) 36 | y = np.array([[0.6, 0.8], [0.4472, 0.8944]]) 37 | assert rel_error(norm_x, y) <= 1e-4 38 | 39 | @pytest.fixture(scope='module') 40 | def test_array(): 41 | def functor(in_dim_1 = 10, in_dim_2 = 10): 42 | assert in_dim_1 > 0 and in_dim_2 > 0 43 | return np.random.uniform(low=0.,high=10.,size=(in_dim_1,in_dim_2)) 44 | return functor 45 | 46 | def test_l2_against_sklearn(test_array): 47 | try: 48 | from sklearn.preprocessing import normalize 49 | in_array = test_array() 50 | assert rel_error(l2_normalize_rows(in_array), normalize(in_array, axis=1, norm='l2')) <= 1e-8 51 | except ImportError: 52 | assert 1 53 | print("ImportError (sklearn) on current node!") 54 | 55 | def test_l1_against_sklearn(test_array): 56 | try: 57 | from sklearn.preprocessing import normalize 58 | in_array = test_array() 59 | assert rel_error(l1_normalize_rows(in_array), normalize(in_array, axis=1, norm='l1')) <= 1e-8 60 | except ImportError: 61 | assert 1 62 | print("ImportError (sklearn) on current node!") 63 | -------------------------------------------------------------------------------- /assignment1/tests/test_sgd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sgd.py -vv -s -q 4 | python -m py.test tests/test_sgd.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs231n/ tests/test_sgd.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | import random 18 | 19 | from collections import 
defaultdict, OrderedDict, Counter 20 | from q3_sgd import sgd 21 | 22 | def rel_error(x,y): 23 | """ returns relative error """ 24 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 25 | 26 | @pytest.fixture(scope='module') 27 | def quad(): 28 | return lambda x: (np.sum(x**2), x * 2) 29 | 30 | def test_sgd_1(quad): 31 | """ Original normalization test defined in q3_word2vec.py; """ 32 | 33 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None) 34 | assert abs(t1) <= 1e-6 35 | 36 | def test_sgd_2(quad): 37 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None) 38 | assert abs(t2) <= 1e-6 39 | 40 | def test_sgd_3(quad): 41 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None) 42 | assert abs(t3) <= 1e-6 43 | -------------------------------------------------------------------------------- /assignment1/tests/test_sigmoid.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sigmoid.py -vv -s -q 4 | python -m py.test tests/test_sigmoid.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_sigmoid.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q2_sigmoid import sigmoid, sigmoid_grad 18 | 19 | import random 20 | 21 | from collections import defaultdict, OrderedDict, Counter 22 | 23 | COUNT=5 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-7, np.abs(x) + np.abs(y)))) 28 | 29 | def test_sigmoid(): 30 | """ Original sigmoid test defined in q2_sigmoid.py; """ 31 | x = np.array([[1, 2], [-1, -2]]) 32 | f = sigmoid(x) 33 | assert rel_error(f, np.array([[0.73105858, 0.88079708], 34 | [0.26894142, 0.11920292]])) <= 1e-7 35 | 36 | def test_sigmoidgrad(): 37 | """ Original sigmoid gradient test defined in q2_sigmoid.py; """ 38 | x = np.array([[1, 2], [-1, -2]]) 39 | f = sigmoid(x) 40 | g = sigmoid_grad(f) 41 | assert rel_error(g, np.array([[0.19661193, 0.10499359], 42 | [0.19661193, 0.10499359]])) <= 1e-7 43 | 44 | @pytest.mark.parametrize("dim", list(range(1,8))) 45 | def test_sigmoid_shape(dim): 46 | testing_shape = [] 47 | for y in range(0,dim): 48 | testing_shape.append(np.random.randint(3,8)) 49 | shape = tuple(testing_shape) 50 | #z = np.random.randn(*testing_shape) 51 | x = np.random.standard_normal(shape) 52 | y = np.copy(x) 53 | assert x.shape == sigmoid(y).shape 54 | assert x.shape == sigmoid_grad(sigmoid(y)).shape 55 | 56 | def test_sigmoid_minus_z(count=100): 57 | z = np.random.normal(loc=0., scale=100., size=count) 58 | y = -z 59 | assert rel_error(1 - sigmoid(y), sigmoid(z)) <= 1e-7 60 | 61 | def test_sigmoid_monotone(count=100): 62 | z = np.random.normal(loc=0., scale=100., size=count) 63 | shift = np.random.uniform(low=0., high=10., size=count) 64 | assert np.all(sigmoid(z + shift) - sigmoid(z)) >= 0 65 | assert np.all(sigmoid(z - shift) - sigmoid(z)) <= 0 66 | 67 | def test_sigmoid_range(count=100): 68 | z = np.random.normal(loc=0., scale=100., size=count) 69 | assert np.max(sigmoid(z)) <= 1. 70 | assert np.max(sigmoid(z)) >= 0. 
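# For reference (an assumption about q2_sigmoid.py, which is not shown here),
# the functions exercised by these tests are expected to behave like
#   sigmoid(x)      = 1.0 / (1.0 + np.exp(-x))   # element-wise, values in (0, 1)
#   sigmoid_grad(f) = f * (1.0 - f)              # where f = sigmoid(x)
# so that sigmoid(-x) == 1 - sigmoid(x) and the finite-difference check in
# test_sigmoid_gradient below agrees with sigmoid_grad(sigmoid(x)).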
71 | 72 | @pytest.mark.parametrize('execution_number', list(range(COUNT))) 73 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 74 | def test_sigmoid_permutation_axis0(dim_1, execution_number): 75 | """ sigmoid needs to be applied element-wise;""" 76 | a1 = np.random.normal(size=(dim_1,1)) 77 | s1 = sigmoid(a1) 78 | 79 | permutation = np.random.permutation(dim_1) 80 | inverse_permutation = np.argsort(permutation) 81 | 82 | s1_perm = sigmoid(a1[permutation]) 83 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 84 | 85 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 86 | def test_sigmoid_permutation_axis1(dim_1): 87 | a1 = np.random.normal(size=(1,dim_1)) 88 | s1 = sigmoid(a1) 89 | 90 | permutation = np.random.permutation(dim_1) 91 | inverse_permutation = np.argsort(permutation) 92 | 93 | s1_perm = sigmoid(a1.ravel()[permutation]) 94 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 95 | #note: permutation(sigmoid(x)) = sigmoid(permutation(x)) 96 | 97 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 98 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 99 | def test_sigmoid_gradient(dim_1, dim_2): 100 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 101 | shift = np.random.uniform(low=1e-9, high=1e-5, size=(dim_1,dim_2)) 102 | ap = a1 + shift 103 | am = a1 - shift 104 | 105 | dsigmoid = (sigmoid(ap) - sigmoid(am)) / (2*shift) 106 | assert np.abs(np.max(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 107 | assert np.abs(np.min(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 108 | -------------------------------------------------------------------------------- /assignment1/tests/test_sigmoid_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sigmoid_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_sigmoid_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_sigmoid_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q2_sigmoid import sigmoid, sigmoid_grad 18 | from q2_sigmoid_sol import sigmoid_sol, sigmoid_grad_sol 19 | 20 | import random 21 | 22 | from collections import defaultdict, OrderedDict, Counter 23 | 24 | COUNT=5 25 | 26 | def rel_error(x,y): 27 | """ returns relative error """ 28 | return np.max(np.abs(x - y) / (np.maximum(1e-7, np.abs(x) + np.abs(y)))) 29 | 30 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 31 | def test_sigmoid(sigmoid_f): 32 | """ Original sigmoid test defined in q2_sigmoid.py; """ 33 | x = np.array([[1, 2], [-1, -2]]) 34 | f = sigmoid_f(x) 35 | assert rel_error(f, np.array([[0.73105858, 0.88079708], 36 | [0.26894142, 0.11920292]])) <= 1e-7 37 | 38 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 39 | def test_sigmoidgrad(sigmoid_f): 40 | """ Original sigmoid gradient test defined in q2_sigmoid.py; """ 41 | x = np.array([[1, 2], [-1, -2]]) 42 | f = sigmoid(x) 43 | g = sigmoid_grad(f) 44 | assert rel_error(g, np.array([[0.19661193, 0.10499359], 45 | [0.19661193, 0.10499359]])) <= 1e-7 46 | 47 | @pytest.mark.parametrize("dim", list(range(1,8))) 48 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 49 | def 
test_sigmoid_shape(dim, sigmoid_f): 50 | testing_shape = [] 51 | for y in range(0,dim): 52 | testing_shape.append(np.random.randint(3,8)) 53 | shape = tuple(testing_shape) 54 | #z = np.random.randn(*testing_shape) 55 | x = np.random.standard_normal(shape) 56 | y = np.copy(x) 57 | assert x.shape == sigmoid(y).shape 58 | assert x.shape == sigmoid_grad(sigmoid(y)).shape 59 | 60 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 61 | def test_sigmoid_minus_z(sigmoid_f, count=100): 62 | z = np.random.normal(loc=0., scale=100., size=count) 63 | y = -z 64 | assert rel_error(1 - sigmoid(y), sigmoid(z)) <= 1e-7 65 | 66 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 67 | def test_sigmoid_monotone(sigmoid_f, count=100): 68 | z = np.random.normal(loc=0., scale=100., size=count) 69 | shift = np.random.uniform(low=0., high=10., size=count) 70 | assert np.all(sigmoid(z + shift) - sigmoid(z)) >= 0 71 | assert np.all(sigmoid(z - shift) - sigmoid(z)) <= 0 72 | 73 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 74 | def test_sigmoid_range(sigmoid_f, count=100): 75 | z = np.random.normal(loc=0., scale=100., size=count) 76 | assert np.max(sigmoid(z)) <= 1. 77 | assert np.max(sigmoid(z)) >= 0. 78 | 79 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 80 | @pytest.mark.parametrize('execution_number', list(range(COUNT))) 81 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 82 | def test_sigmoid_permutation_axis0(dim_1, execution_number, sigmoid_f): 83 | """ sigmoid needs to be applied element-wise;""" 84 | a1 = np.random.normal(size=(dim_1,1)) 85 | s1 = sigmoid(a1) 86 | 87 | permutation = np.random.permutation(dim_1) 88 | inverse_permutation = np.argsort(permutation) 89 | 90 | s1_perm = sigmoid(a1[permutation]) 91 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 92 | 93 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 94 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 95 | def test_sigmoid_permutation_axis1(dim_1, sigmoid_f): 96 | a1 = np.random.normal(size=(1,dim_1)) 97 | s1 = sigmoid(a1) 98 | 99 | permutation = np.random.permutation(dim_1) 100 | inverse_permutation = np.argsort(permutation) 101 | 102 | s1_perm = sigmoid(a1.ravel()[permutation]) 103 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 104 | #note: permutation(sigmoid(x)) = sigmoid(permutation(x)) 105 | 106 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 107 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 108 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 109 | def test_sigmoid_gradient(dim_1, dim_2, sigmoid_f): 110 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 111 | shift = np.random.uniform(low=1e-9, high=1e-5, size=(dim_1,dim_2)) 112 | ap = a1 + shift 113 | am = a1 - shift 114 | 115 | dsigmoid = (sigmoid(ap) - sigmoid(am)) / (2*shift) 116 | assert np.abs(np.max(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 117 | assert np.abs(np.min(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 118 | 119 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 120 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 121 | def test_sigmoid(dim_1, dim_2): 122 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 123 | assert rel_error(sigmoid(a1), sigmoid_sol(a1)) <= 1e-10 124 | 125 | 126 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 127 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 128 | def test_sigmoid(dim_1, dim_2): 129 | a1 = np.random.normal(loc=0., scale=20., 
size=(dim_1,dim_2)) 130 | a1_copy = a1.copy() 131 | 132 | s_a1 = sigmoid(a1) 133 | s_sol_a1 = sigmoid_sol(a1_copy) 134 | 135 | assert rel_error(sigmoid_grad(s_a1), sigmoid_grad_sol(s_sol_a1)) <= 1e-10 136 | 137 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 138 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 139 | def test_sigmoid(dim_1, dim_2): 140 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 141 | a1_copy = a1.copy() 142 | 143 | assert rel_error(sigmoid_grad(a1), sigmoid_grad_sol(a1_copy)) <= 1e-10 144 | 145 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax.py -vv -s -q 4 | python -m py.test tests/test_softmax.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | 19 | import random 20 | 21 | from collections import defaultdict, OrderedDict, Counter 22 | 23 | def rel_error(x,y): 24 | """ returns relative error """ 25 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 26 | 27 | @pytest.fixture(scope='module') 28 | def array_1(): 29 | return np.array([1,2]) 30 | 31 | @pytest.fixture(scope='module') 32 | def array_2(): 33 | return np.array([1001,1002]) 34 | 35 | @pytest.fixture(scope='module') 36 | def array_3(): 37 | return np.array([-1001,-1002]) 38 | 39 | @pytest.fixture(scope='module') 40 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 41 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 42 | 43 | @pytest.fixture(scope='module') 44 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 45 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 46 | 47 | @pytest.fixture(scope='module') 48 | def linear_shift(low=-100, high=100.): 49 | return np.random.uniform(low,high) 50 | 51 | @pytest.fixture(scope='module') 52 | def vector_shift(in_dim, low=-100., high=100.): 53 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 54 | 55 | #starting with some simple fixed test 56 | def test_softmax_array_1(array_1): 57 | """ Original softmax test defined in q2_softmax.py; """ 58 | assert rel_error(softmax(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 59 | 60 | def test_softmax_array_2(array_1, array_2): 61 | """ Original softmax test defined in q2_softmax.py; """ 62 | assert rel_error(softmax(array_2), softmax(array_1)) < 1e-8 63 | 64 | def test_softmax_array_3(array_3): 65 | """ Original softmax test defined in q2_softmax.py; """ 66 | assert rel_error(softmax(array_3), np.array( 67 | [0.73105858, 0.26894142])) 68 | 69 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 70 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 71 | def test_softmax_shape(dim_1, dim_2): 72 | a1 = np.random.normal(size=(dim_1,dim_2)) 73 | assert a1.shape == softmax(a1).shape 74 | 75 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 76 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 77 | def test_softmax_linearity(dim_1, dim_2): 78 | shift = 
linear_shift(-100,100) 79 | a1 = np.random.normal(size=(dim_1,dim_2)) 80 | a2 = a1 + shift 81 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 82 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 83 | 84 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 85 | def test_softmax_permutation_axis0(dim_1): 86 | a1 = np.random.normal(size=(dim_1,1)) 87 | s1 = softmax(a1) 88 | 89 | permutation = np.random.permutation(dim_1) 90 | inverse_permutation = np.argsort(permutation) 91 | 92 | s1_perm = softmax(a1[permutation]) 93 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 94 | 95 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 96 | def test_softmax_permutation_axis1(dim_1): 97 | a1 = np.random.normal(size=(1,dim_1)) 98 | s1 = softmax(a1) 99 | 100 | permutation = np.random.permutation(dim_1) 101 | inverse_permutation = np.argsort(permutation) 102 | 103 | s1_perm = softmax(a1.ravel()[permutation]) 104 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 105 | #note: permutation(softmax(x)) = softmax(permutation(x)) 106 | 107 | #probably can move this to a 'fake' data call 108 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 109 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 110 | def test_softmax_linearity_rowwise(dim_1, dim_2): 111 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 112 | #print(shift) 113 | a1 = np.random.normal(size=(dim_1,dim_2)) 114 | a2 = a1 + shift 115 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 116 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 117 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax_regression.py -vv -s -q 4 | python -m py.test tests/test_softmax_regression.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax_regression.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 19 | from q4_softmaxreg import softmaxRegression, accuracy, softmax_wrapper, getSentenceFeature 20 | 21 | import random 22 | 23 | from collections import defaultdict, OrderedDict, Counter 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 28 | 29 | @pytest.fixture(scope='module') 30 | def array_1(): 31 | return np.array([1,2]) 32 | 33 | @pytest.fixture(scope='module') 34 | def array_2(): 35 | return np.array([1001,1002]) 36 | 37 | @pytest.fixture(scope='module') 38 | def array_3(): 39 | return np.array([-1001,-1002]) 40 | 41 | @pytest.fixture(scope='module') 42 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 43 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 44 | 45 | @pytest.fixture(scope='module') 46 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 47 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 48 | 49 | @pytest.fixture(scope='module') 50 | def linear_shift(low=-100, high=100.): 51 
| return np.random.uniform(low,high) 52 | 53 | @pytest.fixture(scope='module') 54 | def vector_shift(in_dim, low=-100., high=100.): 55 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 56 | 57 | #starting with some simple fixed test 58 | def test_softmax_array_1(array_1): 59 | """ Original softmax test defined in q2_softmax.py; """ 60 | assert rel_error(softmax(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 61 | 62 | def test_softmax_array_2(array_1, array_2): 63 | """ Original softmax test defined in q2_softmax.py; """ 64 | assert rel_error(softmax(array_2), softmax(array_1)) < 1e-8 65 | 66 | def test_softmax_array_3(array_3): 67 | """ Original softmax test defined in q2_softmax.py; """ 68 | assert rel_error(softmax(array_3), np.array( 69 | [0.73105858, 0.26894142])) 70 | 71 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 72 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 73 | def test_softmax_shape(dim_1, dim_2): 74 | a1 = np.random.normal(size=(dim_1,dim_2)) 75 | assert a1.shape == softmax(a1).shape 76 | 77 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 78 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 79 | def test_softmax_linearity(dim_1, dim_2): 80 | shift = linear_shift(-100,100) 81 | a1 = np.random.normal(size=(dim_1,dim_2)) 82 | a2 = a1 + shift 83 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 84 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 85 | 86 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 87 | def test_softmax_permutation_axis0(dim_1): 88 | a1 = np.random.normal(size=(dim_1,1)) 89 | s1 = softmax(a1) 90 | 91 | permutation = np.random.permutation(dim_1) 92 | inverse_permutation = np.argsort(permutation) 93 | 94 | s1_perm = softmax(a1[permutation]) 95 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 96 | 97 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 98 | def test_softmax_permutation_axis1(dim_1): 99 | a1 = np.random.normal(size=(1,dim_1)) 100 | s1 = softmax(a1) 101 | 102 | permutation = np.random.permutation(dim_1) 103 | inverse_permutation = np.argsort(permutation) 104 | 105 | s1_perm = softmax(a1.ravel()[permutation]) 106 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 107 | #note: permutation(softmax(x)) = softmax(permutation(x)) 108 | 109 | #probably can move this to a 'fake' data call 110 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 111 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 112 | def test_softmax_linearity_rowwise(dim_1, dim_2): 113 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 114 | #print(shift) 115 | a1 = np.random.normal(size=(dim_1,dim_2)) 116 | a2 = a1 + shift 117 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 118 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 119 | 120 | @pytest.mark.parametrize("samples", [5000, 10000]) 121 | @pytest.mark.parametrize("features", list(range(20, 41, 5))) 122 | @pytest.mark.parametrize("classes", [100]) 123 | def test_softmax_regression(samples, features, classes): 124 | dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0 125 | dummy_features = np.random.randn(samples, features) 126 | dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1) 127 | #np.sqrt(p * (1 - p) / N ) 128 | #n/N 129 | #rough estimate of how many should be correct (assuming random: -np.log(p), where p is the probability of guessing correctly) 130 | assert np.abs(-np.log(1./classes) - (softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) / 
-np.log(1./classes) <= 0.15 131 | 132 | @pytest.mark.parametrize("samples", [50, 60]) 133 | @pytest.mark.parametrize("features", list(range(20, 41, 5))) 134 | @pytest.mark.parametrize("classes", [10]) 135 | def test_softmax_gradient(samples, features, classes, check_count=20): 136 | dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0 137 | dummy_features = np.random.randn(samples, features) 138 | dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1) 139 | #rough estimate of how many should be correct (assuming random: -np.log(p), where p is the probability of guessing correctly) 140 | #assert np.abs(-np.log(1./classes) - (softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) / -np.log(1./classes) <= 0.15 141 | 142 | f = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[0] 143 | g = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[1] 144 | 145 | W = dummy_weights 146 | grad_analytic = g(W) 147 | 148 | for i in range(check_count): 149 | ix = tuple([random.randrange(m) for m in W.shape]) 150 | shift = np.zeros(W.shape) 151 | shift[ix] = 1e-7 152 | grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7) 153 | assert( abs(grad_numerical - grad_analytic[ix]) / (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0002) 154 | #only evaluating at one point (expect the majority of the deviation at this point) 155 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax.py -vv -s -q 4 | python -m py.test tests/test_softmax.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | from q1_softmax_sol import softmax_sol 19 | 20 | import random 21 | 22 | from collections import defaultdict, OrderedDict, Counter 23 | 24 | def rel_error(x,y): 25 | """ returns relative error """ 26 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 27 | 28 | @pytest.fixture(scope='module') 29 | def array_1(): 30 | return np.array([1,2]) 31 | 32 | @pytest.fixture(scope='module') 33 | def array_2(): 34 | return np.array([1001,1002]) 35 | 36 | @pytest.fixture(scope='module') 37 | def array_3(): 38 | return np.array([-1001,-1002]) 39 | 40 | @pytest.fixture(scope='module') 41 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 42 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 43 | 44 | @pytest.fixture(scope='module') 45 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 46 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 47 | 48 | @pytest.fixture(scope='module') 49 | def linear_shift(low=-100, high=100.): 50 | return np.random.uniform(low,high) 51 | 52 | @pytest.fixture(scope='module') 53 | def vector_shift(in_dim, low=-100., high=100.): 54 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 55 | 56 | #starting with some simple fixed test 57 | 
@pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 58 | def test_softmax_array_1(array_1, softmax_f): 59 | """ Original softmax test defined in q2_softmax.py; """ 60 | assert rel_error(softmax_f(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 61 | 62 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 63 | def test_softmax_array_2(array_1, array_2, softmax_f): 64 | """ Original softmax test defined in q2_softmax.py; """ 65 | assert rel_error(softmax_f(array_2), softmax_f(array_1)) < 1e-8 66 | 67 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 68 | def test_softmax_array_3(array_3, softmax_f): 69 | """ Original softmax test defined in q2_softmax.py; """ 70 | assert rel_error(softmax_f(array_3), np.array( 71 | [0.73105858, 0.26894142])) 72 | 73 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 74 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 75 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 76 | def test_softmax_shape(dim_1, dim_2, softmax_f): 77 | a1 = np.random.normal(size=(dim_1,dim_2)) 78 | assert a1.shape == softmax_f(a1).shape 79 | 80 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 81 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 82 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 83 | def test_softmax_linearity(dim_1, dim_2, softmax_f): 84 | shift = linear_shift(-100,100) 85 | a1 = np.random.normal(size=(dim_1,dim_2)) 86 | a2 = a1 + shift 87 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 88 | assert rel_error(softmax_f(a1), softmax_f(a2)) < 1e-8 89 | 90 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 91 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 92 | def test_softmax_permutation_axis0(dim_1, softmax_f): 93 | a1 = np.random.normal(size=(dim_1,1)) 94 | s1 = softmax_f(a1) 95 | 96 | permutation = np.random.permutation(dim_1) 97 | inverse_permutation = np.argsort(permutation) 98 | 99 | s1_perm = softmax_f(a1[permutation]) 100 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 101 | 102 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 103 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 104 | def test_softmax_permutation_axis1(dim_1, softmax_f): 105 | a1 = np.random.normal(size=(1,dim_1)) 106 | s1 = softmax_f(a1) 107 | 108 | permutation = np.random.permutation(dim_1) 109 | inverse_permutation = np.argsort(permutation) 110 | 111 | s1_perm = softmax_f(a1.ravel()[permutation]) 112 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 113 | #note: permutation(softmax(x)) = softmax(permutation(x)) 114 | 115 | #probably can move this to a 'fake' data call 116 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 117 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 118 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 119 | def test_softmax_linearity_rowwise(dim_1, dim_2, softmax_f): 120 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 121 | #print(shift) 122 | a1 = np.random.normal(size=(dim_1,dim_2)) 123 | a2 = a1 + shift 124 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 125 | assert rel_error(softmax_f(a1), softmax_f(a2)) < 1e-8 126 | 127 | #ABOVE tests both implementations; 128 | #Now comparisons 129 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 130 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 131 | def test_softmax_vs_softmax_sol(dim_1, dim_2): 132 | distribution = np.random.uniform(low=-100,high=100, size=(dim_1, dim_2)) 133 | 
assert rel_error(softmax_sol(distribution), softmax(distribution)) < 1e-10 134 | -------------------------------------------------------------------------------- /assignment1/tests/test_word2vec_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_word2vec_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_word2vec_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_word2vec_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | from collections import defaultdict, OrderedDict, Counter 20 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 21 | 22 | from q3_word2vec import normalizeRows 23 | from q3_word2vec import softmaxCostAndGradient, negSamplingCostAndGradient 24 | from q3_word2vec import skipgram, cbow 25 | 26 | from q3_word2vec_sol import normalizeRows_sol 27 | from q3_word2vec_sol import softmaxCostAndGradient_sol, negSamplingCostAndGradient_sol 28 | from q3_word2vec_sol import skipgram_sol, cbow_sol 29 | 30 | from q3_word2vec import word2vec_sgd_wrapper 31 | 32 | def rel_error(x,y): 33 | """ returns relative error """ 34 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 35 | 36 | @pytest.fixture(scope='module') 37 | def dataset_default(): 38 | dataset = type('dummy', (), {})() 39 | def dummySampleTokenIdx(): 40 | return random.randint(0, 4) 41 | 42 | def getRandomContext(C): 43 | tokens = ["a", "b", "c", "d", "e"] 44 | return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \ 45 | for i in range(2*C)] 46 | dataset.sampleTokenIdx = dummySampleTokenIdx 47 | dataset.getRandomContext = getRandomContext 48 | dataset.genTokens = ["a", "b", "c", "d", "e"] 49 | dataset.dummy_tokens = dict((i,j) for j,i in enumerate(dataset.genTokens())) 50 | dataset.dummy_vectors = normalizeRows(np.random(10,3)) 51 | return dataset 52 | 53 | @pytest.fixture(scope='module') 54 | def dataset_large(size = 10): 55 | assert size < 26 56 | dataset = type('dummy', (), {})() 57 | 58 | def dummySampleTokenIdx(): 59 | return random.randint(0, size) 60 | 61 | def gen_tokens(): 62 | tokens = [chr(i + ord('a')) for i in range(0, size)] 63 | return tokens 64 | 65 | def getRandomContext(C = size): 66 | tokens = gen_tokens() 67 | return tokens[random.randint(0,size-1)], [tokens[random.randint(0,size-1)] \ 68 | for i in range(2*C)] 69 | 70 | dataset.size = size 71 | dataset.sampleTokenIdx = dummySampleTokenIdx 72 | dataset.getRandomContext = getRandomContext 73 | dataset.genTokens = gen_tokens() 74 | dataset.dummy_tokens = dict((i,j) for j,i in enumerate(dataset.genTokens)) 75 | dataset.dummy_vectors = normalizeRows(np.random.randn(size * 2, 5)) 76 | return dataset 77 | 78 | 79 | def test_skipgram_to_solutions(dataset_large): 80 | word2vec_sgd_wrapper(skipgram, dataset_large.dummy_tokens, dataset_large.dummy_vectors, dataset_large, 5) 81 | #this might be harder than it looks (since the number of calls to random have to be identical;) 82 | #will probably need to re-work this -------------------------------------------------------------------------------- /assignment2/README.md: 
-------------------------------------------------------------------------------- 1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/) 2 | ==================================================================================== 3 | 4 | Assignment #2: Deep and Recurrent Neural Networks 5 | ------------------------------------------------- 6 | 7 | **Due Date: 5/5/2016 (Thu) 11:59 PM PST.** 8 | 9 | In this assignment you will learn how to use TensorFlow to solve problems in NLP. In particular, you'll use TensorFlow to implement feed-forward neural networks and recurrent neural networks (RNNs), and apply them to the tasks of Named Entity Recognition (NER) and Language Modeling (LM). 10 | 11 | As with Assignment #1, you're limited to a maximum of three late days on this assignment. Don't forget that the in-class midterm is scheduled for May 10, so we recommend starting this one early! 12 | 13 | Setup 14 | ----- 15 | 16 | **Note:** *Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help.* 17 | 18 | **Get the code (updated!):** [**Download the starter code here**](http://cs224d.stanford.edu/assignment2/assignment2.zip) and [**the assignment handout here**](http://cs224d.stanford.edu/assignment2/assignment2.pdf). 19 | 20 | **Python package requirements:** The core requirements for this assignment are 21 | * tensorflow 22 | * numpy 23 | 24 | If you have a recent Linux (**Ubuntu 14.04** or later) install or Mac OS X, the default TensorFlow installation directions will work well for you. If not, we recommend using the installation on the [**corn clusters**](https://web.stanford.edu/group/farmshare/cgi-bin/wiki/index.php/Main_Page). Note that you will need to use the system default Python, not a local Anaconda Python. 25 | 26 | The corn clusters don't provide GPU support. If you'd like to use GPUs, we recommend using AWS. We've put together a [**brief tutorial**](http://cs224d.stanford.edu/supplementary/aws-tutorial-2.pdf) with directions on how to get started with TensorFlow on AWS. 27 | 28 | Submitting your work 29 | -------------------- 30 | 31 | Once you are done working, run the `collectSubmission.sh` script; this will produce a file called `assignment2.zip`. Rename this file to `<your SUNet ID>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be `jdoe.zip`. 32 | 33 | For the written component, please upload a PDF file of your solutions to `Gradescope`. If you are enrolled in the class you should have been signed up automatically. If you added the class late or are not signed up, post privately to Piazza and we will add you to the roster. When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. This is crucial so that we can provide accurate feedback. If a question has no written component (completely programmatic), map it to the same page as the previous or next section. 34 | 35 | Please upload your programming submission below. 36 | 37 | 38 | Assignment Overview (Tasks) 39 | --------------------------- 40 | 41 | There will be three parts to this assignment. Each part has written and code components. The assignment is designed to be completed in order, as later sections leverage solutions to earlier parts. We recommend reading the assignment carefully and starting early, as some parts may take significant time to run.
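To give a flavor of the TensorFlow workflow these parts build on, here is a minimal, illustrative sketch of a softmax classifier trained through the placeholder/`feed_dict` pattern. Everything in it (names, sizes, hyperparameters) is made up for the example and is **not** part of the starter code:

```python
import numpy as np
import tensorflow as tf

n_features, n_classes, batch_size = 50, 5, 64  # illustrative sizes only

# Placeholders mark where data enters the graph; values are supplied at run time.
x = tf.placeholder(tf.float32, shape=(None, n_features))
y = tf.placeholder(tf.float32, shape=(None, n_classes))

# A linear layer followed by a softmax over the classes.
W = tf.Variable(tf.zeros((n_features, n_classes)))
b = tf.Variable(tf.zeros((n_classes,)))
pred = tf.nn.softmax(tf.matmul(x, W) + b)

# Mean cross-entropy loss and a plain gradient-descent training op.
loss = -tf.reduce_mean(tf.reduce_sum(y * tf.log(pred + 1e-12), reduction_indices=1))
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # One training step on a random batch, fed in through feed_dict.
    batch_x = np.random.randn(batch_size, n_features).astype(np.float32)
    batch_y = np.eye(n_classes)[np.random.randint(n_classes, size=batch_size)].astype(np.float32)
    _, batch_loss = sess.run([train_op, loss], feed_dict={x: batch_x, y: batch_y})
```

The starter code's `Model` class (see `model.py`) organizes the same ingredients, placeholder creation and a `create_feed_dict` helper, that Q1, Q2, and Q3 build on.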
42 | 43 | Q1: TensorFlow Softmax (20 points) 44 | ---------------------------------- 45 | 46 | Q2: TensorFlow NER Window Model (35 points) 47 | ------------------------------------------- 48 | 49 | Q3: TensorFlow RNN Language Model (45 points) 50 | --------------------------------------------- 51 | -------------------------------------------------------------------------------- /assignment2/assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment2/assignment2.pdf -------------------------------------------------------------------------------- /assignment2/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment2/data_utils/__init__.py -------------------------------------------------------------------------------- /assignment2/data_utils/ner.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Utility functions for NER assignment 3 | # Assigment 2, part 1 for CS224D 4 | ## 5 | 6 | from .utils import invert_dict 7 | from numpy import * 8 | 9 | def load_wv(vocabfile, wvfile): 10 | wv = loadtxt(wvfile, dtype=float) 11 | with open(vocabfile) as fd: 12 | words = [line.strip() for line in fd] 13 | num_to_word = dict(enumerate(words)) 14 | word_to_num = invert_dict(num_to_word) 15 | return wv, word_to_num, num_to_word 16 | 17 | 18 | def save_predictions(y, filename): 19 | """Save predictions, one per line.""" 20 | with open(filename, 'w') as fd: 21 | fd.write("\n".join(map(str, y))) 22 | fd.write("\n") -------------------------------------------------------------------------------- /assignment2/data_utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys, os, re, json 2 | import itertools 3 | from collections import Counter 4 | import time 5 | from numpy import * 6 | 7 | import pandas as pd 8 | 9 | 10 | def invert_dict(d): 11 | return {v:k for k,v in d.items()} 12 | 13 | def flatten1(lst): 14 | return list(itertools.chain.from_iterable(lst)) 15 | 16 | def load_wv_pandas(fname): 17 | return pd.read_hdf(fname, 'data') 18 | 19 | def extract_wv(df): 20 | num_to_word = dict(enumerate(df.index)) 21 | word_to_num = invert_dict(num_to_word) 22 | wv = df.as_matrix() 23 | return wv, word_to_num, num_to_word 24 | 25 | def canonicalize_digits(word): 26 | if any([c.isalpha() for c in word]): return word 27 | word = re.sub("\d", "DG", word) 28 | if word.startswith("DG"): 29 | word = word.replace(",", "") # remove thousands separator 30 | return word 31 | 32 | def canonicalize_word(word, wordset=None, digits=True): 33 | word = word.lower() 34 | if digits: 35 | if (wordset != None) and (word in wordset): return word 36 | word = canonicalize_digits(word) # try to canonicalize numbers 37 | if (wordset == None) or (word in wordset): return word 38 | else: return "UUUNKKK" # unknown token 39 | 40 | 41 | ## 42 | # Utility functions used to create dataset 43 | ## 44 | def augment_wv(df, extra=["UUUNKKK"]): 45 | for e in extra: 46 | df.loc[e] = zeros(len(df.columns)) 47 | 48 | def prune_wv(df, vocab, extra=["UUUNKKK"]): 49 | """Prune word vectors to vocabulary.""" 50 | items = set(vocab).union(set(extra)) 51 | return df.filter(items=items, axis='index') 52 | 53 | def load_wv_raw(fname): 54 | return pd.read_table(fname, 
sep="\s+", 55 | header=None, 56 | index_col=0, 57 | quoting=3) 58 | 59 | def load_dataset(fname): 60 | docs = [] 61 | with open(fname, encoding="utf-8") as fd: 62 | cur = [] 63 | for line in fd: 64 | # new sentence on -DOCSTART- or blank line 65 | if re.match(r"-DOCSTART-.+", line) or (len(line.strip()) == 0): 66 | if len(cur) > 0: 67 | docs.append(cur) 68 | cur = [] 69 | else: # read in tokens 70 | cur.append(line.strip().split("\t",1)) 71 | # flush running buffer 72 | docs.append(cur) 73 | return docs 74 | 75 | def extract_tag_set(docs): 76 | tags = set(flatten1([[t[1].split("|")[0] for t in d] for d in docs])) 77 | return tags 78 | 79 | def extract_word_set(docs): 80 | words = set(flatten1([[t[0] for t in d] for d in docs])) 81 | return words 82 | 83 | def pad_sequence(seq, left=1, right=1): 84 | return left*[("", "")] + seq + right*[("", "")] 85 | 86 | ## 87 | # For window models 88 | def seq_to_windows(words, tags, word_to_num, tag_to_num, left=1, right=1): 89 | ns = len(words) 90 | X = [] 91 | y = [] 92 | for i in range(ns): 93 | if words[i] == "" or words[i] == "": 94 | continue # skip sentence delimiters 95 | tagn = tag_to_num[tags[i]] 96 | idxs = [word_to_num[words[ii]] 97 | for ii in range(i - left, i + right + 1)] 98 | X.append(idxs) 99 | y.append(tagn) 100 | return array(X), array(y) 101 | 102 | def docs_to_windows(docs, word_to_num, tag_to_num, wsize=3): 103 | pad = (wsize - 1)//2 104 | docs = flatten1([pad_sequence(seq, left=pad, right=pad) for seq in docs]) 105 | 106 | words, tags = zip(*docs) 107 | words = [canonicalize_word(w, word_to_num) for w in words] 108 | tags = [t.split("|")[0] for t in tags] 109 | return seq_to_windows(words, tags, word_to_num, tag_to_num, pad, pad) 110 | 111 | def window_to_vec(window, L): 112 | """Concatenate word vectors for a given window.""" 113 | return concatenate([L[i] for i in window]) 114 | 115 | ## 116 | # For fixed-window LM: 117 | # each row of X is a list of word indices 118 | # each entry of y is the word index to predict 119 | def seq_to_lm_windows(words, word_to_num, ngram=2): 120 | ns = len(words) 121 | X = [] 122 | y = [] 123 | for i in range(ns): 124 | if words[i] == "": 125 | continue # skip sentence begin, but do predict end 126 | idxs = [word_to_num[words[ii]] 127 | for ii in range(i - ngram + 1, i + 1)] 128 | X.append(idxs[:-1]) 129 | y.append(idxs[-1]) 130 | return array(X), array(y) 131 | 132 | def docs_to_lm_windows(docs, word_to_num, ngram=2): 133 | docs = flatten1([pad_sequence(seq, left=(ngram-1), right=1) 134 | for seq in docs]) 135 | words = [canonicalize_word(wt[0], word_to_num) for wt in docs] 136 | return seq_to_lm_windows(words, word_to_num, ngram) 137 | 138 | 139 | ## 140 | # For RNN LM 141 | # just convert each sentence to a list of indices 142 | # after padding each with ... 
tokens 143 | def seq_to_indices(words, word_to_num): 144 | return array([word_to_num[w] for w in words]) 145 | 146 | def docs_to_indices(docs, word_to_num): 147 | docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 148 | ret = [] 149 | for seq in docs: 150 | words = [canonicalize_word(wt[0], word_to_num) for wt in seq] 151 | ret.append(seq_to_indices(words, word_to_num)) 152 | 153 | # return as numpy array for fancier slicing 154 | return array(ret, dtype=object) 155 | 156 | def offset_seq(seq): 157 | return seq[:-1], seq[1:] 158 | 159 | def seqs_to_lmXY(seqs): 160 | X, Y = zip(*[offset_seq(s) for s in seqs]) 161 | return array(X, dtype=object), array(Y, dtype=object) 162 | 163 | ## 164 | # For RNN tagger 165 | # return X, Y as lists 166 | # where X[i] is indices, Y[i] is tags for a sequence 167 | # NOTE: this does not use padding tokens! 168 | # (RNN should natively handle begin/end) 169 | def docs_to_tag_sequence(docs, word_to_num, tag_to_num): 170 | # docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 171 | X = [] 172 | Y = [] 173 | for seq in docs: 174 | if len(seq) < 1: continue 175 | words, tags = zip(*seq) 176 | 177 | words = [canonicalize_word(w, word_to_num) for w in words] 178 | x = seq_to_indices(words, word_to_num) 179 | X.append(x) 180 | 181 | tags = [t.split("|")[0] for t in tags] 182 | y = seq_to_indices(tags, tag_to_num) 183 | Y.append(y) 184 | 185 | # return as numpy array for fancier slicing 186 | return array(X, dtype=object), array(Y, dtype=object) 187 | 188 | def idxs_to_matrix(idxs, L): 189 | """Return a matrix X with each row 190 | as a word vector for the corresponding 191 | index in idxs.""" 192 | return vstack([L[i] for i in idxs]) 193 | -------------------------------------------------------------------------------- /assignment2/model.py: -------------------------------------------------------------------------------- 1 | class Model(object): 2 | """Abstracts a Tensorflow graph for a learning task. 3 | 4 | We use various Model classes as usual abstractions to encapsulate tensorflow 5 | computational graphs. Each algorithm you will construct in this homework will 6 | inherit from a Model object. 7 | """ 8 | def __init__(self): 9 | self.input_data = None 10 | 11 | def load_data(self): 12 | """Loads data from disk and stores it in memory. 13 | 14 | Feel free to add instance variables to Model object that store loaded data. 15 | """ 16 | raise NotImplementedError("Each Model must re-implement this method.") 17 | 18 | def add_placeholders(self): 19 | """Adds placeholder variables to tensorflow computational graph. 20 | 21 | Tensorflow uses placeholder variables to represent locations in a 22 | computational graph where data is inserted. These placeholders are used as 23 | inputs by the rest of the model building code and will be fed data during 24 | training. 25 | 26 | See for more information: 27 | 28 | https://www.tensorflow.org/versions/r0.7/api_docs/python/io_ops.html#placeholders 29 | """ 30 | raise NotImplementedError("Each Model must re-implement this method.") 31 | 32 | def create_feed_dict(self, input_batch, label_batch): 33 | """Creates the feed_dict for training the given step. 34 | 35 | A feed_dict takes the form of: 36 | 37 | feed_dict = { 38 | : , 39 | .... 40 | } 41 | 42 | If label_batch is None, then no labels are added to feed_dict. 43 | 44 | Hint: The keys for the feed_dict should be a subset of the placeholder 45 | tensors created in add_placeholders. 46 | 47 | Args: 48 | input_batch: A batch of input data. 
49 | label_batch: A batch of label data. 50 | Returns: 51 | feed_dict: The feed dictionary mapping from placeholders to values. 52 | """ 53 | raise NotImplementedError("Each Model must re-implement this method.") 54 | 55 | def add_model(self, input_data): 56 | """Implements core of model that transforms input_data into predictions. 57 | 58 | The core transformation for this model which transforms a batch of input 59 | data into a batch of predictions. 60 | 61 | Args: 62 | input_data: A tensor of shape (batch_size, n_features). 63 | Returns: 64 | out: A tensor of shape (batch_size, n_classes) 65 | """ 66 | raise NotImplementedError("Each Model must re-implement this method.") 67 | 68 | def add_loss_op(self, pred): 69 | """Adds ops for loss to the computational graph. 70 | 71 | Args: 72 | pred: A tensor of shape (batch_size, n_classes) 73 | Returns: 74 | loss: A 0-d tensor (scalar) output 75 | """ 76 | raise NotImplementedError("Each Model must re-implement this method.") 77 | 78 | def run_epoch(self, sess, input_data, input_labels): 79 | """Runs an epoch of training. 80 | 81 | Trains the model for one-epoch. 82 | 83 | Args: 84 | sess: tf.Session() object 85 | input_data: np.ndarray of shape (n_samples, n_features) 86 | input_labels: np.ndarray of shape (n_samples, n_classes) 87 | Returns: 88 | average_loss: scalar. Average minibatch loss of model on epoch. 89 | """ 90 | raise NotImplementedError("Each Model must re-implement this method.") 91 | 92 | def fit(self, sess, input_data, input_labels): 93 | """Fit model on provided data. 94 | 95 | Args: 96 | sess: tf.Session() 97 | input_data: np.ndarray of shape (n_samples, n_features) 98 | input_labels: np.ndarray of shape (n_samples, n_classes) 99 | Returns: 100 | losses: list of loss per epoch 101 | """ 102 | raise NotImplementedError("Each Model must re-implement this method.") 103 | 104 | def predict(self, sess, input_data, input_labels=None): 105 | """Make predictions from the provided model. 106 | Args: 107 | sess: tf.Session() 108 | input_data: np.ndarray of shape (n_samples, n_features) 109 | input_labels: np.ndarray of shape (n_samples, n_classes) 110 | Returns: 111 | average_loss: Average loss of model. 112 | predictions: Predictions of model on input_data 113 | """ 114 | return None, None 115 | raise NotImplementedError("Each Model must re-implement this method.") 116 | 117 | class LanguageModel(Model): 118 | """Abstracts a Tensorflow graph for learning language models. 119 | 120 | Adds ability to do embedding. 121 | """ 122 | def add_embedding(self): 123 | """Add embedding layer. that maps from vocabulary to vectors. 124 | """ 125 | raise NotImplementedError("Each Model must re-implement this method.") 126 | 127 | def predict(self, sess, input_data, input_labels=None): 128 | return None, None 129 | -------------------------------------------------------------------------------- /assignment2/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def softmax(x): 5 | """ 6 | Compute the softmax function in tensorflow. 7 | 8 | You might find the tensorflow functions tf.exp, tf.reduce_max, 9 | tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may 10 | not need to use all of these functions). Recall also that many common 11 | tensorflow operations are sugared (e.g. x * y does a tensor multiplication 12 | if x and y are both tensors). 
Make sure to implement the numerical stability 13 | fixes as in the previous homework! 14 | 15 | Args: 16 | x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are 17 | represented by row-vectors. (For simplicity, no need to handle 1-d 18 | input as in the previous homework) 19 | Returns: 20 | out: tf.Tensor with shape (n_sample, n_features). You need to construct this 21 | tensor in this problem. 22 | """ 23 | 24 | ### YOUR CODE HERE 25 | log_c = tf.reduce_max(x, reduction_indices=[len(x.get_shape()) - 1], keep_dims=True) 26 | y = tf.reduce_sum(tf.exp(x - log_c), reduction_indices=[len(x.get_shape()) - 1], keep_dims=True) 27 | out = tf.exp(x - log_c) / y 28 | ### END YOUR CODE 29 | 30 | return out 31 | 32 | def cross_entropy_loss(y, yhat): 33 | """ 34 | Compute the cross entropy loss in tensorflow. 35 | 36 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 37 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 38 | be of dtype tf.float32. 39 | 40 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 41 | solutions are possible, so you may not need to use all of these functions). 42 | 43 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 44 | functions. 45 | 46 | Args: 47 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 48 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 49 | probability distribution and should sum to 1. 50 | Returns: 51 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 52 | tensor in the problem. 53 | """ 54 | ### YOUR CODE HERE 55 | out = - tf.reduce_sum(y * tf.log(yhat + 1e-12)) 56 | # out = tf.reduce_sum(- tf.reduce_sum(y * tf.log(yhat + 1e-12), reduction_indices=[len(yhat.get_shape()) - 1])) 57 | 58 | ### END YOUR CODE 59 | return out 60 | 61 | def cross_entropy_mean_loss(y, yhat): 62 | """ 63 | Compute the cross entropy loss in tensorflow. 64 | 65 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 66 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 67 | be of dtype tf.float32. 68 | 69 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 70 | solutions are possible, so you may not need to use all of these functions). 71 | 72 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 73 | functions. 74 | 75 | Args: 76 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 77 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 78 | probability distribution and should sum to 1. 79 | Returns: 80 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 81 | tensor in the problem. 82 | """ 83 | ### YOUR CODE HERE 84 | # tf.reduce_mean(- tf.reduce_sum(y * tf.log(yhat + 1e-12), reduction_indices=[len(yhat.get_shape()) - 1])) 85 | out = -tf.reduce_sum(y * tf.log(yhat + 1e-12)) 86 | ### END YOUR CODE 87 | return out 88 | 89 | 90 | 91 | def test_softmax_basic(): 92 | """ 93 | Some simple tests to get you started. 94 | Warning: these are not exhaustive. 
95 | """ 96 | print("Running basic tests...") 97 | test1 = softmax(tf.convert_to_tensor( 98 | np.array([[1001,1002],[3,4]]), dtype=tf.float32)) 99 | with tf.Session(): 100 | test1 = test1.eval() 101 | assert np.amax(np.fabs(test1 - np.array( 102 | [0.26894142, 0.73105858]))) <= 1e-6 103 | 104 | test2 = softmax(tf.convert_to_tensor( 105 | np.array([[-1001,-1002]]), dtype=tf.float32)) 106 | with tf.Session(): 107 | test2 = test2.eval() 108 | assert np.amax(np.fabs(test2 - np.array( 109 | [0.73105858, 0.26894142]))) <= 1e-6 110 | 111 | print( "Basic (non-exhaustive) softmax tests pass\n") 112 | 113 | def test_cross_entropy_loss_basic(): 114 | """ 115 | Some simple tests to get you started. 116 | Warning: these are not exhaustive. 117 | """ 118 | y = np.array([[0, 1], [1, 0], [1, 0]], dtype=np.int32) 119 | yhat = np.array([[.5, .5], [.5, .5], [.5, .5]]) 120 | 121 | #implicit dtype conversion passes in 0.7.1 not in 0.8.0 122 | test1 = cross_entropy_loss( 123 | tf.convert_to_tensor(y, dtype=tf.float32), 124 | tf.convert_to_tensor(yhat, dtype=tf.float32)) 125 | with tf.Session(): 126 | test1 = test1.eval() 127 | result = -3 * np.log(.5) 128 | assert np.amax(np.fabs(test1 - result)) <= 1e-6 129 | print( "Basic (non-exhaustive) cross-entropy tests pass\n") 130 | 131 | if __name__ == "__main__": 132 | test_softmax_basic() 133 | test_cross_entropy_loss_basic() 134 | -------------------------------------------------------------------------------- /assignment2/q2_initialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def xavier_weight_init(): 5 | """ 6 | Returns function that creates random tensor. 7 | 8 | The specified function will take in a shape (tuple or 1-d array) and must 9 | return a random tensor of the specified shape and must be drawn from the 10 | Xavier initialization distribution. 11 | 12 | Hint: You might find tf.random_uniform useful. 13 | """ 14 | def _xavier_initializer(shape, **kwargs): 15 | """Defines an initializer for the Xavier distribution. 16 | 17 | This function will be used as a variable scope initializer. 18 | 19 | https://www.tensorflow.org/versions/r0.7/how_tos/variable_scope/index.html#initializers-in-variable-scope 20 | 21 | Args: 22 | shape: Tuple or 1-d array that species dimensions of requested tensor. 23 | Returns: 24 | out: tf.Tensor of specified shape sampled from Xavier distribution. 25 | """ 26 | ### YOUR CODE HERE 27 | eps = 4 * np.sqrt(6 / np.sum(shape)) 28 | out = tf.random_uniform(shape=shape, minval=-eps, maxval=eps, dtype=tf.float32) 29 | ### END YOUR CODE 30 | return out 31 | # Returns defined initializer function. 32 | return _xavier_initializer 33 | 34 | def test_initialization_basic(): 35 | """ 36 | Some simple tests for the initialization. 37 | """ 38 | print( "Running basic tests...") 39 | xavier_initializer = xavier_weight_init() 40 | shape = (1,) 41 | xavier_mat = xavier_initializer(shape) 42 | assert xavier_mat.get_shape() == shape 43 | 44 | shape = (1, 2, 3) 45 | xavier_mat = xavier_initializer(shape) 46 | assert xavier_mat.get_shape() == shape 47 | print( "Basic (non-exhaustive) Xavier initialization tests pass\n") 48 | 49 | def test_initialization(): 50 | """ 51 | Use this space to test your Xavier initialization code by running: 52 | python q1_initialization.py 53 | This function will not be called by the autograder, nor will 54 | your tests be graded. 
55 | """ 56 | print( "Running your tests...") 57 | ### YOUR CODE HERE 58 | xavier_initializer = xavier_weight_init() 59 | sess = tf.Session() 60 | 61 | shape = (100,100) 62 | tf_xavier = xavier_initializer(shape) 63 | sess.run(tf_xavier.initializer) 64 | xavier = sess.run(tf_xavier) 65 | # print(np.mean(xavier)) 66 | # print(np.max(xavier)) 67 | # print(np.min(xavier)) 68 | # print(np.std(xavier)) 69 | # eps = np.sqrt(6/np.sum((100,100))) 70 | # print((2 * eps)/np.sqrt(12)) 71 | # expect min to roughly be -np.sqrt(6)/sqrt(200) 72 | # expect max to roughly be np.sqrt(6)/sqrt(200) 73 | # expect mean to be roughly 0. 74 | # expect variance to be (b - a) ** 2 / 12 75 | 76 | ### END YOUR CODE 77 | 78 | if __name__ == "__main__": 79 | test_initialization_basic() 80 | test_initialization() 81 | -------------------------------------------------------------------------------- /assignment2/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | *.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | 
.texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /assignment2/test_confusion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | import seaborn as sns 6 | 7 | import tensorflow as tf 8 | 9 | confusion = np.array([[42452, 27, 45, 175, 60], 10 | [ 255, 1636, 12, 152, 39], 11 | [ 317, 26, 863, 42, 20], 12 | [ 598, 73, 31, 1319, 71], 13 | [ 546, 24, 3, 49, 2527]], dtype=np.int32) 14 | 15 | cm = confusion.copy() 16 | cm = cm.astype('float') / cm.sum(axis=1, keepdims=True) 17 | # cm *= 255 18 | # cm = cm.astype('uint8') 19 | cm = cm[:, :] 20 | print(cm) 21 | 22 | plt.figure() 23 | cmap1 = mpl.colors.ListedColormap(sns.color_palette("coolwarm", 100)) 24 | # print(sns.color_palette(sns.color_palette("coolwarm", 100))) 25 | 26 | #plt.cm.Blues 27 | plt.imshow(cm, interpolation='nearest', cmap=cmap1) 28 | plt.title("Confusion Matrix") 29 | plt.colorbar() 30 | tick_marks = np.arange(len(["a", "b", "c", "d", "e"])) 31 | plt.xticks(tick_marks, ["a", "b", "c", "d", "e"], rotation=45) 32 | plt.yticks(tick_marks, ["a", "b", "c", "d", "e"]) 33 | plt.gca().xaxis.grid(b=False) 34 | plt.gca().yaxis.grid(b=False) 35 | plt.tight_layout() 36 | plt.ylabel('True label') 37 | plt.xlabel('Predicted label') 38 | plt.savefig('cm.png') 39 | plt.show() 40 | 41 | 42 | 43 | 44 | # conf_matrix = tf.image_summary("confusion_matrix" + str(epoch), tf.convert_to_tensor(confusion.astype(np.float32))) 45 | # conf_summary = session.run(conf_matrix) 46 | # model.summary_writer.add_summary(conf_summary, epoch) 47 | -------------------------------------------------------------------------------- /assignment2/tests/test_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | import pytest 5 | 6 | from q1_softmax import softmax, cross_entropy_loss 7 | 8 | def rel_error(x,y): 9 | """ returns relative error """ 10 | return np.max(np.abs(x-y) / np.maximum(1e-8, np.abs(x) + np.abs(y))) 11 | 12 | @pytest.fixture(scope='module') 13 | def array_1(): 14 | return np.array([1,2], dtype=np.float32) 15 | 16 | @pytest.fixture(scope='module') 17 | def array_2(): 18 | return np.array([1001,1002], dtype=np.float32) 19 | 20 | @pytest.fixture(scope='module') 21 | def array_3(array_2 = array_2()): 22 | return np.array([array_2], dtype=np.float32) 23 | 24 | @pytest.fixture(scope='module') 25 | def array_4(array_1 = array_1(), array_2 = array_2()): 26 | return np.array([array_1, array_2], dtype=np.float32) 27 | 28 | @pytest.fixture(scope='module') 29 | def CE_arrays(): 30 | return np.array([[0, 1], [1, 0], [1, 0]]), np.array([[.5, .5], [.5, .5], [.5, .5]]) 31 | 32 | #this should construct a single tf session per function call 33 | @pytest.fixture(scope='function') 34 | def sess(): 35 | return tf.Session() 36 | 37 | def test_softmax_array_1(array_1): 38 | """ Original softmax test defined in q2_softmax.py; """ 39 | with tf.Session(): 40 | input_array = tf.convert_to_tensor(array_1) 41 | assert rel_error(softmax(input_array).eval(), 42 | np.array([0.26894142, 0.73105858])) < 1e-7 43 | 44 | def test_softmax_array_alt(sess, array_1): 45 | input_array = tf.convert_to_tensor(array_1) 46 | output_array = sess.run(softmax(input_array)) 47 | assert rel_error(output_array, 48 | np.array([0.26894142, 0.73105858])) < 1e-7 49 | 50 
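# The parametrized test below only checks that softmax() runs to completion on the
# 1-d and 2-d fixture arrays; the cross-entropy tests that follow run the loss and,
# in test_CE_loss_validation, compare its value against the expected -3 * log(0.5).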
| @pytest.mark.parametrize("input_array", [array_1(), array_2(), array_3(), array_4()]) 51 | def test_get_session(sess, input_array): 52 | sess.run(softmax(tf.convert_to_tensor(input_array))) 53 | assert 1 54 | print("softmax ran to completion") 55 | 56 | def test_CE_loss(sess, CE_arrays): 57 | y, y_hat = CE_arrays 58 | y = tf.convert_to_tensor(y, dtype=tf.float64) 59 | y_hat = tf.convert_to_tensor(y_hat, dtype=tf.float64) 60 | sess.run(cross_entropy_loss(y,y_hat)) 61 | assert 1 62 | print("CE_loss ran to completion") 63 | 64 | def test_CE_loss_validation(sess, CE_arrays): 65 | y, y_hat = CE_arrays 66 | y = tf.convert_to_tensor(y, dtype=tf.float64) 67 | y_hat = tf.convert_to_tensor(y_hat, dtype=tf.float64) 68 | value = sess.run(cross_entropy_loss(y,y_hat)) 69 | assert rel_error(value, -3 * np.log(0.5)) <= 1e-7 70 | 71 | -------------------------------------------------------------------------------- /assignment2/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import numpy as np 4 | 5 | class Vocab(object): 6 | def __init__(self): 7 | self.word_to_index = {} 8 | self.index_to_word = {} 9 | self.word_freq = defaultdict(int) 10 | self.total_words = 0 11 | self.unknown = '' 12 | self.add_word(self.unknown, count=0) 13 | 14 | def add_word(self, word, count=1): 15 | if word not in self.word_to_index: 16 | index = len(self.word_to_index) 17 | self.word_to_index[word] = index 18 | self.index_to_word[index] = word 19 | self.word_freq[word] += count 20 | 21 | def construct(self, words): 22 | for word in words: 23 | self.add_word(word) 24 | self.total_words = float(sum(self.word_freq.values())) 25 | print( '{} total words with {} uniques'.format(self.total_words, len(self.word_freq))) 26 | 27 | def encode(self, word): 28 | if word not in self.word_to_index: 29 | word = self.unknown 30 | return self.word_to_index[word] 31 | 32 | def decode(self, index): 33 | return self.index_to_word[index] 34 | 35 | def __len__(self): 36 | return len(self.word_freq) 37 | 38 | def calculate_perplexity(log_probs): 39 | # https://web.stanford.edu/class/cs124/lec/languagemodeling.pdf 40 | perp = 0 41 | for p in log_probs: 42 | perp += -p 43 | return np.exp(perp / len(log_probs)) 44 | 45 | def get_ptb_dataset(dataset='train'): 46 | fn = 'data/ptb/ptb.{}.txt' 47 | for line in open(fn.format(dataset), encoding="utf-8"): 48 | for word in line.split(): 49 | yield word 50 | # Add token to the end of the line 51 | # Equivalent to in: 52 | # https://github.com/wojzaremba/lstm/blob/master/data.lua#L32 53 | # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L31 54 | yield '' 55 | 56 | def ptb_iterator(raw_data, batch_size, num_steps): 57 | # Pulled from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L82 58 | raw_data = np.array(raw_data, dtype=np.int32) 59 | data_len = len(raw_data) 60 | batch_len = data_len // batch_size 61 | data = np.zeros([batch_size, batch_len], dtype=np.int32) 62 | for i in range(batch_size): 63 | data[i] = raw_data[batch_len * i:batch_len * (i + 1)] 64 | epoch_size = (batch_len - 1) // num_steps 65 | if epoch_size == 0: 66 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 67 | for i in range(epoch_size): 68 | x = data[:, i * num_steps:(i + 1) * num_steps] 69 | y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] 70 | yield (x, y) 71 | 72 | def sample(a, temperature=1.0): 73 | # helper function to sample an index 
from a probability array 74 | # from https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py 75 | a = np.log(a) / temperature 76 | a = np.exp(a) / np.sum(np.exp(a)) 77 | return np.argmax(np.random.multinomial(1, a, 1)) 78 | 79 | def data_iterator(orig_X, orig_y=None, batch_size=32, label_size=2, shuffle=False): 80 | # Optionally shuffle the data before training 81 | if shuffle: 82 | indices = np.random.permutation(len(orig_X)) 83 | data_X = orig_X[indices] 84 | data_y = orig_y[indices] if np.any(orig_y) else None 85 | else: 86 | data_X = orig_X 87 | data_y = orig_y 88 | ### 89 | total_processed_examples = 0 90 | total_steps = int(np.ceil(len(data_X) / float(batch_size))) 91 | for step in range(total_steps): 92 | # Create the batch by selecting up to batch_size elements 93 | batch_start = step * batch_size 94 | x = data_X[batch_start:batch_start + batch_size] 95 | # Convert our target from the class index to a one hot vector 96 | y = None 97 | if np.any(data_y): 98 | y_indices = data_y[batch_start:batch_start + batch_size] 99 | y = np.zeros((len(x), label_size), dtype=np.int32) 100 | y[np.arange(len(y_indices)), y_indices] = 1 101 | ### 102 | yield x, y 103 | total_processed_examples += len(x) 104 | # Sanity check to make sure we iterated over all the dataset as intended 105 | assert total_processed_examples == len(data_X), 'Expected {} and processed {}'.format(len(data_X), total_processed_examples) 106 | -------------------------------------------------------------------------------- /assignment3/assignment3_2016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/assignment3_2016.pdf -------------------------------------------------------------------------------- /assignment3/codebase_release/loss_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/codebase_release/loss_history.png -------------------------------------------------------------------------------- /assignment3/codebase_release/prepare_submission.sh: -------------------------------------------------------------------------------- 1 | echo "Please enter yout sunetid: " 2 | read sunetid 3 | 4 | zip -r $sunetid.zip rnn.py weights 5 | -------------------------------------------------------------------------------- /assignment3/codebase_release/rnn_pytorch.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import math 8 | import time 9 | import itertools 10 | import shutil 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torch.autograd import Variable 15 | import torch.nn.functional as F 16 | 17 | from torch.nn.utils import clip_grad_norm 18 | 19 | import tree as tr 20 | from utils import Vocab 21 | 22 | from collections import OrderedDict 23 | 24 | import seaborn as sns 25 | 26 | from random import shuffle 27 | 28 | sns.set_style('whitegrid') 29 | 30 | embed_size = 100 31 | label_size = 2 32 | early_stopping = 2 33 | anneal_threshold = 0.99 34 | anneal_by = 1.5 35 | max_epochs = 30 36 | lr = 0.01 37 | l2 = 0.02 38 | average_over = 700 39 | train_size = 800 40 | 41 | 42 | class RNN_Model(nn.Module): 43 | def __init__(self, vocab, embed_size=100, label_size=2): 44 | 
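        # Tree-structured RNN (recursive neural network) for binary sentiment over parse
        # trees: a word-embedding lookup, left/right composition layers (fcl, fcr) mapping
        # embed_size -> embed_size, and a linear projection from each node's hidden state
        # to label_size logits; walk_tree() composes children recursively and records
        # every node's projected logits in self.node_list.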
super(RNN_Model, self).__init__() 45 | self.embed_size = embed_size 46 | self.label_size = label_size 47 | self.vocab = vocab 48 | self.embedding = nn.Embedding(int(self.vocab.total_words), self.embed_size) 49 | self.fcl = nn.Linear(self.embed_size, self.embed_size, bias=True) 50 | self.fcr = nn.Linear(self.embed_size, self.embed_size, bias=True) 51 | self.projection = nn.Linear(self.embed_size, self.label_size , bias=True) 52 | self.activation = F.relu 53 | self.node_list = [] 54 | 55 | def init_variables(self): 56 | print("total_words = ", self.vocab.total_words) 57 | 58 | def walk_tree(self, in_node): 59 | if in_node.isLeaf: 60 | word_id = torch.LongTensor((self.vocab.encode(in_node.word), )) 61 | current_node = self.embedding(Variable(word_id)) 62 | self.node_list.append(self.projection(current_node).unsqueeze(0)) 63 | else: 64 | left = self.walk_tree(in_node.left) 65 | right = self.walk_tree(in_node.right) 66 | current_node = self.activation(self.fcl(left) + self.fcl(right)) 67 | self.node_list.append(self.projection(current_node).unsqueeze(0)) 68 | return current_node 69 | 70 | def forward(self, x): 71 | """ 72 | Forward function accepts input data and returns a Variable of output data 73 | """ 74 | self.node_list = [] 75 | root_node = self.walk_tree(x.root) 76 | all_nodes = torch.cat(self.node_list) 77 | #now I need to project out 78 | return all_nodes 79 | 80 | def main(): 81 | print("do nothing") 82 | 83 | 84 | if __name__ == '__main__': 85 | train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200) 86 | vocab = Vocab() 87 | train_sents = [t.get_words() for t in train_data] 88 | vocab.construct(list(itertools.chain.from_iterable(train_sents))) 89 | model = RNN_Model(vocab, embed_size=50) 90 | main() 91 | 92 | lr = 0.01 93 | loss_history = [] 94 | optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0) 95 | # params (iterable): iterable of parameters to optimize or dicts defining 96 | # parameter groups 97 | # lr (float): learning rate 98 | # momentum (float, optional): momentum factor (default: 0) 99 | # weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 100 | #torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0) 101 | # print(model.fcl._parameters['weight']) 102 | 103 | for epoch in range(max_epochs): 104 | print("epoch = ", epoch) 105 | shuffle(train_data) 106 | total_root_prediction = 0. 107 | total_summed_accuracy = 0. 
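        # Learning-rate annealing: every 10th epoch (after the first), halve the
        # learning rate in each of the optimizer's parameter groups.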
108 | if (epoch % 10 == 0) and epoch > 0: 109 | for param_group in optimizer.param_groups: 110 | #update learning rate 111 | print("Droping learning from %f to %f"%(param_group['lr'], 0.5 * param_group['lr'])) 112 | param_group['lr'] = 0.5 * param_group['lr'] 113 | for step, tree in enumerate(train_data): 114 | # if step == 0: 115 | # optimizer.zero_grad() 116 | # objective_loss.backward() 117 | # if step == len(train_data) - 1: 118 | # optimizer.step() 119 | 120 | all_nodes = model(tree) 121 | 122 | labels = [] 123 | indices = [] 124 | for x,y in enumerate(tree.labels): 125 | if y != 2: 126 | labels.append(y) 127 | indices.append(x) 128 | 129 | torch_labels = torch.LongTensor([l for l in labels if l != 2]) 130 | logits = all_nodes.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 131 | logits_squeezed = logits.squeeze() 132 | predictions = logits.max(dim=2)[1].squeeze() 133 | 134 | correct = predictions.data == torch_labels 135 | #so correctly predicted (root); 136 | total_root_prediction += float(correct[-1]) 137 | total_summed_accuracy += float(correct.sum()) / len(labels) 138 | 139 | objective_loss = F.cross_entropy(input=logits_squeezed, target=Variable(torch_labels)) 140 | if objective_loss.data[0] > 5 and epoch > 10: 141 | #interested in phrase that have large loss (i.e. incorrectly classified) 142 | print(' '.join(tree.get_words())) 143 | 144 | loss_history.append(objective_loss.data[0]) 145 | if step % 20 == 0 and step > 0: 146 | print("step %3d, last loss %0.3f, mean loss (%d steps) %0.3f" % (step, objective_loss.data[0], average_over, np.mean(loss_history[-average_over:]))) 147 | optimizer.zero_grad() 148 | 149 | if np.isnan(objective_loss.data[0]): 150 | print("object_loss was not a number") 151 | sys.exit(1) 152 | else: 153 | objective_loss.backward() 154 | clip_grad_norm(model.parameters(), 5, norm_type=2.) 155 | #temp_grad += model.fcl._parameters['weight'].grad.data 156 | # # Update weights using gradient descent; w1.data and w2.data are Tensors, 157 | # # w1.grad and w2.grad are Variables and w1.grad.data and w2.grad.data are 158 | # # Tensors. 159 | # loss.backward() 160 | # w1.data -= learning_rate * w1.grad.data 161 | # w2.data -= learning_rate * w2.grad.data 162 | optimizer.step() 163 | print("total root predicted correctly = ", total_root_prediction/ float(train_size)) 164 | print("total node (including root) predicted correctly = ", total_summed_accuracy / float(train_size)) 165 | 166 | total_dev_loss = 0. 167 | dev_correct_at_root = 0. 168 | dev_correct_all = 0. 
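    # Dev-set evaluation: accumulate the cross-entropy loss over dev trees and count
    # correct predictions at the root node as well as, per tree, the fraction of
    # correctly labeled non-neutral nodes.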
169 | for step, dev_example in enumerate(dev_data): 170 | all_nodes = model(dev_example) 171 | 172 | labels = [] 173 | indices = [] 174 | for x,y in enumerate(dev_example.labels): 175 | if y != 2: 176 | labels.append(y) 177 | indices.append(x) 178 | torch_labels = torch.LongTensor([l for l in labels if l != 2]) 179 | logits = all_nodes.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 180 | logits_squeezed = logits.squeeze() 181 | predictions = logits.max(dim=2)[1].squeeze() 182 | 183 | correct = predictions.data == torch_labels 184 | #so correctly predicted (root); 185 | dev_correct_at_root += float(correct[-1]) 186 | dev_correct_all += float(correct.sum()) / len(labels) 187 | objective_loss = F.cross_entropy(input=logits_squeezed, target=Variable(torch_labels)) 188 | total_dev_loss += objective_loss.data[0] 189 | print("total_dev_loss = ", total_dev_loss) 190 | print("correct (root) = ", dev_correct_at_root) 191 | print("correct (all)= ", dev_correct_all) 192 | # logits = logits.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 193 | plt.figure() 194 | plt.plot(loss_history) 195 | plt.show() 196 | print("DONE!") -------------------------------------------------------------------------------- /assignment3/codebase_release/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get trees 4 | data=trainDevTestTrees_PTB.zip 5 | curl -O http://nlp.stanford.edu/sentiment/$data 6 | unzip $data 7 | rm -f $data 8 | 9 | -------------------------------------------------------------------------------- /assignment3/codebase_release/tree.py: -------------------------------------------------------------------------------- 1 | import random 2 | UNK = 'UNK' 3 | # This file contains the dataset in a useful way. We populate a list of 4 | # Trees to train/test our Neural Nets such that each Tree contains any 5 | # number of Node objects. 6 | 7 | # The best way to get a feel for how these objects are used in the program is to drop pdb.set_trace() in a few places throughout the codebase 8 | # to see how the trees are used.. look where loadtrees() is called etc.. 9 | 10 | 11 | class Node: # a node in the tree 12 | def __init__(self, label, word=None): 13 | self.label = label 14 | self.word = word 15 | self.parent = None # reference to parent 16 | self.left = None # reference to left child 17 | self.right = None # reference to right child 18 | # true if I am a leaf (could have probably derived this from if I have 19 | # a word) 20 | self.isLeaf = False 21 | # true if we have finished performing fowardprop on this node (note, 22 | # there are many ways to implement the recursion.. 
some might not 23 | # require this flag) 24 | 25 | 26 | class Tree: 27 | 28 | def __init__(self, treeString, openChar='(', closeChar=')'): 29 | tokens = [] 30 | self.open = '(' 31 | self.close = ')' 32 | for toks in treeString.strip().split(): 33 | tokens += list(toks) 34 | self.root = self.parse(tokens) 35 | # get list of labels as obtained through a post-order traversal 36 | self.labels = get_labels(self.root) 37 | self.num_words = len(self.labels) 38 | 39 | def parse(self, tokens, parent=None): 40 | assert tokens[0] == self.open, "Malformed tree" 41 | assert tokens[-1] == self.close, "Malformed tree" 42 | 43 | split = 2 # position after open and label 44 | countOpen = countClose = 0 45 | 46 | if tokens[split] == self.open: 47 | countOpen += 1 48 | split += 1 49 | # Find where left child and right child split 50 | while countOpen != countClose: 51 | if tokens[split] == self.open: 52 | countOpen += 1 53 | if tokens[split] == self.close: 54 | countClose += 1 55 | split += 1 56 | 57 | # New node 58 | node = Node(int(tokens[1])) # zero index labels 59 | 60 | node.parent = parent 61 | 62 | # leaf Node 63 | if countOpen == 0: 64 | node.word = ''.join(tokens[2:-1]).lower() # lower case? 65 | node.isLeaf = True 66 | return node 67 | 68 | node.left = self.parse(tokens[2:split], parent=node) 69 | node.right = self.parse(tokens[split:-1], parent=node) 70 | 71 | return node 72 | 73 | def get_words(self): 74 | leaves = getLeaves(self.root) 75 | words = [node.word for node in leaves] 76 | return words 77 | 78 | 79 | def leftTraverse(node, nodeFn=None, args=None): 80 | """ 81 | Recursive function traverses tree 82 | from left to right. 83 | Calls nodeFn at each node 84 | """ 85 | if node is None: 86 | return 87 | leftTraverse(node.left, nodeFn, args) 88 | leftTraverse(node.right, nodeFn, args) 89 | nodeFn(node, args) 90 | 91 | 92 | def getLeaves(node): 93 | if node is None: 94 | return [] 95 | if node.isLeaf: 96 | return [node] 97 | else: 98 | return getLeaves(node.left) + getLeaves(node.right) 99 | 100 | 101 | def get_labels(node): 102 | if node is None: 103 | return [] 104 | return get_labels(node.left) + get_labels(node.right) + [node.label] 105 | 106 | 107 | def clearFprop(node, words): 108 | node.fprop = False 109 | 110 | 111 | def loadTrees(dataSet='train'): 112 | """ 113 | Loads training trees. Maps leaf node words to word ids. 114 | """ 115 | file = 'trees/%s.txt' % dataSet 116 | print("Loading %s trees.." 
% dataSet) 117 | with open(file, 'r') as fid: 118 | trees = [Tree(l) for l in fid.readlines()] 119 | 120 | return trees 121 | 122 | def simplified_data(num_train, num_dev, num_test): 123 | rndstate = random.getstate() 124 | random.seed(0) 125 | trees = loadTrees('train') + loadTrees('dev') + loadTrees('test') 126 | 127 | #filter extreme trees 128 | pos_trees = [t for t in trees if t.root.label==4] 129 | neg_trees = [t for t in trees if t.root.label==0] 130 | 131 | #binarize labels 132 | binarize_labels(pos_trees) 133 | binarize_labels(neg_trees) 134 | 135 | #split into train, dev, test 136 | print(len(pos_trees), len(neg_trees)) 137 | pos_trees = sorted(pos_trees, key=lambda t: len(t.get_words())) 138 | neg_trees = sorted(neg_trees, key=lambda t: len(t.get_words())) 139 | num_train = num_train // 2 140 | num_dev = num_dev // 2 141 | num_test = num_test // 2 142 | train = pos_trees[:num_train] + neg_trees[:num_train] 143 | dev = pos_trees[num_train : num_train+num_dev] + neg_trees[num_train : num_train+num_dev] 144 | test = pos_trees[num_train+num_dev : num_train+num_dev+num_test] + neg_trees[num_train+num_dev : num_train+num_dev+num_test] 145 | random.shuffle(train) 146 | random.shuffle(dev) 147 | random.shuffle(test) 148 | random.setstate(rndstate) 149 | 150 | 151 | return train, dev, test 152 | 153 | 154 | def binarize_labels(trees): 155 | def binarize_node(node, _): 156 | if node.label<2: 157 | node.label = 0 158 | elif node.label>2: 159 | node.label = 1 160 | for tree in trees: 161 | leftTraverse(tree.root, binarize_node, None) 162 | tree.labels = get_labels(tree.root) 163 | -------------------------------------------------------------------------------- /assignment3/codebase_release/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | class Vocab(object): 5 | def __init__(self): 6 | self.word_to_index = {} 7 | self.index_to_word = {} 8 | self.word_freq = defaultdict(int) 9 | self.total_words = 0 10 | self.unknown = '' 11 | self.add_word(self.unknown, count=0) 12 | 13 | def add_word(self, word, count=1): 14 | if word not in self.word_to_index: 15 | index = len(self.word_to_index) 16 | self.word_to_index[word] = index 17 | self.index_to_word[index] = word 18 | self.word_freq[word] += count 19 | 20 | def construct(self, words): 21 | for word in words: 22 | self.add_word(word) 23 | self.total_words = float(sum(self.word_freq.values())) 24 | print('{} total words with {} uniques'.format(self.total_words, len(self.word_freq))) 25 | 26 | def encode(self, word): 27 | if word not in self.word_to_index: 28 | word = self.unknown 29 | return self.word_to_index[word] 30 | 31 | def decode(self, index): 32 | return self.index_to_word[index] 33 | 34 | def __len__(self): 35 | return len(self.word_freq) 36 | -------------------------------------------------------------------------------- /assignment3/recursive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/recursive.png -------------------------------------------------------------------------------- /class_notebooks/tensorflow_scan.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Scan\n", 8 | "`tensorflow.scan` allows for loops to be written inside a computation graph (which 
using explicit loop structures like `for`) -- backpropagation is handled implicitly by `tensorflow`. Explicitly unrolling the loops requires the creation of new graph nodes for each loop body iteration (although the number of iterations is fixed)." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Cumulative Sum" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "[ 1. 3. 5. 7.]\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "import tensorflow as tf\n", 35 | "\n", 36 | "def fn(previous_output, current_input):\n", 37 | " return previous_output + current_input\n", 38 | "\n", 39 | "elems = tf.Variable([1.0, 2.0, 2.0, 2.0])\n", 40 | "elems = tf.identity(elems)\n", 41 | "#required otherwise it will fail;\n", 42 | "\n", 43 | "initializer = tf.constant(0.0)\n", 44 | "out = tf.scan(fn, elems, initializer=initializer)\n", 45 | "\n", 46 | "with tf.Session() as session:\n", 47 | " init_op = tf.initialize_all_variables()\n", 48 | " session.run(init_op)\n", 49 | " value = session.run(out)\n", 50 | " print(value)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "source": [ 59 | "# Loop Equivalence" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 18, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "[ 7.]\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "import tensorflow as tf\n", 79 | "\n", 80 | "def fn(previous_output, current_input):\n", 81 | " return previous_output + current_input\n", 82 | "\n", 83 | "elems = tf.Variable([1.0, 2.0, 2.0, 2.0])\n", 84 | "elems = tf.identity(elems)\n", 85 | "#required otherwise it will fail;\n", 86 | "\n", 87 | "initializer = tf.constant(0.0)\n", 88 | "cum_sum = tf.Variable(0.0)\n", 89 | "\n", 90 | "for x in tf.split(0, elems.get_shape()[0], elems):\n", 91 | " cum_sum += x\n", 92 | " \n", 93 | "with tf.Session() as session:\n", 94 | " init_op = tf.initialize_all_variables()\n", 95 | " session.run(init_op)\n", 96 | " value = session.run(cum_sum)\n", 97 | " print(value)\n", 98 | " #like a reduce operation (but it scans over elements)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.4.3+" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 0 132 | } 133 | -------------------------------------------------------------------------------- /old_assignments/assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment1.pdf -------------------------------------------------------------------------------- /old_assignments/assignment1/README.md: 
-------------------------------------------------------------------------------- 1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/) 2 | ==================================================================================== 3 | 4 | ** Due Date: 4/16/2015 (Thursday) 11:59 PM PST. ** 5 | 6 | In this assignment we will familiarize you with basic concepts of neural networks, word vectors, and their application to sentiment analysis. 7 | 8 | Setup 9 | ----- 10 | 11 | *Note: Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help.* 12 | 13 | **Get the code**: [Download the starter code here](http://cs224d.stanford.edu/assignment1/assignment1.zip) and [the complementary written problems here](http://cs224d.stanford.edu/assignment1/assignment1.pdf). 14 | 15 | **[Optional] virtual environment:** Once you have unzipped the starter code, you might want to create a [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed on your machine. To set up a virtual environment, run the following: 16 | 17 | ``` 18 | cd assignment1 19 | sudo pip install virtualenv # This may already be installed 20 | virtualenv .env # Create a virtual environment 21 | source .env/bin/activate # Activate the virtual environment 22 | pip install -r requirements.txt # Install dependencies 23 | # Work on the assignment for a while ... 24 | deactivate # Exit the virtual environment 25 | ``` 26 | 27 | **Install requirements (without a virtual environment):** To install the required packages locally without setting up a virtual environment, run the following: 28 | 29 | ``` 30 | cd assignment1 31 | pip install -r requirements.txt # Install dependencies 32 | ``` 33 | 34 | **Download data:** Once you have the starter code, you will need to download the Stanford Sentiment Treebank dataset. Run the following from the assignment1 directory: 35 | 36 | ``` 37 | cd cs224d/datasets 38 | ./get_datasets.sh 39 | ``` 40 | 41 | **Start IPython:** After you have the Stanford Sentiment data, you should start the IPython notebook server from the `assignment1` directory. If you are unfamiliar with IPython, you should read this [IPython tutorial](http://cs231n.github.io/ipython-tutorial). 42 | 43 | Submitting your work 44 | -------------------- 45 | 46 | Once you are done working, put the written part in the same directory as your IPython notebook file, and run the `collectSubmission.sh` script; this will produce a file called `assignment1.zip`. Rename this file to `.zip`, for instance if your stanford email is `jdoe@stanford.edu`, your file name should be 47 | 48 | ``` 49 | cd cs224d/datasets 50 | jdoe.zip 51 | ``` 52 | 53 | Upload this file to [the Box for this assignment](https://stanford.box.com/signup/collablink/d_3367429916/116c2072133f72). 54 | For the written component, please upload a PDF file of your solutions to [`Scoryst`](https://scoryst.com/course/67/submit/). Please [sign up](https://scoryst.com/enroll/MUPJ5J2xd9/) with your stanford email and SUNet ID (letter ID) if applicable. When asked to map question parts to your PDF, please map the parts accordingly as courtesy to your TAs. 
The last part of each problem is a placeholder for the programming component, you could just map it to the page of the last part in your written assignment. 55 | 56 | Tasks 57 | ----- 58 | 59 | There will be four parts to this assignment, the first three comprise of a written component and a programming component in the IPython notebook. The fourth part is purely programming-based, and we also give you an opportunity to earn extra credits by doing a programming-based optional part. For all of the tasks, you will be using the IPython notebook `wordvec_sentiment.ipynb`. 60 | 61 | Q1: Softmax (10 points) 62 | ----------------------- 63 | 64 | Q2: Neural Network Basics (30 points) 65 | ------------------------------------- 66 | 67 | Q3: word2vec (40 points) 68 | ------------------------ 69 | 70 | Q4: Sentiment Analysis (20 points) 71 | ---------------------------------- 72 | 73 | For these four parts, please try to finish the written component before writing code. We designed the written component to help you think through the details in your code implementation. For each part, the written component is worth 40% the points of that part, and programming is 60%. 74 | 75 | Extra Credit (optional): Improve Your Sentiment Analysis Model (+10 points) 76 | --------------------------------------------------------------------------- 77 | 78 | For this optional part, please follow the instructions in the IPython notebook to finish your implementation and report results. Extra credit will be awarded based on relative progress. -------------------------------------------------------------------------------- /old_assignments/assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip . 
-x "*.git*" "*cs224d/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" \ 3 | "saved_params_1000.npy" "saved_params_2000.npy" "saved_params_3000.npy" "saved_params_4000.npy" "saved_params_5000.npy" \ 4 | "saved_params_6000.npy" "saved_params_7000.npy" "saved_params_8000.npy" "saved_params_9000.npy" "saved_params_10000.npy" \ 5 | "saved_params_11000.npy" "saved_params_12000.npy" "saved_params_13000.npy" "saved_params_14000.npy" "saved_params_15000.npy" \ 6 | "saved_params_16000.npy" "saved_params_17000.npy" "saved_params_18000.npy" "saved_params_19000.npy" "saved_params_20000.npy" \ 7 | "saved_params_21000.npy" "saved_params_22000.npy" "saved_params_23000.npy" "saved_params_24000.npy" "saved_params_25000.npy" \ 8 | "saved_params_26000.npy" "saved_params_27000.npy" "saved_params_28000.npy" "saved_params_29000.npy" "saved_params_30000.npy" \ 9 | "saved_params_31000.npy" "saved_params_32000.npy" "saved_params_33000.npy" "saved_params_34000.npy" "saved_params_35000.npy" \ 10 | "saved_params_36000.npy" "saved_params_37000.npy" "saved_params_38000.npy" "saved_params_39000.npy" -------------------------------------------------------------------------------- /old_assignments/assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /old_assignments/assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==3.0.0 7 | matplotlib==1.4.3 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.2 11 | pyparsing==2.0.3 12 | python-dateutil==2.4.0 13 | pytz==2014.10 14 | pyzmq==14.4.1 15 | scipy==0.14.1 16 | six==1.9.0 17 | tornado==4.0.2 18 | wsgiref==0.1.2 19 | -------------------------------------------------------------------------------- /old_assignments/assignment1/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 
18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | *.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | .texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /old_assignments/assignment1/updateAssignment.sh: -------------------------------------------------------------------------------- 1 | ASSIGNMENT=assignment1 2 | wget http://cs224d.stanford.edu/${ASSIGNMENT}/${ASSIGNMENT}.zip 3 | rm -f ${ASSIGNMENT}.pdf 4 | wget http://cs224d.stanford.edu/${ASSIGNMENT}/${ASSIGNMENT}.pdf 5 | unzip ${ASSIGNMENT}.zip 6 | rm -f ${ASSIGNMENT}.zip 7 | wget https://raw.githubusercontent.com/qipeng/nbutils/master/updateAssignment.py 8 | 9 | echo Update in progress... 10 | python updateAssignment.py . ${ASSIGNMENT} 11 | 12 | rm -rf updateAssignment.py 13 | rm -rf ${ASSIGNMENT} 14 | rm -rf __MACOSX 15 | echo Done! 
-------------------------------------------------------------------------------- /old_assignments/assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2.pdf -------------------------------------------------------------------------------- /old_assignments/assignment2/collectSubmission.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, os, re, json 4 | import glob, shutil 5 | import time 6 | 7 | 8 | ################ 9 | # Sanity check # 10 | ################ 11 | import numpy as np 12 | 13 | fail = 0 14 | counter = 0 15 | testcases = [] 16 | 17 | from functools import wraps 18 | import traceback 19 | 20 | def prompt(msg): 21 | yn = input(msg + " [y/n]: ") 22 | return yn.lower().startswith('y') 23 | 24 | class testcase(object): 25 | def __init__(self, name): 26 | self.name = name 27 | 28 | def __call__(self, func): 29 | global testcases 30 | 31 | @wraps(func) 32 | def wrapper(): 33 | global counter 34 | global fail 35 | counter += 1 36 | print(">> Test %d (%s)" % (counter, self.name)) 37 | try: 38 | func() 39 | print("[ok] Passed test %d (%s)" % (counter, self.name)) 40 | except Exception as e: 41 | fail += 1 42 | print("[!!] Error on test %d (%s):" % (counter, self.name)) 43 | traceback.print_exc() 44 | 45 | testcases.append(wrapper) 46 | return wrapper 47 | 48 | ## 49 | # Part 0 50 | 51 | ## 52 | # Part 1 53 | @testcase("Part1: test random_weight_matrix") 54 | def test_random_weight_matrix(): 55 | from misc import random_weight_matrix 56 | A = random_weight_matrix(100,100) 57 | assert(A.shape == (100,100)) 58 | 59 | @testcase("Part1: initialize window model") 60 | def ner_init(): 61 | from nerwindow import WindowMLP 62 | np.random.seed(10) 63 | wv = np.random.randn(20,10) 64 | clf = WindowMLP(wv, windowsize=3, 65 | dims = [None, 15, 3], rseed=10) 66 | 67 | @testcase("Part1: test predict_proba()") 68 | def ner_predict_proba(): 69 | from nerwindow import WindowMLP 70 | np.random.seed(10) 71 | wv = np.random.randn(20,10) 72 | clf = WindowMLP(wv, windowsize=3, 73 | dims = [None, 15, 3], rseed=10) 74 | p = clf.predict_proba([1,2,3]) 75 | assert(len(p.flatten()) == 3) 76 | p = clf.predict_proba([[1,2,3], [2,3,4]]) 77 | assert(np.ndim(p) == 2) 78 | assert(p.shape == (2,3)) 79 | 80 | @testcase("Part1: test compute_loss()") 81 | def ner_predict_proba(): 82 | from nerwindow import WindowMLP 83 | np.random.seed(10) 84 | wv = np.random.randn(20,10) 85 | clf = WindowMLP(wv, windowsize=3, 86 | dims = [None, 15, 3], rseed=10) 87 | J = clf.compute_loss([1,2,3], 1) 88 | print(" dummy: J = %g" % J) 89 | J = clf.compute_loss([[1,2,3], [2,3,4]], [0,1]) 90 | print(" dummy: J = %g" % J) 91 | 92 | @testcase("Part1: NER prediction - dev set") 93 | def ner_pred_dev(): 94 | devpred = np.loadtxt("dev.predicted", dtype=int) 95 | assert(len(devpred) == 51362) # dev set length 96 | 97 | @testcase("Part1: NER prediction - test set") 98 | def ner_pred_test(): 99 | testpred = np.loadtxt("test.predicted", dtype=int) 100 | assert(len(testpred) == 46435) 101 | 102 | def setup_probing(): 103 | num_to_word = dict(enumerate( 104 | ["hello", "world", "i", "am", "a", "banana", 105 | "there", "is", "no", "spoon"])) 106 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 107 | num_to_tag = dict(enumerate(tagnames)) 108 | 109 | from nerwindow import WindowMLP 110 | np.random.seed(10) 111 | wv = 
np.random.randn(10,50) 112 | clf = WindowMLP(wv, windowsize=3, 113 | dims = [None, 100, 5], rseed=10) 114 | return clf, num_to_word, num_to_tag 115 | 116 | @testcase("Part1.1 (a): verify output format") 117 | def ner_probe_a(): 118 | from part11probing import part_a, part_b, part_c 119 | clf, num_to_word, num_to_tag = setup_probing() 120 | s,w = part_a(clf, num_to_word, verbose=False) 121 | assert(len(s) == len(w)) 122 | if type(s) == dict: # some students may have done this 123 | for k in list(s.keys()): assert(k in w) 124 | for k in list(w.keys()): assert(k in s) 125 | assert(len(s) >= 5) 126 | else: # list 127 | assert(len(s[0]) == len(w[0])) 128 | assert(len(s[0]) == 10) 129 | assert(type(w[0][0]) == str) 130 | 131 | 132 | @testcase("Part1.1 (b): verify output format") 133 | def ner_probe_b(): 134 | from part11probing import part_a, part_b, part_c 135 | clf, num_to_word, num_to_tag = setup_probing() 136 | s,w = part_b(clf, num_to_word, num_to_tag, verbose=False) 137 | assert(len(s) == len(w)) 138 | assert(len(s) == 5) 139 | assert(len(s[0]) == len(w[0])) 140 | assert(len(s[0]) == 10) 141 | assert(type(w[0][0]) == str) 142 | 143 | 144 | @testcase("Part1.1 (c): verify output format") 145 | def ner_probe_b(): 146 | from part11probing import part_a, part_b, part_c 147 | clf, num_to_word, num_to_tag = setup_probing() 148 | s,w = part_c(clf, num_to_word, num_to_tag, verbose=False) 149 | assert(len(s) == len(w)) 150 | assert(len(s) == 5) 151 | assert(len(s[0]) == len(w[0])) 152 | assert(len(s[0]) == 10) 153 | assert(type(w[0][0]) == str) 154 | 155 | 156 | ## 157 | # Part 2 158 | @testcase("Part2: initialize RNNLM") 159 | def rnnlm_init(): 160 | from rnnlm import RNNLM 161 | np.random.seed(10) 162 | L = np.random.randn(50,10) 163 | model = RNNLM(L0 = L) 164 | 165 | @testcase("Part2: load RNNLM params") 166 | def rnnlm_load(): 167 | from rnnlm import RNNLM 168 | L = np.load('rnnlm.L.npy') 169 | print(" loaded L: %s" % str(L.shape)) 170 | H = np.load('rnnlm.H.npy') 171 | print(" loaded H: %s" % str(H.shape)) 172 | U = np.load('rnnlm.U.npy') 173 | print(" loaded U: %s" % str(U.shape)) 174 | assert(L.shape[0] == U.shape[0]) 175 | assert(L.shape[1] == H.shape[1]) 176 | assert(H.shape[0] == U.shape[1]) 177 | model = RNNLM(L0 = L, U0 = U) 178 | model.params.H[:] = H 179 | 180 | @testcase("Part2: test generate_sequence") 181 | def rnnlm_generate_sequence(): 182 | from rnnlm import RNNLM 183 | np.random.seed(10) 184 | L = np.random.randn(20,10) 185 | model = RNNLM(L0 = L) 186 | model.H = np.random.randn(20,20) 187 | s, J = model.generate_sequence(0,1, maxlen=15) 188 | print("dummy J: %g" % J) 189 | print("dummy seq: len(s) = %d" % len(s)) 190 | assert(len(s) <= 15+1) 191 | assert(s[0] == 0) 192 | assert(J > 0) 193 | 194 | ## 195 | # Execute sanity check 196 | print("=== Running sanity check ===") 197 | for f in testcases: 198 | f() 199 | 200 | if fail <= 0: 201 | print("=== Sanity check passed! 
===") 202 | else: 203 | print("=== Sanity check failed %d tests :( ===" % fail) 204 | if not prompt("Continue submission anyway?"): 205 | sys.exit(1) 206 | 207 | 208 | ## 209 | # List of files for submission 210 | filelist = [ 211 | 'part0-XOR.ipynb', 212 | 'part1-NER.ipynb', 213 | 'misc.py', 214 | 'nerwindow.py', 215 | 'ner.learningcurve.best.png', 216 | 'ner.learningcurve.comparison.png', 217 | 'dev.predicted', 218 | 'test.predicted', 219 | 'part11probing.py', 220 | 'part2-RNNLM.ipynb', 221 | 'rnnlm.py', 222 | 'rnnlm.H.npy', 223 | 'rnnlm.L.npy', 224 | 'rnnlm.U.npy', 225 | ] 226 | files_ok = [] 227 | files_missing = [] 228 | 229 | # Verify required files present 230 | print("=== Verifying file list ===") 231 | for fname in filelist: 232 | print(("File: %s ? -" % fname), end=' ') 233 | if os.path.isfile(fname): 234 | print("ok"); files_ok.append(fname) 235 | else: 236 | print("NOT FOUND"); files_missing.append(fname) 237 | if len(files_missing) > 0: 238 | print("== Error: missing files ==") 239 | print(" ".join(files_missing)) 240 | if not prompt("Continue submission anyway?"): 241 | sys.exit(1) 242 | 243 | ## 244 | # Prepare submission zip 245 | from zipfile import ZipFile 246 | 247 | # Get SUNet ID 248 | sunetid = "" 249 | fail = -1 250 | while not re.match(r'[\w\d]+', sunetid): 251 | fail += 1 252 | sunetid = input("=== Please enter your SUNet ID ===\nSUNet ID: ").lower() 253 | if fail > 3: print("Error: invalid ID"); sys.exit(1) 254 | 255 | # Pack in files 256 | zipname = "%s.zip" % sunetid 257 | with ZipFile(zipname, 'w') as zf: 258 | print("=== Generating submission file '%s' ===" % zipname) 259 | for fname in files_ok: 260 | print((" %s" % fname), end=' ') 261 | zf.write(fname) 262 | print(("(%.02f kB)" % ((1.0/1024) * zf.getinfo(fname).file_size))) 263 | 264 | # Check size 265 | fsize = os.path.getsize(zipname) 266 | SIZE_LIMIT = 3*(2**30) # 30 MB 267 | print("Submission size: %.02f kB -" % ((1.0/1024) * fsize), end=' ') 268 | if fsize < SIZE_LIMIT: 269 | print("ok!") 270 | else: 271 | print("too large! (limit = %.02f kB" % ((1.0/1024) * SIZE_LIMIT)) 272 | sys.exit(1) 273 | 274 | print("=== Successfully generated submission zipfile! ===") 275 | print("Please upload '%s' to Box, and don't forget to submit your writeup PDF via Scoryst!" 
% zipname) -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2/data_utils/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/ner.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Utility functions for NER assignment 3 | # Assigment 2, part 1 for CS224D 4 | ## 5 | 6 | from .utils import invert_dict 7 | from numpy import * 8 | 9 | def load_wv(vocabfile, wvfile): 10 | wv = loadtxt(wvfile, dtype=float) 11 | with open(vocabfile) as fd: 12 | words = [line.strip() for line in fd] 13 | num_to_word = dict(enumerate(words)) 14 | word_to_num = invert_dict(num_to_word) 15 | return wv, word_to_num, num_to_word 16 | 17 | 18 | def save_predictions(y, filename): 19 | """Save predictions, one per line.""" 20 | with open(filename, 'w') as fd: 21 | fd.write("\n".join(map(str, y))) 22 | fd.write("\n") -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys, os, re, json 2 | import itertools 3 | from collections import Counter 4 | import time 5 | from numpy import * 6 | 7 | import pandas as pd 8 | 9 | 10 | def invert_dict(d): 11 | return {v:k for k,v in d.items()} 12 | 13 | def flatten1(lst): 14 | return list(itertools.chain.from_iterable(lst)) 15 | 16 | def load_wv_pandas(fname): 17 | return pd.read_hdf(fname, 'data') 18 | 19 | def extract_wv(df): 20 | num_to_word = dict(enumerate(df.index)) 21 | word_to_num = invert_dict(num_to_word) 22 | wv = df.as_matrix() 23 | return wv, word_to_num, num_to_word 24 | 25 | def canonicalize_digits(word): 26 | if any([c.isalpha() for c in word]): return word 27 | word = re.sub("\d", "DG", word) 28 | if word.startswith("DG"): 29 | word = word.replace(",", "") # remove thousands separator 30 | return word 31 | 32 | def canonicalize_word(word, wordset=None, digits=True): 33 | word = word.lower() 34 | if digits: 35 | if (wordset != None) and (word in wordset): return word 36 | word = canonicalize_digits(word) # try to canonicalize numbers 37 | if (wordset == None) or (word in wordset): return word 38 | else: return "UUUNKKK" # unknown token 39 | 40 | 41 | ## 42 | # Utility functions used to create dataset 43 | ## 44 | def augment_wv(df, extra=["UUUNKKK"]): 45 | for e in extra: 46 | df.loc[e] = zeros(len(df.columns)) 47 | 48 | def prune_wv(df, vocab, extra=["UUUNKKK"]): 49 | """Prune word vectors to vocabulary.""" 50 | items = set(vocab).union(set(extra)) 51 | return df.filter(items=items, axis='index') 52 | 53 | def load_wv_raw(fname): 54 | return pd.read_table(fname, sep="\s+", 55 | header=None, 56 | index_col=0, 57 | quoting=3) 58 | 59 | def load_dataset(fname): 60 | docs = [] 61 | with open(fname) as fd: 62 | cur = [] 63 | for line in fd: 64 | # new sentence on -DOCSTART- or blank line 65 | if re.match(r"-DOCSTART-.+", line) or (len(line.strip()) == 0): 66 | if len(cur) > 0: 67 | docs.append(cur) 68 | cur = [] 69 | else: # read in tokens 70 | cur.append(line.strip().split("\t",1)) 71 | # flush running buffer 72 | docs.append(cur) 73 | return docs 74 | 75 | def extract_tag_set(docs): 76 | tags = 
set(flatten1([[t[1].split("|")[0] for t in d] for d in docs])) 77 | return tags 78 | 79 | def extract_word_set(docs): 80 | words = set(flatten1([[t[0] for t in d] for d in docs])) 81 | return words 82 | 83 | def pad_sequence(seq, left=1, right=1): 84 | return left*[("", "")] + seq + right*[("", "")] 85 | 86 | ## 87 | # For window models 88 | def seq_to_windows(words, tags, word_to_num, tag_to_num, left=1, right=1): 89 | ns = len(words) 90 | X = [] 91 | y = [] 92 | for i in range(ns): 93 | if words[i] == "" or words[i] == "": 94 | continue # skip sentence delimiters 95 | tagn = tag_to_num[tags[i]] 96 | idxs = [word_to_num[words[ii]] 97 | for ii in range(i - left, i + right + 1)] 98 | X.append(idxs) 99 | y.append(tagn) 100 | return array(X), array(y) 101 | 102 | def docs_to_windows(docs, word_to_num, tag_to_num, wsize=3): 103 | pad = (wsize - 1)//2 104 | docs = flatten1([pad_sequence(seq, left=pad, right=pad) for seq in docs]) 105 | 106 | words, tags = zip(*docs) 107 | words = [canonicalize_word(w, word_to_num) for w in words] 108 | tags = [t.split("|")[0] for t in tags] 109 | return seq_to_windows(words, tags, word_to_num, tag_to_num, pad, pad) 110 | 111 | def window_to_vec(window, L): 112 | """Concatenate word vectors for a given window.""" 113 | return concatenate([L[i] for i in window]) 114 | 115 | ## 116 | # For fixed-window LM: 117 | # each row of X is a list of word indices 118 | # each entry of y is the word index to predict 119 | def seq_to_lm_windows(words, word_to_num, ngram=2): 120 | ns = len(words) 121 | X = [] 122 | y = [] 123 | for i in range(ns): 124 | if words[i] == "": 125 | continue # skip sentence begin, but do predict end 126 | idxs = [word_to_num[words[ii]] 127 | for ii in range(i - ngram + 1, i + 1)] 128 | X.append(idxs[:-1]) 129 | y.append(idxs[-1]) 130 | return array(X), array(y) 131 | 132 | def docs_to_lm_windows(docs, word_to_num, ngram=2): 133 | docs = flatten1([pad_sequence(seq, left=(ngram-1), right=1) 134 | for seq in docs]) 135 | words = [canonicalize_word(wt[0], word_to_num) for wt in docs] 136 | return seq_to_lm_windows(words, word_to_num, ngram) 137 | 138 | 139 | ## 140 | # For RNN LM 141 | # just convert each sentence to a list of indices 142 | # after padding each with ... tokens 143 | def seq_to_indices(words, word_to_num): 144 | return array([word_to_num[w] for w in words]) 145 | 146 | def docs_to_indices(docs, word_to_num): 147 | docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 148 | ret = [] 149 | for seq in docs: 150 | words = [canonicalize_word(wt[0], word_to_num) for wt in seq] 151 | ret.append(seq_to_indices(words, word_to_num)) 152 | 153 | # return as numpy array for fancier slicing 154 | return array(ret, dtype=object) 155 | 156 | def offset_seq(seq): 157 | return seq[:-1], seq[1:] 158 | 159 | def seqs_to_lmXY(seqs): 160 | X, Y = zip(*[offset_seq(s) for s in seqs]) 161 | return array(X, dtype=object), array(Y, dtype=object) 162 | 163 | ## 164 | # For RNN tagger 165 | # return X, Y as lists 166 | # where X[i] is indices, Y[i] is tags for a sequence 167 | # NOTE: this does not use padding tokens! 
168 | # (RNN should natively handle begin/end) 169 | def docs_to_tag_sequence(docs, word_to_num, tag_to_num): 170 | # docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 171 | X = [] 172 | Y = [] 173 | for seq in docs: 174 | if len(seq) < 1: continue 175 | words, tags = zip(*seq) 176 | 177 | words = [canonicalize_word(w, word_to_num) for w in words] 178 | x = seq_to_indices(words, word_to_num) 179 | X.append(x) 180 | 181 | tags = [t.split("|")[0] for t in tags] 182 | y = seq_to_indices(tags, tag_to_num) 183 | Y.append(y) 184 | 185 | # return as numpy array for fancier slicing 186 | return array(X, dtype=object), array(Y, dtype=object) 187 | 188 | def idxs_to_matrix(idxs, L): 189 | """Return a matrix X with each row 190 | as a word vector for the corresponding 191 | index in idxs.""" 192 | return vstack([L[i] for i in idxs]) -------------------------------------------------------------------------------- /old_assignments/assignment2/misc.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Miscellaneous helper functions 3 | ## 4 | 5 | from numpy import * 6 | 7 | def random_weight_matrix(m, n): 8 | #### YOUR CODE HERE #### 9 | eps = sqrt(6/(m+n)) 10 | A0 = random.uniform(low=-eps, high=eps, size=(m,n)) 11 | #### END YOUR CODE #### 12 | assert A0.shape == (m,n) 13 | return A0 -------------------------------------------------------------------------------- /old_assignments/assignment2/nerwindow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nn.base import NNBase 3 | from nn.math import softmax, make_onehot 4 | from misc import random_weight_matrix 5 | 6 | 7 | ## 8 | # Evaluation code; do not change this 9 | ## 10 | from sklearn import metrics 11 | def full_report(y_true, y_pred, tagnames): 12 | cr = metrics.classification_report(y_true, y_pred, 13 | target_names=tagnames) 14 | print(cr) 15 | 16 | def eval_performance(y_true, y_pred, tagnames): 17 | pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred) 18 | print("=== Performance (omitting 'O' class) ===") 19 | print("Mean precision: %.02f%%" % (100*sum(pre[1:] * support[1:])/sum(support[1:]))) 20 | print("Mean recall: %.02f%%" % (100*sum(rec[1:] * support[1:])/sum(support[1:]))) 21 | print("Mean F1: %.02f%%" % (100*sum(f1[1:] * support[1:])/sum(support[1:]))) 22 | 23 | 24 | ## 25 | # Implement this! 26 | ## 27 | class WindowMLP(NNBase): 28 | """Single hidden layer, plus representation learning.""" 29 | 30 | def __init__(self, wv, windowsize=3, 31 | dims=[None, 100, 5], 32 | reg=0.001, alpha=0.01, rseed=10): 33 | """ 34 | Initialize classifier model. 35 | 36 | Arguments: 37 | wv : initial word vectors (array |V| x n) 38 | note that this is the transpose of the n x |V| matrix L 39 | described in the handout; you'll want to keep it in 40 | this |V| x n form for efficiency reasons, since numpy 41 | stores matrix rows continguously. 
42 |         windowsize : int, size of context window
43 |         dims : dimensions of [input, hidden, output]
44 |                input dimension can be computed from wv.shape
45 |         reg : regularization strength (lambda)
46 |         alpha : default learning rate
47 |         rseed : random initialization seed
48 |         """
49 | 
50 |         # Set regularization
51 |         self.lreg = float(reg)
52 |         self.alpha = alpha # default training rate
53 | 
54 |         dims[0] = windowsize * wv.shape[1] # input dimension
55 |         param_dims = dict(W=(dims[1], dims[0]),
56 |                           b1=(dims[1],),
57 |                           U=(dims[2], dims[1]),
58 |                           b2=(dims[2],),
59 |                           )
60 |         param_dims_sparse = dict(L=wv.shape)
61 | 
62 |         # initialize parameters: don't change this line
63 |         NNBase.__init__(self, param_dims, param_dims_sparse)
64 | 
65 |         np.random.seed(rseed) # be sure to seed this for repeatability!
66 |         #### YOUR CODE HERE ####
67 |         # any other initialization you need
68 |         self.params.W = random_weight_matrix(*self.params.W.shape)
69 |         self.params.U = random_weight_matrix(*self.params.U.shape)
70 |         self.sparams.L = wv.copy()
71 |         #### END YOUR CODE ####
72 | 
73 | 
74 | 
75 |     def _acc_grads(self, window, label):
76 |         """
77 |         Accumulate gradients, given a training point
78 |         (window, label) of the format
79 | 
80 |         window = [x_{i-1} x_{i} x_{i+1}] # three ints
81 |         label = {0,1,2,3,4} # single int, gives class
82 | 
83 |         Your code should update self.grads and self.sgrads,
84 |         in order for gradient_check and training to work.
85 | 
86 |         So, for example:
87 |         self.grads.U += (your gradient dJ/dU)
88 |         self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
89 |         """
90 |         #### YOUR CODE HERE ####
91 |         ##
92 |         # Forward propagation
93 |         words = np.array([self.sparams.L[x] for x in window])
94 |         x = np.reshape(words, -1)
95 |         layer1 = np.tanh(self.params.W.dot(x) + self.params.b1)
96 |         probs = softmax(self.params.U.dot(layer1) + self.params.b2)
97 |         ##
98 |         # Backpropagation
99 |         y = make_onehot(label, len(probs))
100 |         dx = probs - y
101 |         dU = np.outer(dx, layer1)
102 |         delta2 = np.multiply((1 - np.square(layer1)), # tanh'(z) = 1 - tanh(z)^2
103 |                              self.params.U.T.dot(dx))
104 |         dW = np.outer(delta2, x)
105 |         db1 = delta2
106 |         dL = self.params.W.T.dot(delta2)
107 |         dL = np.reshape(dL, (3, self.sparams.L.shape[1]))
108 | 
109 |         dW += self.lreg * self.params.W
110 |         dU += self.lreg * self.params.U
111 | 
112 |         self.grads.U += dU
113 |         self.grads.W += dW
114 |         self.grads.b2 += dx
115 |         self.grads.b1 += delta2
116 | 
117 |         self.sgrads.L[window[0]] = dL[0]
118 |         self.sgrads.L[window[1]] = dL[1]
119 |         self.sgrads.L[window[2]] = dL[2]
120 |         #### END YOUR CODE ####
121 | 
122 | 
123 |     def predict_proba(self, windows):
124 |         """
125 |         Predict class probabilities.
126 | 
127 |         Should return a matrix P of probabilities,
128 |         with each row corresponding to a row of X.
129 | 
130 |         windows = array (n x windowsize),
131 |                   each row is a window of indices
132 |         """
133 |         # handle singleton input by making sure we have
134 |         # a list-of-lists
135 |         if not hasattr(windows[0], "__iter__"):
136 |             windows = [windows]
137 | 
138 |         #### YOUR CODE HERE ####
139 |         x = self.sparams.L[np.array(windows)].reshape(len(windows), -1) # concatenated window vectors
140 |         hidden = np.tanh(x.dot(self.params.W.T) + self.params.b1)
141 |         P = np.array([softmax(row) for row in hidden.dot(self.params.U.T) + self.params.b2])
142 |         #### END YOUR CODE ####
143 |         return P # rows are output for each input
144 | 
145 | 
146 |     def predict(self, windows):
147 |         """
148 |         Predict most likely class.
149 | Returns a list of predicted class indices; 150 | input is same as to predict_proba 151 | """ 152 | 153 | #### YOUR CODE HERE #### 154 | probs = self.predict_proba(windows) 155 | c = np.argmax(probs, axis=1) 156 | #### END YOUR CODE #### 157 | return c # list of predicted classes 158 | 159 | 160 | def compute_loss(self, windows, labels): 161 | """ 162 | Compute the loss for a given dataset. 163 | windows = same as for predict_proba 164 | labels = list of class labels, for each row of windows 165 | """ 166 | 167 | #### YOUR CODE HERE #### 168 | 169 | 170 | #### END YOUR CODE #### 171 | return J -------------------------------------------------------------------------------- /old_assignments/assignment2/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2/nn/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment2/nn/math.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | 3 | def sigmoid(x): 4 | return 1.0/(1.0 + exp(-x)) 5 | 6 | def softmax(x): 7 | xt = exp(x - max(x)) 8 | return xt / sum(xt) 9 | 10 | def make_onehot(i, n): 11 | y = zeros(n) 12 | y[i] = 1 13 | return y 14 | 15 | 16 | class MultinomialSampler(object): 17 | """ 18 | Fast (O(log n)) sampling from a discrete probability 19 | distribution, with O(n) set-up time. 20 | """ 21 | 22 | def __init__(self, p, verbose=False): 23 | n = len(p) 24 | p = p.astype(float) / sum(p) 25 | self._cdf = cumsum(p) 26 | 27 | def sample(self, k=1): 28 | rs = random.random(k) 29 | # binary search to get indices 30 | return searchsorted(self._cdf, rs) 31 | 32 | def __call__(self, **kwargs): 33 | return self.sample(**kwargs) 34 | 35 | def reconstruct_p(self): 36 | """ 37 | Return the original probability vector. 38 | Helpful for debugging. 39 | """ 40 | n = len(self._cdf) 41 | p = zeros(n) 42 | p[0] = self._cdf[0] 43 | p[1:] = (self._cdf[1:] - self._cdf[:-1]) 44 | return p 45 | 46 | 47 | def multinomial_sample(p): 48 | """ 49 | Wrapper to generate a single sample, 50 | using the above class. 
51 | """ 52 | return MultinomialSampler(p).sample(1)[0] -------------------------------------------------------------------------------- /old_assignments/assignment2/part11probing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, os 4 | from numpy import * 5 | 6 | def print_scores(scores, words): 7 | for i in range(len(scores)): 8 | print("[%d]: (%.03f) %s" % (i, scores[i], words[i])) 9 | 10 | 11 | def part_a(clf, num_to_word, verbose=True): 12 | """ 13 | Code for 1.1 part (a): 14 | Hidden Layer, Center Word 15 | 16 | clf: instance of WindowMLP, 17 | trained on data 18 | num_to_word: dict {int:string} 19 | 20 | You need to create: 21 | - topscores : list of lists of 10 scores (float) 22 | - topwords : list of lists of 10 words (string) 23 | You should generate these lists for each neuron 24 | (so for hdim = 100, you'll have lists of 100 lists of 10) 25 | then fill in neurons = [] to print 26 | """ 27 | #### YOUR CODE HERE #### 28 | 29 | 30 | 31 | 32 | 33 | neurons = [1,3,4,6,8] # change this to your chosen neurons 34 | 35 | #### END YOUR CODE #### 36 | # topscores[i]: list of floats 37 | # topwords[i]: list of words 38 | if verbose == True: 39 | for i in neurons: 40 | print("Neuron %d" % i) 41 | print_scores(topscores[i], topwords[i]) 42 | 43 | return topscores, topwords 44 | 45 | 46 | def part_b(clf, num_to_word, num_to_tag, verbose=True): 47 | """ 48 | Code for 1.1 part (b): 49 | Model Output, Center Word 50 | 51 | clf: instance of WindowMLP, 52 | trained on data 53 | num_to_word: dict {int:string} 54 | 55 | You need to create: 56 | - topscores : list of 5 lists of 10 probability scores (float) 57 | - topwords : list of 5 lists of 10 words (string) 58 | where indices 0,1,2,3,4 correspond to num_to_tag, i.e. 59 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 60 | """ 61 | #### YOUR CODE HERE #### 62 | 63 | 64 | 65 | 66 | 67 | 68 | #### END YOUR CODE #### 69 | # topscores[i]: list of floats 70 | # topwords[i]: list of words 71 | if verbose == True: 72 | for i in range(1,5): 73 | print("Output neuron %d: %s" % (i, num_to_tag[i])) 74 | print_scores(topscores[i], topwords[i]) 75 | print("") 76 | 77 | return topscores, topwords 78 | 79 | 80 | def part_c(clf, num_to_word, num_to_tag, verbose=True): 81 | """ 82 | Code for 1.1 part (c): 83 | Model Output, Preceding Word 84 | 85 | clf: instance of WindowMLP, 86 | trained on data 87 | num_to_word: dict {int:string} 88 | 89 | You need to create: 90 | - topscores : list of 5 lists of 10 probability scores (float) 91 | - topwords : list of 5 lists of 10 words (string) 92 | where indices 0,1,2,3,4 correspond to num_to_tag, i.e. 
93 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 94 | """ 95 | #### YOUR CODE HERE #### 96 | 97 | 98 | 99 | 100 | 101 | 102 | #### END YOUR CODE #### 103 | # topscores[i]: list of floats 104 | # topwords[i]: list of words 105 | if verbose == True: 106 | for i in range(1,5): 107 | print("Output neuron %d: %s" % (i, num_to_tag[i])) 108 | print_scores(topscores[i], topwords[i]) 109 | print("") 110 | 111 | return topscores, topwords 112 | 113 | 114 | ## 115 | # Dummy test code 116 | # run this script, and make sure nothing crashes 117 | # (this is the same as sanity check for part 1.1) 118 | if __name__ == '__main__': 119 | num_to_word = dict(enumerate( 120 | ["hello", "world", "i", "am", "a", "banana", 121 | "there", "is", "no", "spoon"])) 122 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 123 | num_to_tag = dict(enumerate(tagnames)) 124 | 125 | from nerwindow import WindowMLP 126 | random.seed(10) 127 | wv = random.randn(10,50) 128 | clf = WindowMLP(wv, windowsize=3, 129 | dims = [None, 100, 5], rseed=10) 130 | 131 | print("\n=== Testing Part (a) ===\n") 132 | s,w = part_a(clf, num_to_word, verbose=True) 133 | assert(len(s) == len(w)) 134 | if type(s) == dict: # some students may have done this 135 | for k in list(s.keys()): assert(k in w) 136 | for k in list(w.keys()): assert(k in s) 137 | assert(len(s) >= 5) 138 | else: # list 139 | assert(len(s[0]) == len(w[0])) 140 | assert(len(s[0]) == 10) 141 | assert(type(w[0][0]) == str) 142 | 143 | print("\n=== Testing Part (b) ===\n") 144 | s,w = part_b(clf, num_to_word, num_to_tag, verbose=True) 145 | assert(len(s) == len(w)) 146 | assert(len(s) == 5) 147 | assert(len(s[0]) == len(w[0])) 148 | assert(len(s[0]) == 10) 149 | assert(type(w[0][0]) == str) 150 | 151 | print("\n=== Testing Part (c) ===\n") 152 | s,w = part_c(clf, num_to_word, num_to_tag, verbose=True) 153 | assert(len(s) == len(w)) 154 | assert(len(s) == 5) 155 | assert(len(s[0]) == len(w[0])) 156 | assert(len(s[0]) == 10) 157 | assert(type(w[0][0]) == str) -------------------------------------------------------------------------------- /old_assignments/assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==3.0.0 7 | matplotlib==1.4.3 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.2 11 | scikit-learn==0.16.0 12 | pandas==0.15.2 13 | pyparsing==2.0.3 14 | python-dateutil==2.4.0 15 | pytz==2014.10 16 | pyzmq==14.4.1 17 | scipy==0.14.1 18 | six==1.9.0 19 | tornado==4.0.2 20 | wsgiref==0.1.2 21 | -------------------------------------------------------------------------------- /old_assignments/assignment2/rnnlm.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | import itertools 3 | import time 4 | import sys 5 | 6 | # Import NN utils 7 | from nn.base import NNBase 8 | from nn.math import softmax, sigmoid 9 | from nn.math import MultinomialSampler, multinomial_sample 10 | from misc import random_weight_matrix 11 | 12 | 13 | class RNNLM(NNBase): 14 | """ 15 | Implements an RNN language model of the form: 16 | h(t) = sigmoid(H * h(t-1) + L[x(t)]) 17 | y(t) = softmax(U * h(t)) 18 | where y(t) predicts the next word in the sequence 19 | 20 | U = |V| * dim(h) as output vectors 21 | L = |V| * dim(h) as input vectors 22 | 23 | You should initialize each U[i,j] and L[i,j] 24 | as Gaussian noise with mean 0 and variance 0.1 25 | 26 | Arguments: 
27 | L0 : initial input word vectors 28 | U0 : initial output word vectors 29 | alpha : default learning rate 30 | bptt : number of backprop timesteps 31 | """ 32 | 33 | def __init__(self, L0, U0=None, 34 | alpha=0.005, rseed=10, bptt=1): 35 | 36 | self.hdim = L0.shape[1] # word vector dimensions 37 | self.vdim = L0.shape[0] # vocab size 38 | param_dims = dict(H = (self.hdim, self.hdim), 39 | U = L0.shape) 40 | # note that only L gets sparse updates 41 | param_dims_sparse = dict(L = L0.shape) 42 | NNBase.__init__(self, param_dims, param_dims_sparse) 43 | 44 | #### YOUR CODE HERE #### 45 | 46 | 47 | # Initialize word vectors 48 | # either copy the passed L0 and U0 (and initialize in your notebook) 49 | # or initialize with gaussian noise here 50 | 51 | # Initialize H matrix, as with W and U in part 1 52 | 53 | #### END YOUR CODE #### 54 | 55 | 56 | def _acc_grads(self, xs, ys): 57 | """ 58 | Accumulate gradients, given a pair of training sequences: 59 | xs = [] # input words 60 | ys = [] # output words (to predict) 61 | 62 | Your code should update self.grads and self.sgrads, 63 | in order for gradient_check and training to work. 64 | 65 | So, for example: 66 | self.grads.H += (your gradient dJ/dH) 67 | self.sgrads.L[i] = (gradient dJ/dL[i]) # update row 68 | 69 | Per the handout, you should: 70 | - make predictions by running forward in time 71 | through the entire input sequence 72 | - for *each* output word in ys, compute the 73 | gradients with respect to the cross-entropy 74 | loss for that output word 75 | - run backpropagation-through-time for self.bptt 76 | timesteps, storing grads in self.grads (for H, U) 77 | and self.sgrads (for L) 78 | 79 | You'll want to store your predictions \hat{y}(t) 80 | and the hidden layer values h(t) as you run forward, 81 | so that you can access them during backpropagation. 82 | 83 | At time 0, you should initialize the hidden layer to 84 | be a vector of zeros. 85 | """ 86 | 87 | # Expect xs as list of indices 88 | ns = len(xs) 89 | 90 | # make matrix here of corresponding h(t) 91 | # hs[-1] = initial hidden state (zeros) 92 | hs = zeros((ns+1, self.hdim)) 93 | # predicted probas 94 | ps = zeros((ns, self.vdim)) 95 | 96 | #### YOUR CODE HERE #### 97 | 98 | ## 99 | # Forward propagation 100 | 101 | 102 | ## 103 | # Backward propagation through time 104 | 105 | 106 | 107 | #### END YOUR CODE #### 108 | 109 | 110 | 111 | def grad_check(self, x, y, outfd=sys.stderr, **kwargs): 112 | """ 113 | Wrapper for gradient check on RNNs; 114 | ensures that backprop-through-time is run to completion, 115 | computing the full gradient for the loss as summed over 116 | the input sequence and predictions. 117 | 118 | Do not modify this function! 119 | """ 120 | bptt_old = self.bptt 121 | self.bptt = len(y) 122 | print("NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt, file=outfd) 123 | NNBase.grad_check(self, x, y, outfd=outfd, **kwargs) 124 | self.bptt = bptt_old 125 | print("Reset self.bptt = %d" % self.bptt, file=outfd) 126 | 127 | 128 | def compute_seq_loss(self, xs, ys): 129 | """ 130 | Compute the total cross-entropy loss 131 | for an input sequence xs and output 132 | sequence (labels) ys. 133 | 134 | You should run the RNN forward, 135 | compute cross-entropy loss at each timestep, 136 | and return the sum of the point losses. 
137 | """ 138 | 139 | J = 0 140 | #### YOUR CODE HERE #### 141 | 142 | 143 | #### END YOUR CODE #### 144 | return J 145 | 146 | 147 | def compute_loss(self, X, Y): 148 | """ 149 | Compute total loss over a dataset. 150 | (wrapper for compute_seq_loss) 151 | 152 | Do not modify this function! 153 | """ 154 | if not isinstance(X[0], ndarray): # single example 155 | return self.compute_seq_loss(X, Y) 156 | else: # multiple examples 157 | return sum([self.compute_seq_loss(xs,ys) 158 | for xs,ys in zip(X, Y)]) 159 | 160 | def compute_mean_loss(self, X, Y): 161 | """ 162 | Normalize loss by total number of points. 163 | 164 | Do not modify this function! 165 | """ 166 | J = self.compute_loss(X, Y) 167 | ntot = sum(map(len,Y)) 168 | return J / float(ntot) 169 | 170 | 171 | def generate_sequence(self, init, end, maxlen=100): 172 | """ 173 | Generate a sequence from the language model, 174 | by running the RNN forward and selecting, 175 | at each timestep, a random word from the 176 | a word from the emitted probability distribution. 177 | 178 | The MultinomialSampler class (in nn.math) may be helpful 179 | here for sampling a word. Use as: 180 | 181 | y = multinomial_sample(p) 182 | 183 | to sample an index y from the vector of probabilities p. 184 | 185 | 186 | Arguments: 187 | init = index of start word (word_to_num['']) 188 | end = index of end word (word_to_num['']) 189 | maxlen = maximum length to generate 190 | 191 | Returns: 192 | ys = sequence of indices 193 | J = total cross-entropy loss of generated sequence 194 | """ 195 | 196 | J = 0 # total loss 197 | ys = [init] # emitted sequence 198 | 199 | #### YOUR CODE HERE #### 200 | 201 | 202 | #### YOUR CODE HERE #### 203 | return ys, J 204 | 205 | 206 | 207 | class ExtraCreditRNNLM(RNNLM): 208 | """ 209 | Implements an improved RNN language model, 210 | for better speed and/or performance. 211 | 212 | We're not going to place any constraints on you 213 | for this part, but we do recommend that you still 214 | use the starter code (NNBase) framework that 215 | you've been using for the NER and RNNLM models. 216 | """ 217 | 218 | def __init__(self, *args, **kwargs): 219 | #### YOUR CODE HERE #### 220 | raise NotImplementedError("__init__() not yet implemented.") 221 | #### END YOUR CODE #### 222 | 223 | def _acc_grads(self, xs, ys): 224 | #### YOUR CODE HERE #### 225 | raise NotImplementedError("_acc_grads() not yet implemented.") 226 | #### END YOUR CODE #### 227 | 228 | def compute_seq_loss(self, xs, ys): 229 | #### YOUR CODE HERE #### 230 | raise NotImplementedError("compute_seq_loss() not yet implemented.") 231 | #### END YOUR CODE #### 232 | 233 | def generate_sequence(self, init, end, maxlen=100): 234 | #### YOUR CODE HERE #### 235 | raise NotImplementedError("generate_sequence() not yet implemented.") 236 | #### END YOUR CODE #### -------------------------------------------------------------------------------- /old_assignments/assignment2/softmax_example.py: -------------------------------------------------------------------------------- 1 | from nn.base import NNBase 2 | from nn.math import softmax, make_onehot 3 | from misc import random_weight_matrix 4 | from numpy import * 5 | 6 | class SoftmaxRegression(NNBase): 7 | """ 8 | Dummy example, to show how to implement a network. 9 | This implements softmax regression, trained by SGD. 
10 | """ 11 | 12 | def __init__(self, wv, dims=[100, 5], 13 | reg=0.1, alpha=0.001, 14 | rseed=10): 15 | """ 16 | Set up classifier: parameters, hyperparameters 17 | """ 18 | ## 19 | # Store hyperparameters 20 | self.lreg = reg # regularization 21 | self.alpha = alpha # default learning rate 22 | self.nclass = dims[1] # number of output classes 23 | 24 | ## 25 | # NNBase stores parameters in a special format 26 | # for efficiency reasons, and to allow the code 27 | # to automatically implement gradient checks 28 | # and training algorithms, independent of the 29 | # specific model architecture 30 | # To initialize, give shapes as if to np.array((m,n)) 31 | param_dims = dict(W = (dims[1], dims[0]), # 5x100 matrix 32 | b = (dims[1])) # column vector 33 | # These parameters have sparse gradients, 34 | # which is *much* more efficient if only a row 35 | # at a time gets updated (e.g. word representations) 36 | param_dims_sparse = dict(L=wv.shape) 37 | NNBase.__init__(self, param_dims, param_dims_sparse) 38 | 39 | ## 40 | # Now we can access the parameters using 41 | # self.params. for normal parameters 42 | # self.sparams. for params with sparse gradients 43 | # and get access to normal NumPy arrays 44 | self.sparams.L = wv.copy() # store own representations 45 | self.params.W = random_weight_matrix(*self.params.W.shape) 46 | # self.params.b1 = zeros((self.nclass,1)) # done automatically! 47 | 48 | def _acc_grads(self, idx, label): 49 | """ 50 | Accumulate gradients from a training example. 51 | """ 52 | ## 53 | # Forward propagation 54 | x = self.sparams.L[idx] # extract representation 55 | p = softmax(self.params.W.dot(x) + self.params.b) 56 | 57 | ## 58 | # Compute gradients w.r.t cross-entropy loss 59 | y = make_onehot(label, len(p)) 60 | delta = p - y 61 | # dJ/dW, dJ/db1 62 | self.grads.W += outer(delta, x) + self.lreg * self.params.W 63 | self.grads.b += delta 64 | # dJ/dL, sparse update: use sgrads 65 | # this stores an update to the row L[idx] 66 | self.sgrads.L[idx] = self.params.W.T.dot(delta) 67 | # note that the syntax is overloaded here; L[idx] = 68 | # works like +=, so if you update the same index 69 | # twice, it'll store *BOTH* updates. For example: 70 | # self.sgrads.L[idx] = ones(50) 71 | # self.sgrads.L[idx] = ones(50) 72 | # will add -2*alpha to that row when gradients are applied! 73 | 74 | ## 75 | # We don't need to do the update ourself, as NNBase 76 | # calls that during training. See NNBase.train_sgd 77 | # in nn/base.py to see how this is done, if interested. 78 | ## 79 | 80 | def compute_loss(self, idx, label): 81 | """ 82 | Compute the cost function for a single example. 83 | """ 84 | ## 85 | # Forward propagation 86 | x = self.sparams.L[idx] 87 | p = softmax(self.params.W.dot(x) + self.params.b) 88 | J = -1*log(p[label]) # cross-entropy loss 89 | Jreg = (self.lreg / 2.0) * sum(self.params.W**2.0) 90 | return J + Jreg 91 | 92 | def predict_proba(self, idx): 93 | """ 94 | Predict class probabilities. 
95 |         """
96 |         x = self.sparams.L[idx]
97 |         p = softmax(self.params.W.dot(x) + self.params.b)
98 |         return p
99 | 
100 |     def predict(self, idx):
101 |         """Predict most likely class."""
102 |         P = self.predict_proba(idx)
103 |         return argmax(P, axis=1)
--------------------------------------------------------------------------------
/old_assignments/assignment3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment3.pdf
--------------------------------------------------------------------------------
/old_assignments/assignment3/README.md:
--------------------------------------------------------------------------------
1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/)
2 | ====================================================================================
3 | 
4 | **Due Date: 5/21/2015 (Thursday) 11:59 PM PST.**
5 | 
6 | In this assignment we will treat you as a real professional data scientist. This assignment should give you experience running, training, optimizing, debugging, and augmenting your neural nets, then rinsing and repeating. To reach state-of-the-art results, you must learn to look at your errors to gain insights, then augment your model and retrain it.
7 | 
8 | Setup
9 | -----
10 | 
11 | By now all of you should have a functioning `Python 2.7.x` environment; all remaining setup instructions are in the handout.
12 | 
13 | **Get the code:** [Download the starter code here](http://cs224d.stanford.edu/assignment3/assignment3.zip) and the [complementary written problems here](http://cs224d.stanford.edu/assignment3/assignment3.pdf).
14 | 
15 | Submitting your work
16 | --------------------
17 | 
18 | Once you are done working, zip your code base up and call it `<your-sunet-id>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be
19 | 
20 | `jdoe.zip`
21 | 
22 | Upload this file to [the Box for this assignment](https://stanford.box.com/signup/collablink/d_3641065535/110477b9efa36c).
23 | For the written component, which includes derivations and plots, please upload a PDF file of your solutions to [Scoryst](https://scoryst.com/course/67/submit/). When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. The last part of each problem is a placeholder for the programming component (b); you can just map it to the page of the last part in your written assignment.
24 | 
25 | Tasks
26 | -----
27 | 
28 | There are two parts to this assignment. The first is a very easy Recursive Neural Network implementation that you will train and test multiple times. The second is an augmented version of the plain RNN that adds one additional layer; your job is to see how much the model improves. There is a lot of extra credit in this PSet to give you all the opportunity to really show us how capable a data scientist you are! But we also want you focusing on your projects, so this PSet is intentionally short.
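
To make the two models concrete, here is a rough sketch of the core computation. This is illustrative only, not the assignment's starter code: the parameter names (`W`, `b`, `W2`, `b2`, `U`, `bs`) and all dimensions are made up, and the handout's exact equations take precedence. A plain recursive network builds a parent vector from its two children with a single affine-plus-nonlinearity step, and the 2-layer variant inserts one extra hidden layer before the softmax classifier at each node.

```python
import numpy as np

def compose(h_left, h_right, W, b):
    # Plain recursive step: parent vector from the concatenated children.
    return np.tanh(W.dot(np.concatenate([h_left, h_right])) + b)

def classify(h, W2, b2, U, bs):
    # "Deep" variant: one extra hidden layer before the per-node softmax.
    a = np.tanh(W2.dot(h) + b2)
    z = U.dot(a) + bs
    e = np.exp(z - z.max())
    return e / e.sum()

# Tiny smoke test with random parameters (dimensions are arbitrary).
d, d2, n_classes = 10, 8, 5
rng = np.random.RandomState(0)
W, b = 0.1 * rng.randn(d, 2 * d), np.zeros(d)
W2, b2 = 0.1 * rng.randn(d2, d), np.zeros(d2)
U, bs = 0.1 * rng.randn(n_classes, d2), np.zeros(n_classes)
parent = compose(rng.randn(d), rng.randn(d), W, b)
print(classify(parent, W2, b2, U, bs))  # probabilities summing to 1
```

In the assignment itself the child vectors come from parse trees and all parameters are trained by backpropagation through the tree structure; see the handout for the exact model and loss.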
29 | 
30 | ### Q1: Recursive Neural Network (30 points)
31 | 
32 | * (a): 5 points
33 | * (b): 5 points
34 | * (c): 15 points
35 | * (d): 5 points
36 | 
37 | ### Q2: 2-Layer Deep RNN (70 points and 30 Extra Credit points)
38 | 
39 | * (a): 15 points
40 | * (b): 15 points
41 | * (c): 30 points
42 | * (d): 10 points
43 | * (e): 15 extra points
44 | * (f): 15 extra points
45 | 
46 | ### Q3: RNTN (20 extra points)
47 | 
48 | * (a): 5 points (correct derivations)
49 | * (b): 5 points (correct implementation)
50 | * (c): 10 points (found optimal hyperparameters)
51 | 
52 | **Please try to finish the written component before writing code. We designed the written component to help you think through the details of your code implementation.**
53 | 
--------------------------------------------------------------------------------