├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── assignment1 ├── README.md ├── assignment1.pdf ├── collectSubmission.sh ├── cs224d │ ├── __init__.py │ ├── data_utils.py │ └── datasets │ │ └── get_datasets.sh ├── q1_softmax.py ├── q1_softmax_sol.py ├── q2_gradcheck.py ├── q2_neural.py ├── q2_neural_sol.py ├── q2_sigmoid.py ├── q2_sigmoid_sol.py ├── q3_run.py ├── q3_sgd.py ├── q3_word2vec.py ├── q3_word2vec_sol.py ├── q3_word_vectors.png ├── q4_reg_v_acc.png ├── q4_sentiment.py ├── q4_softmaxreg.py ├── requirements.txt ├── solutions │ ├── .gitignore │ └── assignment1_solutions.tex ├── tensorflow_word2vec.py └── tests │ ├── test_gradcheck.py │ ├── test_neural.py │ ├── test_neural_to_solutions.py │ ├── test_normalize.py │ ├── test_sgd.py │ ├── test_sigmoid.py │ ├── test_sigmoid_to_solutions.py │ ├── test_softmax.py │ ├── test_softmax_regression.py │ ├── test_softmax_to_solutions.py │ └── test_word2vec_to_solutions.py ├── assignment2 ├── README.md ├── assignment2.pdf ├── data │ ├── ner │ │ ├── dev │ │ ├── test.masked │ │ ├── train │ │ ├── vocab.txt │ │ └── wordVectors.txt │ └── ptb │ │ ├── ptb.test.txt │ │ ├── ptb.train.txt │ │ ├── ptb.valid.txt │ │ └── vocab.ptb.txt ├── data_utils │ ├── __init__.py │ ├── ner.py │ └── utils.py ├── model.py ├── q1_classifier.py ├── q1_softmax.py ├── q2_NER.py ├── q2_initialization.py ├── q3_RNNLM.py ├── solutions │ ├── .gitignore │ └── assignment2_solutions.tex ├── test_confusion.py ├── tests │ └── test_softmax.py └── utils.py ├── assignment3 ├── README.md ├── assignment3_2016.pdf ├── codebase_release │ ├── loss_history.png │ ├── prepare_submission.sh │ ├── rnn.py │ ├── rnn_pytorch.py │ ├── rnn_tensorarray.py │ ├── rnn_while_loop_storage.py │ ├── setup.sh │ ├── tree.py │ └── utils.py └── recursive.png ├── class_notebooks ├── tensorflow_scan.ipynb └── vanishing_grad_example.ipynb └── old_assignments ├── assignment1.pdf ├── assignment1 ├── README.md ├── collectSubmission.sh ├── cs224d │ ├── __init__.py │ ├── data_utils.py │ └── datasets │ │ └── get_datasets.sh ├── requirements.txt ├── solutions │ ├── .gitignore │ └── assignment1_solutions.tex ├── updateAssignment.sh └── wordvec_sentiment.ipynb ├── assignment2.pdf ├── assignment2 ├── README.md ├── collectSubmission.py ├── data_utils │ ├── __init__.py │ ├── ner.py │ └── utils.py ├── misc.py ├── nerwindow.py ├── nn │ ├── __init__.py │ ├── base.py │ └── math.py ├── part0-XOR.ipynb ├── part1-NER.ipynb ├── part11probing.py ├── part2-RNNLM.ipynb ├── requirements.txt ├── rnnlm.py └── softmax_example.py ├── assignment3.pdf └── assignment3 └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | #numpy weight arrays 7 | *.npy 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | *.zip 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | addons: 4 | apt: 5 | packages: 6 | - python3-scipy 7 | - python3-numpy 8 | - python-sklearn 9 | # Whitelisting master 10 | branches: 11 | only: 12 | - master 13 | git: 14 | depth: 10 15 | python: 16 | - "3.5" 17 | before_install: 18 | - echo "before_install" 19 | - echo $VIRTUAL_ENV 20 | - df -h 21 | - date 22 | - pwd 23 | - uname -m 24 | - python -V 25 | - which python2 26 | - which python3 27 | - git --version 28 | - git tag 29 | - pip install pytest-cov pylint unidecode 30 | - apt-cache show python3-numpy python3-scipy 31 | install: 32 | - echo "install start" 33 | # - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 34 | # - bash miniconda.sh -b -p ${HOME}/miniconda 35 | # - export PATH="$HOME/miniconda/bin:$PATH" 36 | # - conda config --set always_yes yes --set changeps1 no 37 | # - conda update -q conda 38 | # - conda info -a 39 | # - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib seaborn pytest pytest-cov 40 | # - source activate test-environment 41 | before_script: 42 | - echo "before_script" 43 | script: 44 | - PYTHONPATH=assignment1 python -m py.test --cov=assignment1 --cov-report term assignment1/tests/ -s --durations=10 45 | ##PYTHONPATH=assignment1 py.test --cov=assignment1 --cov-report term assignment1/tests/ -s --durations=10 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Gregory King 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stanford CS224D: Deep Learning for Natural Language Processing 2 | CS224D Assignments; 3 | [`Assignments Page`](http://cs224d.stanford.edu/assignments.html) 4 | 5 | Notes 6 | ----- 7 | * Certain features may require a compiler to be installed: Visual Studio C++, GCC, or clang 8 | * The code base is stored in separate assignment directories; these may contain IPython notebooks used for running and displaying results 9 | * Each assignment contains a small number of Python modules (and possibly test scripts) 10 | * Assignment directories might have a dataset directory (with either a script to download the datasets, or at least details on how to get them) 11 | 12 | 13 | [`Assignment 1`](https://github.com/kingtaurus/cs224d/blob/master/assignment1/assignment1.pdf)[![Build Status](https://travis-ci.com/kingtaurus/cs224d.svg?token=S5K3fgjLh8cmmfpF6ZLy&branch=master)](https://travis-ci.com/kingtaurus/cs224d) 14 | ------------ 15 | See [`Assignment1 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment1/README.md) 16 | * **Softmax** 17 | * **Neural Network Basics** 18 | * **`word2vec`** 19 | * **Sentiment Analysis** 20 | 21 | [`Assignment 2`](https://github.com/kingtaurus/cs224d/blob/master/assignment2/assignment2.pdf) 22 | ------------- 23 | See [`Assignment2 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment2/README.md) 24 | * **TensorFlow Softmax (coding)** 25 | * **TensorFlow NER Window Model (coding and theory)** 26 | * **TensorFlow RNN Language Model (coding and theory)** 27 | 28 | [`Assignment 3`](https://github.com/kingtaurus/cs224d/blob/master/assignment3/assignment3_2016.pdf) 29 | ------------- 30 | See [`Assignment3 README.md`](https://github.com/kingtaurus/cs224d/blob/master/assignment3/README.md) 31 | * **TensorFlow Recursive Neural Network (RNN) and Sentiment Analysis** 32 | 33 | Old Assignments 34 | =============== 35 | Assignments from 2015.
36 | 37 | [`Assignment 1`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment1.pdf) 38 | -------------- 39 | See [`Assignment1 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment1/README.md) 40 | * **Softmax** 41 | * **Neural Network Basics** 42 | * **`word2vec`** 43 | * **Sentiment Analysis** 44 | 45 | [`Assignment 2`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment2.pdf) 46 | -------------- 47 | See [`Assignment2 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment2/README.md) 48 | * **Boolean Logic** 49 | * **Deep Network (for Named Entity Recognition)** 50 | * **Recurrent Neural Networks (Language Modeling)** 51 | 52 | [`Assignment 3`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment3.pdf) 53 | -------------- 54 | See [`Assignment3 README.md`](https://github.com/kingtaurus/cs224d/blob/master/old_assignments/assignment3/README.md) 55 | * **Recursive Neural Network** 56 | * **2-Layer Deep RNN** 57 | * **Recursive Neural Tensor Networks (Extra Credit)** 58 | -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | [`CS224d: Deep Learning for Natural Language Processing`](http://cs224d.stanford.edu/) 2 | ====================================================================================== 3 | [![Build Status](https://travis-ci.com/kingtaurus/cs224d.svg?token=S5K3fgjLh8cmmfpF6ZLy&branch=master)](https://travis-ci.com/kingtaurus/cs224d) 4 | 5 | **Due Date: 4/19/2016 (Thursday) 11:59 PM PST. Hard deadline: 4/22 (Sun) 11:59 PM PST with 3 late days** 6 | 7 | In this assignment we will familiarize you with basic concepts of neural networks, word vectors, and their application to sentiment analysis. 8 | 9 | Setup 10 | ----- 11 | 12 | **Note:** Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help. 13 | 14 | Get the code: [Download the starter code here](http://cs224d.stanford.edu/assignment1/assignment1.zip) and the [complementary written problems here](http://cs224d.stanford.edu/assignment1/assignment1.pdf). 15 | 16 | **[Optional] virtual environment:** Once you have unzipped the starter code, you might want to create a [`virtual environment`](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed on your machine. To set up a virtual environment, run the following: 17 | 18 | ```bash 19 | cd assignment1 20 | sudo pip install virtualenv # This may already be installed 21 | virtualenv .env # Create a virtual environment 22 | source .env/bin/activate # Activate the virtual environment 23 | pip install -r requirements.txt # Install dependencies 24 | # Work on the assignment for a while ... 
25 | deactivate # Exit the virtual environment 26 | ``` 27 | 28 | **Install requirements (without a virtual environment):** To install the required packages locally without setting up a virtual environment, run the following: 29 | 30 | ```bash 31 | cd assignment1 32 | pip install -r requirements.txt # Install dependencies 33 | ``` 34 | 35 | **Download data:** Once you have the starter code, you will need to download the Stanford Sentiment Treebank dataset. Run the following from the assignment1 directory: 36 | 37 | ```bash 38 | cd cs224d/datasets 39 | ./get_datasets.sh 40 | ``` 41 | 42 | Submitting your work 43 | -------------------- 44 | 45 | Once you are done working, put the written part in the same directory as your IPython notebook file, and run the `collectSubmission.sh` script; this will produce a file called `assignment1.zip`. Rename this file to `<your-sunetid>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be 46 | 47 | ```bash 48 | jdoe.zip 49 | ``` 50 | 51 | 52 | Stay tuned for a submission link, which will be posted here and on Piazza. 53 | For the written component, please upload a PDF file of your solutions to Gradescope. If you are enrolled in the class you should have been signed up automatically. If you added the class late or are not signed up, post privately to Piazza and we will add you to the roster. When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. This is crucial so that we can provide accurate feedback. If a question has no written component (completely programmatic), map it on the same page as the previous section or next section. 54 | 55 | Tasks 56 | ----- 57 | 58 | There will be four parts to this assignment. Each part has written and code components. The assignment is designed to be completed in order as later sections will leverage solutions to earlier parts. We recommend reading the assignment carefully and starting early as some parts may take significant time to run.
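Each part also ships with a small self-contained sanity check that can be run directly from the `assignment1` directory while you work (a convenience for development, not a grading script), for example:

```bash
python q1_softmax.py     # basic softmax tests
python q2_gradcheck.py   # gradient-checker sanity checks
python q2_neural.py      # two-layer network gradient check
python q3_sgd.py         # SGD sanity checks
```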
59 | 60 | Q1: Softmax (10 points) 61 | ----------------------- 62 | 63 | Q2: Neural Network Basics (30 points) 64 | ------------------------------------- 65 | 66 | Q3: word2vec (40 points + 5 bonus) 67 | ---------------------------------- 68 | 69 | Q4: Sentiment Analysis (20 points) 70 | ---------------------------------- 71 | -------------------------------------------------------------------------------- /assignment1/assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/assignment1.pdf -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy 3 | -------------------------------------------------------------------------------- /assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | """Solution to the coding part for question (1) of CS224D. 2 | """ 3 | 4 | import numpy as np 5 | 6 | def softmax(x): 7 | """ 8 | Compute the softmax function for each row of the input x. 9 | 10 | It is crucial that this function is optimized for speed because 11 | it will be used frequently in later code. 12 | You might find numpy functions np.exp, np.sum, np.reshape, 13 | np.max, and numpy broadcasting useful for this task. (numpy 14 | broadcasting documentation: 15 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 16 | 17 | You should also make sure that your code works for one 18 | dimensional inputs (treat the vector as a row), you might find 19 | it helpful for your later problems. 20 | 21 | You must implement the optimization in problem 1(a) of the 22 | written assignment! 23 | """ 24 | ### YOUR CODE HERE 25 | log_c = np.max(x, axis=x.ndim - 1, keepdims=True) 26 | #for numerical stability 27 | y = np.sum(np.exp(x - log_c), axis=x.ndim - 1, keepdims=True) 28 | x = np.exp(x - log_c)/y 29 | ### END YOUR CODE 30 | return x 31 | 32 | def test_softmax_basic(): 33 | """ 34 | Some simple tests to get you started. 35 | Warning: these are not exhaustive. 
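A useful property when reading the expected values below: softmax is
invariant to adding a constant to every entry of a row, so
softmax([1001, 1002]) and softmax([1, 2]) should both be approximately
[0.26894142, 0.73105858], which is why the large-magnitude tests reuse
the same expected numbers as the small ones.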
36 | """ 37 | print("Running basic tests...") 38 | test1 = softmax(np.array([1, 2])) 39 | print(test1) 40 | assert np.amax(np.fabs(test1 - np.array( 41 | [0.26894142, 0.73105858]))) <= 1e-6 42 | 43 | test2 = softmax(np.array([[1001, 1002], [3, 4]])) 44 | print(test2) 45 | assert np.amax(np.fabs(test2 - np.array( 46 | [[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))) <= 1e-6 47 | 48 | test3 = softmax(np.array([[-1001, -1002]])) 49 | print(test3) 50 | assert np.amax(np.fabs(test3 - np.array( 51 | [0.73105858, 0.26894142]))) <= 1e-6 52 | 53 | print("You should verify these results!\n") 54 | 55 | def test_softmax(): 56 | """ 57 | Use this space to test your softmax implementation by running: 58 | python q1_softmax.py 59 | This function will not be called by the autograder, nor will 60 | your tests be graded. 61 | """ 62 | print("Running your tests...") 63 | ### YOUR CODE HERE 64 | ### END YOUR CODE 65 | 66 | if __name__ == "__main__": 67 | test_softmax_basic() 68 | test_softmax() 69 | -------------------------------------------------------------------------------- /assignment1/q1_softmax_sol.py: -------------------------------------------------------------------------------- 1 | """Solution to the coding part for question (1) of CS224D. 2 | """ 3 | 4 | import numpy as np 5 | 6 | def softmax_sol(x): 7 | """ 8 | Compute the softmax function for each row of the input x. 9 | 10 | It is crucial that this function is optimized for speed because 11 | it will be used frequently in later code. 12 | You might find numpy functions np.exp, np.sum, np.reshape, 13 | np.max, and numpy broadcasting useful for this task. (numpy 14 | broadcasting documentation: 15 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 16 | 17 | You should also make sure that your code works for one 18 | dimensional inputs (treat the vector as a row), you might find 19 | it helpful for your later problems. 20 | 21 | You must implement the optimization in problem 1(a) of the 22 | written assignment! 
23 | """ 24 | 25 | ### YOUR CODE HERE 26 | if len(x.shape) > 1: 27 | tmp = np.max(x, axis = 1) 28 | x -= tmp.reshape((x.shape[0], 1)) 29 | x = np.exp(x) 30 | tmp = np.sum(x, axis = 1) 31 | x /= tmp.reshape((x.shape[0], 1)) 32 | else: 33 | tmp = np.max(x) 34 | x -= tmp 35 | x = np.exp(x) 36 | tmp = np.sum(x) 37 | x /= tmp 38 | ### END YOUR CODE 39 | return x 40 | -------------------------------------------------------------------------------- /assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | # First implement a gradient checker by filling in the following functions 5 | def gradcheck_naive(f, x): 6 | """ 7 | Gradient check for a function f 8 | - f should be a function that takes a single argument and outputs the cost 9 | and its gradients 10 | - x is the point (numpy array) to check the gradient at 11 | """ 12 | 13 | rndstate = random.getstate() 14 | random.setstate(rndstate) 15 | fx, grad = f(x) # Evaluate function value at original point 16 | h = 1e-4 17 | 18 | # Iterate over all indexes in x 19 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 20 | while not it.finished: 21 | ix = it.multi_index 22 | 23 | ### try modifying x[ix] with h defined above to compute numerical gradients 24 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 25 | ### possible to test cost functions with built in randomness later 26 | ### YOUR CODE HERE: 27 | old_xix = x[ix] 28 | x[ix] = old_xix + h 29 | random.setstate(rndstate) 30 | fp = f(x)[0] 31 | x[ix] = old_xix - h 32 | random.setstate(rndstate) 33 | fm = f(x)[0] 34 | x[ix] = old_xix 35 | 36 | numgrad = (fp - fm)/(2* h) 37 | ### END YOUR CODE 38 | 39 | # Compare gradients 40 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 41 | if reldiff > 1e-5: 42 | print("Gradient check failed.") 43 | print("First gradient error found at index %s" % str(ix)) 44 | print("Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)) 45 | return 46 | 47 | it.iternext() # Step to next dimension 48 | 49 | print("Gradient check passed!") 50 | 51 | def grad_numerical(f, x, h=1e-4): 52 | """ 53 | Gradient check for a function f 54 | - f should be a function that takes a single argument and outputs the cost 55 | and its gradients 56 | - x is the point (numpy array) to check the gradient at 57 | - h is the size of the shift for all dimensions 58 | """ 59 | 60 | rndstate = random.getstate() 61 | random.setstate(rndstate) 62 | fx, grad = f(x) # Evaluate function value at original point 63 | num_grad = np.zeros(x.shape) 64 | 65 | # Iterate over all indexes in x 66 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 67 | while not it.finished: 68 | ix = it.multi_index 69 | 70 | ### try modifying x[ix] with h defined above to compute numerical gradients 71 | ### make sure you call random.setstate(rndstate) before calling f(x) each time, this will make it 72 | ### possible to test cost functions with built in randomness later 73 | ### YOUR CODE HERE: 74 | old_xix = x[ix] 75 | x[ix] += 0.5 * h 76 | random.setstate(rndstate) 77 | fp = f(x)[0] 78 | x[ix] -= h 79 | random.setstate(rndstate) 80 | fm = f(x)[0] 81 | x[ix] = old_xix 82 | 83 | num_grad += (fp - fm)/h 84 | ### END YOUR CODE 85 | it.iternext() # Step to next dimension 86 | return num_grad 87 | 88 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 89 | """ 90 | Evaluate a numeric gradient for a function that 
accepts a numpy 91 | array and returns a numpy array. 92 | """ 93 | grad = np.zeros_like(x) 94 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 95 | while not it.finished: 96 | ix = it.multi_index 97 | 98 | oldval = x[ix] 99 | x[ix] = oldval + h 100 | pos = f(x).copy() 101 | x[ix] = oldval - h 102 | neg = f(x).copy() 103 | x[ix] = oldval 104 | 105 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 106 | it.iternext() 107 | return grad 108 | 109 | def sanity_check(): 110 | """ 111 | Some basic sanity checks. 112 | """ 113 | quad = lambda x: (np.sum(x ** 2), x * 2) 114 | 115 | print("Running sanity checks...") 116 | gradcheck_naive(quad, np.array(123.456)) # scalar test 117 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 118 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 119 | print("") 120 | 121 | def your_sanity_checks(): 122 | """ 123 | Use this space add any additional sanity checks by running: 124 | python q2_gradcheck.py 125 | This function will not be called by the autograder, nor will 126 | your additional tests be graded. 127 | """ 128 | print("Running your sanity checks...") 129 | ### YOUR CODE HERE 130 | print("") 131 | ### END YOUR CODE 132 | 133 | if __name__ == "__main__": 134 | sanity_check() 135 | your_sanity_checks() 136 | -------------------------------------------------------------------------------- /assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax import softmax 5 | from q2_sigmoid import sigmoid, sigmoid_grad 6 | from q2_gradcheck import gradcheck_naive 7 | 8 | def affine_forward(x, w, b): 9 | """ 10 | Computes the forward pass for an affine (fully-connected) layer. 11 | 12 | The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N 13 | examples, where each example x[i] has shape (d_1, ..., d_k). We will 14 | reshape each input into a vector of dimension D = d_1 * ... * d_k, and 15 | then transform it to an output vector of dimension M. 16 | 17 | Inputs: 18 | - x: A numpy array containing input data, of shape (N, d_1, ..., d_k) 19 | - w: A numpy array of weights, of shape (D, M) 20 | - b: A numpy array of biases, of shape (M,) 21 | 22 | Returns a tuple of: 23 | - out: output, of shape (N, M) 24 | - cache: (x, w, b) 25 | """ 26 | out = None 27 | N = x.shape[0] 28 | D = np.prod(x.shape[1:]) 29 | M = b.shape[1] 30 | out = np.dot(x.reshape(N, D), w.reshape(D, M)) + b.reshape(1, M) 31 | return out, (x,w,b) 32 | 33 | def affine_backward(dout, cache): 34 | """ 35 | Computes the backward pass for an affine layer. 36 | 37 | Inputs: 38 | - dout: Upstream derivative, of shape (N, M) 39 | - cache: Tuple of: 40 | - x: Input data, of shape (N, d_1, ... d_k) 41 | - w: Weights, of shape (D, M) 42 | 43 | Returns a tuple of: 44 | - dx: Gradient with respect to x, of shape (N, d1, ..., d_k) 45 | - dw: Gradient with respect to w, of shape (D, M) 46 | - db: Gradient with respect to b, of shape (M,) 47 | """ 48 | x, w, b = cache 49 | dx, dw, db = None, None, None 50 | N = x.shape[0] 51 | D = np.prod(x.shape[1:]) 52 | M = b.shape[1] 53 | 54 | dx = np.dot(dout, w.reshape(D, M).T).reshape(x.shape) 55 | dw = np.dot(x.reshape(N, D).T, dout).reshape(w.shape) 56 | db = np.sum(dout, axis=0) 57 | 58 | return dx, dw, db 59 | 60 | def sigmoid_forward(x): 61 | """ 62 | Computes the forward pass for a sigmoid activation. 
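(For example, sigmoid_forward(np.array([0.0, 2.0])) returns roughly
[0.5, 0.88079708] together with an identical cache, which
sigmoid_backward later passes to sigmoid_grad.)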
63 | 64 | Inputs: 65 | - x: Input data, numpy array of arbitary shape; 66 | 67 | Returns a tuple (out, cache) 68 | - out: output of the same shape as x 69 | - cache: identical to out; required for backpropagation 70 | """ 71 | return sigmoid(x), sigmoid(x) 72 | 73 | def sigmoid_backward(dout, cache): 74 | """ 75 | Computes the backward pass for an sigmoid layer. 76 | 77 | Inputs: 78 | - dout: Upstream derivative, same shape as the input 79 | to the sigmoid layer (x) 80 | - cache: sigmoid(x) 81 | Returns a tuple of: 82 | - dx: back propagated gradient with respect to x 83 | """ 84 | x = cache 85 | return sigmoid_grad(x) * dout 86 | 87 | def forward_backward_prop(data, labels, params, dimensions): 88 | """ 89 | Forward and backward propagation for a two-layer sigmoidal network 90 | 91 | Compute the forward propagation and for the cross entropy cost, 92 | and backward propagation for the gradients for all parameters. 93 | """ 94 | 95 | ### Unpack network parameters (do not modify) 96 | ofs = 0 97 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 98 | N = data.shape[0] 99 | 100 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 101 | ofs += Dx * H 102 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 103 | ofs += H 104 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 105 | ofs += H * Dy 106 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 107 | 108 | ### YOUR CODE HERE: forward propagation 109 | hidden = np.dot(data,W1) + b1 110 | layer1_a = sigmoid(hidden) 111 | layer2 = np.dot(layer1_a, W2) + b2 112 | # need to calculate the softmax loss 113 | probs = softmax(layer2) 114 | cost = - np.sum(np.log(probs[np.arange(N), np.argmax(labels, axis=1)])) 115 | ### END YOUR CODE 116 | 117 | ### YOUR CODE HERE: backward propagation 118 | #There is no regularization :/ 119 | # dx -> sigmoid -> W2 * layer1_a + b -> sigmoid -> W1 * data + b1 -> .. 120 | dx = probs.copy() 121 | dx -= labels 122 | 123 | dlayer2 = np.zeros_like(dx) 124 | gradW2 = np.zeros_like(W2) 125 | gradW1 = np.zeros_like(W1) 126 | gradb2 = np.zeros_like(b2) 127 | gradb1 = np.zeros_like(b1) 128 | 129 | gradW2 = np.dot(layer1_a.T, dx) 130 | gradb2 = np.sum(dx, axis=0) 131 | dlayer2 = np.dot(dx, W2.T) 132 | dlayer1 = sigmoid_grad(layer1_a) * dlayer2 133 | gradW1 = np.dot(data.T, dlayer1) 134 | gradb1 = np.sum(dlayer1, axis=0) 135 | 136 | # Decided to implement affine (forward and backward function) 137 | # sigmoid (forward and backward function) 138 | # These should work properly; 139 | # scores, cache_1 = affine_forward(data, W1, b1) 140 | # scores, cache_s1 = sigmoid_forward(scores) 141 | # scores, cache_2 = affine_forward(scores, W2, b2) 142 | 143 | # # need to calculate the softmax loss 144 | # probs = softmax(scores) 145 | # cost = -np.sum(np.log(probs[np.arange(N), np.argmax(labels)] + 1e-12)) / N 146 | # softmax_dx = probs.copy() 147 | # softmax_dx[np.arange(N), np.argmax(labels,axis=1)] -= 1 148 | # softmax_dx /= N 149 | 150 | # grads = {} 151 | 152 | # dlayer2, grads['W2'], grads['b2'] = affine_backward(softmax_dx, cache_2) 153 | # dlayer1s = sigmoid_backward(dlayer2, cache_s1) 154 | # dlayer1, grads['W1'], grads['b1'] = affine_backward(dlayer1s, cache_1) 155 | #softmax_dx is the gradient of the loss w.r.t. 
y_{est} 156 | ### END YOUR CODE 157 | 158 | ### Stack gradients (do not modify) 159 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 160 | gradW2.flatten(), gradb2.flatten())) 161 | 162 | return cost, grad 163 | 164 | def sanity_check(): 165 | """ 166 | Set up fake data and parameters for the neural network, and test using 167 | gradcheck. 168 | """ 169 | print("Running sanity check...") 170 | 171 | N = 300 172 | dimensions = [10, 5, 10] 173 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 174 | labels = np.zeros((N, dimensions[2])) 175 | for i in range(N): 176 | labels[i,random.randint(0,dimensions[2]-1)] = 1 177 | 178 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 179 | dimensions[1] + 1) * dimensions[2], ) 180 | 181 | #cost, _ = forward_backward_prop(data, labels, params, dimensions) 182 | # # expect to get 1 in 10 correct 183 | #print(np.exp(-cost)) 184 | # #cost is roughly correct 185 | 186 | gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, 187 | dimensions), params) 188 | 189 | def your_sanity_checks(): 190 | """ 191 | Use this space add any additional sanity checks by running: 192 | python q2_neural.py 193 | This function will not be called by the autograder, nor will 194 | your additional tests be graded. 195 | """ 196 | print("Running your sanity checks...") 197 | ### YOUR CODE HERE 198 | #raise NotImplementedError 199 | ### END YOUR CODE 200 | 201 | if __name__ == "__main__": 202 | sanity_check() 203 | your_sanity_checks() 204 | -------------------------------------------------------------------------------- /assignment1/q2_neural_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax_sol import softmax_sol as softmax 5 | from q2_sigmoid_sol import sigmoid_sol as sigmoid 6 | from q2_sigmoid_sol import sigmoid_grad_sol as sigmoid_grad 7 | from q2_gradcheck import gradcheck_naive 8 | 9 | def forward_backward_prop_sol(data, labels, params, dimensions): 10 | """ 11 | Forward and backward propagation for a two-layer sigmoidal network 12 | 13 | Compute the forward propagation and for the cross entropy cost, 14 | and backward propagation for the gradients for all parameters. 
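In outline (matching the code below): the forward pass is
h = sigmoid(data.dot(W1) + b1) and yhat = softmax(h.dot(W2) + b2),
with cost CE = -sum(labels * log(yhat)); the backward pass uses
delta2 = yhat - labels, gradW2 = h.T.dot(delta2), gradb2 = column sums
of delta2, then delta1 = delta2.dot(W2.T) * h * (1 - h),
gradW1 = data.T.dot(delta1), and gradb1 = column sums of delta1.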
15 | """ 16 | 17 | ### Unpack network parameters (do not modify) 18 | ofs = 0 19 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 20 | 21 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 22 | ofs += Dx * H 23 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 24 | ofs += H 25 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 26 | ofs += H * Dy 27 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 28 | 29 | ### YOUR CODE HERE: forward propagation 30 | hidden = sigmoid(data.dot(W1) + b1) 31 | prediction = softmax(hidden.dot(W2) + b2) 32 | cost = -np.sum(np.log(prediction) * labels) 33 | ### END YOUR CODE 34 | 35 | ### YOUR CODE HERE: backward propagation 36 | delta = prediction - labels 37 | gradW2 = hidden.T.dot(delta) 38 | gradb2 = np.sum(delta, axis = 0) 39 | delta = delta.dot(W2.T) * sigmoid_grad(hidden) 40 | gradW1 = data.T.dot(delta) 41 | gradb1 = np.sum(delta, axis = 0) 42 | ### END YOUR CODE 43 | 44 | ### Stack gradients (do not modify) 45 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 46 | gradW2.flatten(), gradb2.flatten())) 47 | 48 | return cost, grad 49 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | 8 | ### YOUR CODE HERE 9 | x = 1 / (1 + np.exp(-x)) 10 | ### END YOUR CODE 11 | 12 | return x 13 | 14 | def sigmoid_grad(f): 15 | """ 16 | Compute the gradient for the sigmoid function here. Note that 17 | for this implementation, the input f should be the sigmoid 18 | function value of your original input x. 19 | """ 20 | 21 | ### YOUR CODE HERE 22 | f *= (1-f) 23 | ### END YOUR CODE 24 | 25 | return f 26 | 27 | def test_sigmoid_basic(): 28 | """ 29 | Some simple tests to get you started. 30 | Warning: these are not exhaustive. 31 | """ 32 | print("Running basic tests...") 33 | x = np.array([[1, 2], [-1, -2]]) 34 | f = sigmoid(x) 35 | g = sigmoid_grad(f) 36 | print(f) 37 | assert np.amax(f - np.array([[0.73105858, 0.88079708], 38 | [0.26894142, 0.11920292]])) <= 1e-6 39 | print(g) 40 | assert np.amax(g - np.array([[0.19661193, 0.10499359], 41 | [0.19661193, 0.10499359]])) <= 1e-6 42 | print("You should verify these results!\n") 43 | 44 | def test_sigmoid(): 45 | """ 46 | Use this space to test your sigmoid implementation by running: 47 | python q2_sigmoid.py 48 | This function will not be called by the autograder, nor will 49 | your tests be graded. 50 | """ 51 | print("Running your tests...") 52 | ### YOUR CODE HERE 53 | #raise NotImplementedError 54 | ### END YOUR CODE 55 | 56 | if __name__ == "__main__": 57 | test_sigmoid_basic(); 58 | test_sigmoid() 59 | -------------------------------------------------------------------------------- /assignment1/q2_sigmoid_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sigmoid_sol(x): 4 | """ 5 | Compute the sigmoid function for the input here. 6 | """ 7 | ### YOUR CODE HERE 8 | x = 1. / (1 + np.exp(-x)) 9 | ### END YOUR CODE 10 | return x 11 | 12 | def sigmoid_grad_sol(f): 13 | """ 14 | Compute the gradient for the sigmoid function here. Note that 15 | for this implementation, the input f should be the sigmoid 16 | function value of your original input x. 
17 | """ 18 | ### YOUR CODE HERE 19 | f = f * (1-f) 20 | ### END YOUR CODE 21 | return f 22 | 23 | -------------------------------------------------------------------------------- /assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | from cs224d.data_utils import * 4 | import matplotlib.pyplot as plt 5 | 6 | from q3_word2vec import * 7 | from q3_sgd import * 8 | 9 | import seaborn as sns 10 | sns.set(style='whitegrid', context='talk') 11 | 12 | # Reset the random seed to make sure that everyone gets the same results 13 | random.seed(314) 14 | dataset = StanfordSentiment() 15 | tokens = dataset.tokens() 16 | nWords = len(tokens) 17 | # We are going to train 10-dimensional vectors for this assignment 18 | dimVectors = 10 19 | 20 | # Context size 21 | C = 5 22 | 23 | # Reset the random seed to make sure that everyone gets the same results 24 | random.seed(31415) 25 | np.random.seed(9265) 26 | print("creating initial word vectors") 27 | wordVectors = np.concatenate(((np.random.rand(nWords, dimVectors) - .5) / \ 28 | dimVectors, np.zeros((nWords, dimVectors))), axis=0) 29 | wordVectors0 = sgd( 30 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 31 | negSamplingCostAndGradient), 32 | wordVectors, 0.30, 40000, None, True, PRINT_EVERY=10) 33 | print("sanity check: cost at convergence should be around or below 10") 34 | 35 | # sum the input and output word vectors 36 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 37 | 38 | # Visualize the word vectors you trained 39 | _, wordVectors0, _ = load_saved_params() 40 | print(wordVectors0.shape) 41 | wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) 42 | visualizeWords = ["the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 43 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 44 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 45 | "annoying"] 46 | visualizeIdx = [tokens[word] for word in visualizeWords] 47 | visualizeVecs = wordVectors[visualizeIdx, :] 48 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 49 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 50 | U,S,V = np.linalg.svd(covariance) 51 | coord = temp.dot(U[:,0:2]) 52 | 53 | plt.figure(figsize=(12,12)) 54 | for i in range(len(visualizeWords)): 55 | plt.scatter(x=coord[i,0], y=coord[i,1]) 56 | plt.text(coord[i,0]+0.01, coord[i,1]+0.01, visualizeWords[i], 57 | bbox=dict(facecolor='green', alpha=0.1)) 58 | plt.xlim((np.min(coord[:,0])-0.1, np.max(coord[:,0])+0.1)) 59 | plt.ylim((np.min(coord[:,1])-0.1, np.max(coord[:,1])+0.1)) 60 | plt.xlabel("SVD[0]") 61 | plt.ylabel("SVD[1]") 62 | 63 | plt.savefig('q3_word_vectors.png') 64 | plt.show() 65 | -------------------------------------------------------------------------------- /assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | # Save parameters every a few SGD iterations as fail-safe 2 | SAVE_PARAMS_EVERY = 1000 3 | 4 | import glob 5 | import random 6 | import numpy as np 7 | import os.path as op 8 | import pickle as pickle 9 | 10 | def load_saved_params(): 11 | """ A helper function that loads previously saved parameters and resets iteration start """ 12 | st = 0 13 | for f in glob.glob("saved_params_*.npy"): 14 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 15 | if (iter > st): 16 | st = iter 17 | 18 | if st > 0: 19 | with open("saved_params_%d.npy" % st, "rb") as f: 20 | 
params = pickle.load(f) 21 | state = pickle.load(f) 22 | return st, params, state 23 | else: 24 | return st, None, None 25 | 26 | def save_params(iter, params): 27 | with open("saved_params_%d.npy" % iter, "wb") as f: 28 | pickle.dump(params, f) 29 | pickle.dump(random.getstate(), f) 30 | 31 | def sgd(f, x0, step, iterations, postprocessing = None, useSaved = False, PRINT_EVERY=10): 32 | """ Stochastic Gradient Descent """ 33 | # Implement the stochastic gradient descent method in this 34 | # function. 35 | 36 | # Inputs: 37 | # - f: the function to optimize, it should take a single 38 | # argument and yield two outputs, a cost and the gradient 39 | # with respect to the arguments 40 | # - x0: the initial point to start SGD from 41 | # - step: the step size for SGD 42 | # - iterations: total iterations to run SGD for 43 | # - postprocessing: postprocessing function for the parameters 44 | # if necessary. In the case of word2vec we will need to 45 | # normalize the word vectors to have unit length. 46 | # - PRINT_EVERY: specifies every how many iterations to output 47 | 48 | # Output: 49 | # - x: the parameter value after SGD finishes 50 | 51 | # Anneal learning rate every several iterations 52 | ANNEAL_EVERY = 20000 53 | 54 | if useSaved: 55 | start_iter, oldx, state = load_saved_params() 56 | if start_iter > 0: 57 | x0 = oldx; 58 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 59 | 60 | if state: 61 | random.setstate(state) 62 | else: 63 | start_iter = 0 64 | 65 | x = x0 66 | 67 | if not postprocessing: 68 | postprocessing = lambda x: x 69 | 70 | expcost = None 71 | 72 | for iter in range(start_iter + 1, iterations + 1): 73 | ### Don't forget to apply the postprocessing after every iteration! 74 | ### You might want to print the progress every few iterations. 75 | 76 | cost = None 77 | ### YOUR CODE HERE 78 | cost, grad = f(x) 79 | x -= step * grad 80 | 81 | x = postprocessing(x) 82 | ### END YOUR CODE 83 | 84 | if PRINT_EVERY is not None and iter % PRINT_EVERY == 0: 85 | if not expcost: 86 | expcost = cost 87 | else: 88 | expcost = .95 * expcost + .05 * cost 89 | print("iter %d: %f" % (iter, expcost)) 90 | 91 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 92 | save_params(iter, x) 93 | 94 | if iter % ANNEAL_EVERY == 0: 95 | step *= 0.5 96 | 97 | return x 98 | 99 | def sanity_check(): 100 | quad = lambda x: (np.sum(x ** 2), x * 2) 101 | 102 | print("Running sanity checks...") 103 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None) 104 | print("test 1 result:", t1) 105 | assert abs(t1) <= 1e-6 106 | 107 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None) 108 | print("test 2 result:", t2) 109 | assert abs(t2) <= 1e-6 110 | 111 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None) 112 | print("test 3 result:", t3) 113 | assert abs(t3) <= 1e-6 114 | 115 | print("") 116 | 117 | def your_sanity_checks(): 118 | """ 119 | Use this space add any additional sanity checks by running: 120 | python q3_sgd.py 121 | This function will not be called by the autograder, nor will 122 | your additional tests be graded. 
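One possible extra check (an illustration only, not required): run the
same quadratic used in sanity_check with a much smaller step size, e.g.
sgd(lambda x: (np.sum(x ** 2), x * 2), 10.0, 0.001, 5000,
PRINT_EVERY=None), and confirm the result has decayed to roughly
10 * (1 - 2 * 0.001) ** 5000, i.e. a few times 1e-4.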
123 | """ 124 | print("Running your sanity checks...") 125 | ### YOUR CODE HERE 126 | #raise NotImplementedError 127 | ### END YOUR CODE 128 | 129 | if __name__ == "__main__": 130 | sanity_check(); 131 | your_sanity_checks(); -------------------------------------------------------------------------------- /assignment1/q3_word2vec_sol.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from q1_softmax_sol import softmax_sol as softmax 5 | from q2_gradcheck import gradcheck_naive 6 | from q2_sigmoid_sol import sigmoid_sol as sigmoid 7 | from q2_sigmoid_sol import sigmoid_grad_sol as sigmoid_grad 8 | 9 | def normalizeRows_sol(x): 10 | """ Row normalization function """ 11 | # Implement a function that normalizes each row of a matrix to have unit length 12 | ### YOUR CODE HERE 13 | N = x.shape[0] 14 | x /= np.sqrt(np.sum(x**2, axis=1)).reshape((N,1)) + 1e-30 15 | ### END YOUR CODE 16 | return x 17 | 18 | def softmaxCostAndGradient_sol(predicted, target, outputVectors, dataset): 19 | """ Softmax cost function for word2vec models """ 20 | 21 | # Implement the cost and gradients for one predicted word vector 22 | # and one target word vector as a building block for word2vec 23 | # models, assuming the softmax prediction function and cross 24 | # entropy loss. 25 | # Inputs: 26 | # - predicted: numpy ndarray, predicted word vector (\hat{v} in 27 | # the written component or \hat{r} in an earlier version) 28 | # - target: integer, the index of the target word 29 | # - outputVectors: "output" vectors (as rows) for all tokens 30 | # - dataset: needed for negative sampling, unused here. 31 | # Outputs: 32 | # - cost: cross entropy cost for the softmax word prediction 33 | # - gradPred: the gradient with respect to the predicted word 34 | # vector 35 | # - grad: the gradient with respect to all the other word 36 | # vectors 37 | # We will not provide starter code for this function, but feel 38 | # free to reference the code you previously wrote for this 39 | # assignment! 40 | ### YOUR CODE HERE 41 | probabilities = softmax(predicted.dot(outputVectors.T)) 42 | cost = -np.log(probabilities[target]) 43 | delta = probabilities 44 | delta[target] -= 1 45 | N = delta.shape[0] 46 | D = predicted.shape[0] 47 | grad = delta.reshape((N,1)) * predicted.reshape((1,D)) 48 | gradPred = (delta.reshape((1,N)).dot(outputVectors)).flatten() 49 | ### END YOUR CODE 50 | 51 | return cost, gradPred, grad 52 | 53 | def negSamplingCostAndGradient_sol(predicted, target, outputVectors, dataset, 54 | K=10): 55 | """ Negative sampling cost function for word2vec models """ 56 | # Implement the cost and gradients for one predicted word vector 57 | # and one target word vector as a building block for word2vec 58 | # models, using the negative sampling technique. K is the sample 59 | # size. You might want to use dataset.sampleTokenIdx() to sample 60 | # a random word index. 61 | # 62 | # Note: See test_word2vec below for dataset's initialization. 63 | # 64 | # Input/Output Specifications: same as softmaxCostAndGradient 65 | # We will not provide starter code for this function, but feel 66 | # free to reference the code you previously wrote for this 67 | # assignment! 
68 | ### YOUR CODE HERE 69 | grad = np.zeros(outputVectors.shape) 70 | gradPred = np.zeros(predicted.shape) 71 | 72 | indices = [target] 73 | for k in range(K): 74 | newidx = dataset.sampleTokenIdx() 75 | while newidx == target: 76 | newidx = dataset.sampleTokenIdx() 77 | indices += [newidx] 78 | 79 | labels = np.array([1] + [-1 for k in range(K)]) 80 | vecs = outputVectors[indices,:] 81 | 82 | t = sigmoid(vecs.dot(predicted) * labels) 83 | cost = -np.sum(np.log(t)) 84 | 85 | delta = labels * (t - 1) 86 | gradPred = delta.reshape((1,K+1)).dot(vecs).flatten() 87 | gradtemp = delta.reshape((K+1,1)).dot(predicted.reshape( 88 | (1,predicted.shape[0]))) 89 | for k in range(K+1): 90 | grad[indices[k]] += gradtemp[k,:] 91 | # t = sigmoid(predicted.dot(outputVectors[target,:])) 92 | # cost = -np.log(t) 93 | # delta = t - 1 94 | # gradPred += delta * outputVectors[target, :] 95 | # grad[target, :] += delta * predicted 96 | # for k in range(K): 97 | # idx = dataset.sampleTokenIdx() 98 | # t = sigmoid(-predicted.dot(outputVectors[idx,:])) 99 | # cost += -np.log(t) 100 | # delta = 1 - t 101 | # gradPred += delta * outputVectors[idx, :] 102 | # grad[idx, :] += delta * predicted 103 | ### END YOUR CODE 104 | 105 | return cost, gradPred, grad 106 | 107 | 108 | def skipgram_sol(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 109 | dataset, word2vecCostAndGradient = softmaxCostAndGradient_sol): 110 | """ Skip-gram model in word2vec """ 111 | # Implement the skip-gram model in this function. 112 | # Inputs: 113 | # - currrentWord: a string of the current center word 114 | # - C: integer, context size 115 | # - contextWords: list of no more than 2*C strings, the context words 116 | # - tokens: a dictionary that maps words to their indices in 117 | # the word vector list 118 | # - inputVectors: "input" word vectors (as rows) for all tokens 119 | # - outputVectors: "output" word vectors (as rows) for all tokens 120 | # - word2vecCostAndGradient: the cost and gradient function for 121 | # a prediction vector given the target word vectors, 122 | # could be one of the two cost functions you 123 | # implemented above 124 | # Outputs: 125 | # - cost: the cost function value for the skip-gram model 126 | # - grad: the gradient with respect to the word vectors 127 | # We will not provide starter code for this function, but feel 128 | # free to reference the code you previously wrote for this 129 | # assignment! 130 | ### YOUR CODE HERE 131 | currentI = tokens[currentWord] 132 | predicted = inputVectors[currentI, :] 133 | 134 | cost = 0.0 135 | gradIn = np.zeros(inputVectors.shape) 136 | gradOut = np.zeros(outputVectors.shape) 137 | for cwd in contextWords: 138 | idx = tokens[cwd] 139 | cc, gp, gg = word2vecCostAndGradient(predicted, idx, outputVectors, dataset) 140 | cost += cc 141 | gradOut += gg 142 | gradIn[currentI, :] += gp 143 | ### END YOUR CODE 144 | 145 | return cost, gradIn, gradOut 146 | 147 | def cbow_sol(currentWord, C, contextWords, tokens, inputVectors, outputVectors, 148 | dataset, word2vecCostAndGradient = softmaxCostAndGradient_sol): 149 | """ CBOW model in word2vec """ 150 | # Implement the continuous bag-of-words model in this function. 151 | # Input/Output specifications: same as the skip-gram model 152 | # We will not provide starter code for this function, but feel 153 | # free to reference the code you previously wrote for this 154 | # assignment! 
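    # In outline (what the code below does): the predicted vector is the sum of
    # the context words' input vectors; cost, gradPred and gradOut come from a
    # single call to word2vecCostAndGradient with the current (center) word as
    # the target, and gradPred is then accumulated into gradIn at every context
    # word's row.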
155 | ################################################################# 156 | # IMPLEMENTING CBOW IS EXTRA CREDIT, DERIVATIONS IN THE WRIITEN # 157 | # ASSIGNMENT ARE NOT! # 158 | ################################################################# 159 | cost = 0 160 | gradIn = np.zeros(inputVectors.shape) 161 | gradOut = np.zeros(outputVectors.shape) 162 | ### YOUR CODE HERE 163 | D = inputVectors.shape[1] 164 | predicted = np.zeros((D,)) 165 | 166 | indices = [tokens[cwd] for cwd in contextWords] 167 | for idx in indices: 168 | predicted += inputVectors[idx, :] 169 | 170 | cost, gp, gradOut = word2vecCostAndGradient(predicted, tokens[currentWord], outputVectors, dataset) 171 | gradIn = np.zeros(inputVectors.shape) 172 | for idx in indices: 173 | gradIn[idx, :] += gp 174 | ### END YOUR CODE 175 | 176 | return cost, gradIn, gradOut 177 | -------------------------------------------------------------------------------- /assignment1/q3_word_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/q3_word_vectors.png -------------------------------------------------------------------------------- /assignment1/q4_reg_v_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment1/q4_reg_v_acc.png -------------------------------------------------------------------------------- /assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q3_sgd import load_saved_params, sgd 7 | from q4_softmaxreg import softmaxRegression, getSentenceFeature, accuracy, softmax_wrapper 8 | 9 | import seaborn as sns 10 | sns.set(style='whitegrid', context='talk') 11 | 12 | # Try different regularizations and pick the best! 13 | # NOTE: fill in one more "your code here" below before running! 
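# A rough sketch of what this script does (matching the code below): each
# sentence is featurized as the average of its word vectors
# (getSentenceFeature), a softmax regression classifier is trained with SGD for
# every value in REGULARIZATION, the value with the best dev accuracy is
# selected, and train/dev accuracy versus regularization is plotted along with
# the resulting test accuracy.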
14 | REGULARIZATION = None # Assign a list of floats in the block below 15 | ### YOUR CODE HERE 16 | REGULARIZATION = np.logspace(-6,0.1,21) 17 | REGULARIZATION = np.hstack([0,REGULARIZATION]) 18 | ### END YOUR CODE 19 | 20 | # Load the dataset 21 | dataset = StanfordSentiment() 22 | tokens = dataset.tokens() 23 | nWords = len(tokens) 24 | 25 | # Load the word vectors we trained earlier 26 | _, wordVectors0, _ = load_saved_params() 27 | N = wordVectors0.shape[0]//2 28 | #assert nWords == N 29 | wordVectors = (wordVectors0[:N,:] + wordVectors0[N:,:]) 30 | dimVectors = wordVectors.shape[1] 31 | 32 | # Load the train set 33 | trainset = dataset.getTrainSentences() 34 | nTrain = len(trainset) 35 | trainFeatures = np.zeros((nTrain, dimVectors)) 36 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 37 | for i in range(nTrain): 38 | words, trainLabels[i] = trainset[i] 39 | trainFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 40 | 41 | # Prepare dev set features 42 | devset = dataset.getDevSentences() 43 | nDev = len(devset) 44 | devFeatures = np.zeros((nDev, dimVectors)) 45 | devLabels = np.zeros((nDev,), dtype=np.int32) 46 | for i in range(nDev): 47 | words, devLabels[i] = devset[i] 48 | devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 49 | 50 | # Try our regularization parameters 51 | results = [] 52 | for regularization in REGULARIZATION: 53 | random.seed(3141) 54 | np.random.seed(59265) 55 | weights = np.random.randn(dimVectors, 5) 56 | print("Training for reg=%f" % regularization) 57 | 58 | # We will do batch optimization 59 | weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 60 | weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100) 61 | 62 | # Test on train set 63 | _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) 64 | trainAccuracy = accuracy(trainLabels, pred) 65 | print("Train accuracy (%%): %f" % trainAccuracy) 66 | 67 | # Test on dev set 68 | _, _, pred = softmaxRegression(devFeatures, devLabels, weights) 69 | devAccuracy = accuracy(devLabels, pred) 70 | print("Dev accuracy (%%): %f" % devAccuracy) 71 | 72 | # Save the results and weights 73 | results.append({ 74 | "reg" : regularization, 75 | "weights" : weights, 76 | "train" : trainAccuracy, 77 | "dev" : devAccuracy}) 78 | 79 | # Print the accuracies 80 | print("") 81 | print("=== Recap ===") 82 | print("Reg\t\tTrain\t\tDev") 83 | for result in results: 84 | print("%E\t%0.4g\t%0.4g" % ( 85 | result["reg"], 86 | result["train"], 87 | result["dev"])) 88 | for result in results: 89 | print("%0.2e & %0.4g & %0.4g \\\\" % ( 90 | result["reg"], 91 | result["train"], 92 | result["dev"])) 93 | print("") 94 | 95 | # Pick the best regularization parameters 96 | BEST_REGULARIZATION = None 97 | BEST_WEIGHTS = None 98 | 99 | ### YOUR CODE HERE 100 | sorted_results = sorted(results, key=lambda x: x['dev'],reverse=True) 101 | BEST_REGULARIZATION = sorted_results[0]['reg'] 102 | BEST_WEIGHTS = sorted_results[0]['weights'] 103 | ### END YOUR CODE 104 | 105 | # Test your findings on the test set 106 | testset = dataset.getTestSentences() 107 | nTest = len(testset) 108 | testFeatures = np.zeros((nTest, dimVectors)) 109 | testLabels = np.zeros((nTest,), dtype=np.int32) 110 | for i in range(nTest): 111 | words, testLabels[i] = testset[i] 112 | testFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) 113 | 114 | _, _, pred = softmaxRegression(testFeatures, testLabels, BEST_WEIGHTS) 115 | print("Best regularization value: %E" % BEST_REGULARIZATION) 116 | 
print("Test accuracy (%%): %f" % accuracy(testLabels, pred)) 117 | 118 | # Make a plot of regularization vs accuracy 119 | plt.plot(REGULARIZATION, [x["train"] for x in results]) 120 | plt.plot(REGULARIZATION, [x["dev"] for x in results]) 121 | plt.xscale('log') 122 | plt.xlabel("regularization") 123 | plt.ylabel("accuracy") 124 | plt.legend(['train', 'dev'], loc='upper right') 125 | plt.savefig("q4_reg_v_acc.png") 126 | plt.show() 127 | 128 | -------------------------------------------------------------------------------- /assignment1/q4_softmaxreg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | from cs224d.data_utils import * 5 | 6 | from q1_softmax import softmax 7 | from q2_gradcheck import gradcheck_naive 8 | from q3_sgd import load_saved_params 9 | 10 | def getSentenceFeature(tokens, wordVectors, sentence): 11 | """ Obtain the sentence feature for sentiment analysis by averaging its word vectors """ 12 | # Implement computation for the sentence features given a sentence. 13 | 14 | # Inputs: 15 | # - tokens: a dictionary that maps words to their indices in 16 | # the word vector list 17 | # - wordVectors: word vectors (each row) for all tokens 18 | # - sentence: a list of words in the sentence of interest 19 | 20 | # Output: 21 | # - sentVector: feature vector for the sentence 22 | 23 | sentVector = np.zeros((wordVectors.shape[1],)) 24 | 25 | ### YOUR CODE HERE 26 | array = np.fromiter( (tokens[word] for word in sentence), dtype='int') 27 | sentVector = np.mean(wordVectors[array], axis=0) 28 | ### END YOUR CODE 29 | 30 | return sentVector 31 | 32 | def softmaxRegression(features, labels, weights, regularization = 0.0, nopredictions = False): 33 | """ Softmax Regression """ 34 | # Implement softmax regression with weight regularization. 
35 | 36 | # Inputs: 37 | # - features: feature vectors, each row is a feature vector 38 | # - labels: labels corresponding to the feature vectors 39 | # - weights: weights of the regressor 40 | # - regularization: L2 regularization constant 41 | 42 | # Output: 43 | # - cost: cost of the regressor 44 | # - grad: gradient of the regressor cost with respect to its 45 | # weights 46 | # - pred: label predictions of the regressor (you might find 47 | # np.argmax helpful) 48 | 49 | prob = softmax(features.dot(weights)) 50 | if len(features.shape) > 1: 51 | N = features.shape[0] 52 | else: 53 | N = 1 54 | 55 | # A vectorized implementation of 1/N * sum(cross_entropy(x_i, y_i)) + 1/2*|w|^2 56 | cost = np.sum(-np.log(prob[np.arange(N), labels] + 1e-12)) / N 57 | cost += 0.5 * regularization * np.sum(weights ** 2) 58 | 59 | ### YOUR CODE HERE: compute the gradients and predictions 60 | pred = np.argmax(prob, axis=1) 61 | dx = prob 62 | dx[np.arange(N), labels] -= 1 63 | dx /= N 64 | # dx is the gradient associated with the loss (softmax layer only) 65 | grad = np.dot(features.T, dx) 66 | #backprop the weights 67 | grad += regularization * weights 68 | #adding the regularization to the gradient 69 | ### END YOUR CODE 70 | 71 | if nopredictions: 72 | return cost, grad 73 | else: 74 | return cost, grad, pred 75 | 76 | def accuracy(y, yhat): 77 | """ Precision for classifier """ 78 | assert(y.shape == yhat.shape) 79 | return np.sum(y == yhat) * 100.0 / y.size 80 | 81 | def softmax_wrapper(features, labels, weights, regularization = 0.0): 82 | cost, grad, _ = softmaxRegression(features, labels, weights, 83 | regularization) 84 | return cost, grad 85 | 86 | def sanity_check(): 87 | """ 88 | Run python q4_softmaxreg.py. 89 | """ 90 | random.seed(314159) 91 | np.random.seed(265) 92 | 93 | dataset = StanfordSentiment() 94 | tokens = dataset.tokens() 95 | nWords = len(tokens) 96 | 97 | _, wordVectors0, _ = load_saved_params() 98 | N = wordVectors0.shape[0]//2 99 | #assert N == nWords 100 | wordVectors = (wordVectors0[:N,:] + wordVectors0[N:,:]) 101 | dimVectors = wordVectors.shape[1] 102 | 103 | dummy_weights = 0.1 * np.random.randn(dimVectors, 5) 104 | dummy_features = np.zeros((10, dimVectors)) 105 | dummy_labels = np.zeros((10,), dtype=np.int32) 106 | for i in range(10): 107 | words, dummy_labels[i] = dataset.getRandomTrainSentence() 108 | dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) 109 | print("==== Gradient check for softmax regression ====") 110 | gradcheck_naive(lambda weights: softmaxRegression(dummy_features, 111 | dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) 112 | 113 | print("\n=== Results ===") 114 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0)) 115 | 116 | dummy_weights = 0.1 * np.random.randn(40, 10) + 1.0 117 | dummy_features = np.random.randn(2000, 40) 118 | dummy_labels = np.argmax(np.random.randn(2000, 10), axis=1) 119 | 120 | print(-np.log(0.1))#expected correct classification (random) = 1 in 10; 121 | #cost then becomes -np.log(0.1) 122 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 123 | 124 | dummy_weights = 0.1 * np.random.randn(40, 80) + 1.0 125 | dummy_features = np.random.randn(2000, 40) 126 | dummy_labels = np.argmax(np.random.randn(2000, 80), axis=1) 127 | 128 | print(-np.log(1./80))#expected correct classification (random) = 1 in 80; 129 | #cost then becomes -np.log(1./80) 130 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 131 | 132 | 
dummy_weights = 0.1 * np.random.randn(40, 1000) + 1.0 133 | dummy_features = np.random.randn(40000, 40) 134 | dummy_labels = np.argmax(np.random.randn(40000, 1000), axis=1) 135 | 136 | print(-np.log(1./1000))#expected correct classification (random) = 1 in 80; 137 | #cost then becomes -np.log(1./80) 138 | print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) 139 | print(np.exp(-softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) 140 | 141 | 142 | if __name__ == "__main__": 143 | sanity_check() 144 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==2.3.1 7 | matplotlib==1.4.2 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.1 11 | pyparsing==2.0.3 12 | python-dateutil==2.4.0 13 | pytz==2014.10 14 | pyzmq==14.4.1 15 | scipy==0.14.1 16 | six==1.9.0 17 | tornado==4.0.2 18 | wsgiref==0.1.2 19 | -------------------------------------------------------------------------------- /assignment1/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | 
*.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | .texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /assignment1/tensorflow_word2vec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import random 4 | import collections 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | import cs224d.data_utils as data_utils 10 | from tensorflow.models.embedding import gen_word2vec as word2vec 11 | 12 | class Options(object): 13 | def __init__(self): 14 | #Model Options 15 | self.emb_dim = 20 16 | self.train_data = None 17 | self.num_samples = 20 18 | self.learning_rate = 1.0 19 | 20 | self.epochs_to_train = 5 21 | self.batch_size = 64 22 | self.window_size = 5 23 | self.min_count = 3 24 | 25 | class Word2Vec(object): 26 | """Word2Vec model (skipgram) """ 27 | def __init__(self, options, session): 28 | self._options = options 29 | self._session = session 30 | self._word2id = {} 31 | self._id2word = [] 32 | self.build_graph() 33 | self.build_eval_graph() 34 | self.save_vocab() 35 | self._read_dataset() 36 | 37 | def _read_dataset(self): 38 | # dataset = data_utils.StanfordSentiment() 39 | # #print(dataset.sent_labels()[0:100]) 40 | # #print(dataset.getSplitSentences(0)[0:100]) 41 | # #this is the labels vector :) 42 | 43 | # #sentences = np.from_iter(dataset.sentences(), dtype="int32") 44 | # self._word2id = dataset.tokens() 45 | # print(self._word2id["UNK"]) 46 | # ids = [self._word2id.get(w) for w in self._word2id.keys()] 47 | # print(ids) 48 | pass 49 | def forward(self, examples, labels): 50 | return None,None 51 | 52 | def nce_loss(self, true_logits, sampled_logits): 53 | opts = self._options 54 | true_xent = tf.nn.sigmoid_cross_entropy_with_logits(true_logits, tf.ones_like(true_logits)) 55 | sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(sampled_logits, tf.zeros_like(sampled_logits)) 56 | nce_loss_tensor = (tf.reduce_sum(true_xent) + 57 | tf.reduce_sum(sampled_xent)) / opts.batch_size 58 | return nce_loss_tensor 59 | 60 | def build_graph(self): 61 | opts = self._options 62 | (words, counts, words_per_epoch, self._epoch, self._words, examples, 63 | labels) = word2vec.skipgram(filename="text8", 64 | batch_size=opt.batch_size, 65 | window_size=opt.window_size, 66 | min_count=opt.min_count, 67 | subsample=0) 68 | (opts.vocab_words, opts.vocab_counts, 69 | opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch]) 70 | opts.vocab_size = len(opts.vocab_words) 71 | print("Data file: ", opts.train_data) 72 | print("Vocab size: ", opts.vocab_size - 1, " + UNK") 73 | print("Words per epoch: ", opts.words_per_epoch) 74 | self._examples = examples 75 | self._labels = labels 76 | self._id2word = opts.vocab_words 77 | for i, w in enumerate(self._id2word): 78 | self._word2id[w] = i 79 | true_logits, sampled_logits = self.forward(examples, labels) 80 | loss = self.nce_loss(true_logits, sampled_logits) 81 | tf.scalar_summary("NCE loss", loss) 82 | self._loss = loss 83 | self.optimize(loss) 84 | 85 | def build_eval_graph(self): 86 | pass 87 | def save_vocab(self): 88 | pass 89 | 90 | if __name__ == "__main__": 91 | opt = 
Options() 92 | session = tf.Session() 93 | model = Word2Vec(opt, session) 94 | -------------------------------------------------------------------------------- /assignment1/tests/test_gradcheck.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_gradcheck.py -vv -s -q 4 | python -m py.test tests/test_gradcheck.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_gradcheck.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical 22 | 23 | def rel_error(x,y): 24 | """ returns relative error """ 25 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 26 | 27 | quad = lambda x: (x**2, 2*x) 28 | 29 | def test_gradcheck_naive_1(): 30 | """ Original sigmoid test defined in q2_sigmoid.py; """ 31 | x = np.array(123.45) 32 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 33 | 34 | def test_gradcheck_naive_2(): 35 | """ Original sigmoid test defined in q2_sigmoid.py; """ 36 | x = np.random.normal(loc=10., scale=30., size=20) 37 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 38 | 39 | def test_gradcheck_naive_3(): 40 | """ Original sigmoid test defined in q2_sigmoid.py; """ 41 | x = np.random.normal(loc=10., scale=30., size=(20,20)) 42 | assert rel_error(quad(x)[1], grad_numerical(quad,x)) 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /assignment1/tests/test_neural.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_neural.py -vv -s -q 4 | python -m py.test tests/test_neural.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_neural.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 22 | from q2_neural import forward_backward_prop 23 | from q2_neural import affine_forward, affine_backward, sigmoid_forward, sigmoid_backward 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 28 | 29 | @pytest.fixture(scope='module') 30 | def construct_toy_model(N=100, D1=10, H=20, D2=10): 31 | dim = [D1, H, D2] 32 | data = np.random.randn(N, dim[0]) 33 | labels = np.zeros((N,dim[2])) 34 | for i in range(N): 35 | labels[i, np.random.randint(0, dim[2]-1)] = 0 36 | 37 | params = np.random.randn((dim[0] + 1) * dim[1] + (dim[1] + 1) * dim[2], ) 38 | return data,labels,params,dim 39 | 40 | def test_affine_forward(): 41 | num_inputs = 2 42 | input_shape = (4, 5, 6) 43 | output_dim = 3 44 | 45 | input_size = num_inputs * np.prod(input_shape) 46 | weight_size = output_dim * 
np.prod(input_shape) 47 | 48 | x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape) 49 | w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim) 50 | b = np.linspace(-0.3, 0.1, num=output_dim).reshape((1,output_dim)) 51 | 52 | out, _ = affine_forward(x, w, b) 53 | correct_out = np.array([[ 1.49834967, 1.70660132, 1.91485297], 54 | [ 3.25553199, 3.5141327, 3.77273342]]) 55 | 56 | # Compare your output with ours. The error should be around 1e-9. 57 | assert out.shape == correct_out.shape 58 | assert rel_error(out, correct_out) < 5e-7 59 | 60 | 61 | def test_affine_backward(): 62 | x = np.random.randn(10, 2, 3) 63 | w = np.random.randn(6, 5) 64 | b = np.random.randn(5).reshape((1,5)) 65 | dout = np.random.randn(10, 5) 66 | 67 | #use eval_numerical_gradient_array for backprop from an output layer: 68 | # input -> layer -> output -> ... -> final_layer_loss 69 | # backprop becomes: 70 | # final_layer_loss -> gradient_of_loss (g.o.l) 71 | # g.o.l -> .. -> g.o.l backproped -> output -> layer -> g.o.l @ input 72 | dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout) 73 | dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout) 74 | db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout) 75 | 76 | _, cache = affine_forward(x, w, b) 77 | dx, dw, db = affine_backward(dout, cache) 78 | 79 | assert dx.shape == dx.shape 80 | assert dw.shape == dw.shape 81 | assert db.shape == db.shape 82 | 83 | assert rel_error(dx_num,dx) < 5e-7 84 | assert rel_error(dw_num,dw) < 5e-7 85 | assert rel_error(db_num,db) < 5e-7 86 | -------------------------------------------------------------------------------- /assignment1/tests/test_neural_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_neural_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_neural_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_neural_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | 20 | from collections import defaultdict, OrderedDict, Counter 21 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 22 | from q2_neural import forward_backward_prop 23 | from q2_neural import affine_forward, affine_backward, sigmoid_forward, sigmoid_backward 24 | 25 | from q2_neural_sol import forward_backward_prop_sol 26 | 27 | def rel_error(x,y): 28 | """ returns relative error """ 29 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 30 | 31 | @pytest.fixture(scope='module') 32 | def construct_toy_model(D1=10, H=20, D2=10, N=100): 33 | dim = [D1, H, D2] 34 | data = np.random.randn(N, dim[0]) 35 | labels = np.zeros((N,dim[2])) 36 | for i in range(N): 37 | labels[i, np.random.randint(0, dim[2]-1)] = 0 38 | 39 | params = np.random.randn((dim[0] + 1) * dim[1] + (dim[1] + 1) * dim[2], ) 40 | return data,labels,params,dim 41 | 42 | def test_affine_forward(): 43 | num_inputs = 2 44 | input_shape = (4, 5, 6) 45 | output_dim = 3 46 | 47 | input_size = num_inputs * np.prod(input_shape) 48 | weight_size = output_dim 
* np.prod(input_shape) 49 | 50 | x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape) 51 | w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim) 52 | b = np.linspace(-0.3, 0.1, num=output_dim).reshape((1,output_dim)) 53 | 54 | out, _ = affine_forward(x, w, b) 55 | correct_out = np.array([[ 1.49834967, 1.70660132, 1.91485297], 56 | [ 3.25553199, 3.5141327, 3.77273342]]) 57 | 58 | # Compare your output with ours. The error should be around 1e-9. 59 | assert out.shape == correct_out.shape 60 | assert rel_error(out, correct_out) < 5e-7 61 | 62 | def test_affine_backward(): 63 | x = np.random.randn(10, 2, 3) 64 | w = np.random.randn(6, 5) 65 | b = np.random.randn(5).reshape((1,5)) 66 | dout = np.random.randn(10, 5) 67 | 68 | #use eval_numerical_gradient_array for backprop from an output layer: 69 | # input -> layer -> output -> ... -> final_layer_loss 70 | # backprop becomes: 71 | # final_layer_loss -> gradient_of_loss (g.o.l) 72 | # g.o.l -> .. -> g.o.l backproped -> output -> layer -> g.o.l @ input 73 | dx_num = eval_numerical_gradient_array(lambda x: affine_forward(x, w, b)[0], x, dout) 74 | dw_num = eval_numerical_gradient_array(lambda w: affine_forward(x, w, b)[0], w, dout) 75 | db_num = eval_numerical_gradient_array(lambda b: affine_forward(x, w, b)[0], b, dout) 76 | 77 | _, cache = affine_forward(x, w, b) 78 | dx, dw, db = affine_backward(dout, cache) 79 | 80 | assert dx.shape == dx.shape 81 | assert dw.shape == dw.shape 82 | assert db.shape == db.shape 83 | 84 | assert rel_error(dx_num,dx) < 5e-7 85 | assert rel_error(dw_num,dw) < 5e-7 86 | assert rel_error(db_num,db) < 5e-7 87 | 88 | @pytest.mark.parametrize("dim1", list(range(2,10))) 89 | @pytest.mark.parametrize("dim2", list(range(2,10))) 90 | @pytest.mark.parametrize("dim3", list(range(2,10))) 91 | def test_neural_vs_neural_sol(dim1, dim2, dim3, N=300): 92 | dimensions = [ dim1, dim2, dim3 ] 93 | data = np.random.randn(N, dim1) 94 | labels = np.zeros((N, dim3)) 95 | for i in range(N): 96 | labels[i, random.randint(0,dim3 -1)] = 1. 97 | 98 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 99 | dimensions[1] + 1) * dimensions[2], ) 100 | 101 | cost, grad = forward_backward_prop(data, labels, params, dimensions) 102 | cost_sol, grad_sol = forward_backward_prop_sol(data, labels, params, dimensions) 103 | assert rel_error(cost, cost_sol) < 1e-7 104 | 105 | @pytest.mark.parametrize("dim1", list(range(2,10))) 106 | @pytest.mark.parametrize("dim2", list(range(2,10))) 107 | @pytest.mark.parametrize("dim3", list(range(2,10))) 108 | def test_neural_vs_neural_sol_gradient(dim1, dim2, dim3, N=300): 109 | dimensions = [ dim1, dim2, dim3 ] 110 | data = np.random.randn(N, dim1) 111 | labels = np.zeros((N, dim3)) 112 | for i in range(N): 113 | labels[i, random.randint(0,dim3 -1)] = 1. 
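# Note on the params vector built below: its length is
#   (dim1 + 1) * dim2 + (dim2 + 1) * dim3,
# which presumably packs W1 (dim1 x dim2), b1 (dim2), W2 (dim2 x dim3) and b2 (dim3)
# for the one-hidden-layer network; the exact unpacking lives in
# forward_backward_prop (q2_neural.py), which is not shown here.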
114 | 115 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 116 | dimensions[1] + 1) * dimensions[2], ) 117 | 118 | cost, grad = forward_backward_prop(data, labels, params, dimensions) 119 | cost_sol, grad_sol = forward_backward_prop_sol(data, labels, params, dimensions) 120 | assert rel_error(grad, grad_sol) < 1e-8 121 | 122 | -------------------------------------------------------------------------------- /assignment1/tests/test_normalize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_normalize.py -vv -s -q 4 | python -m py.test tests/test_normalize.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs231n/ tests/test_normalize.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | import random 18 | 19 | from collections import defaultdict, OrderedDict, Counter 20 | from q3_word2vec import normalizeRows, l1_normalize_rows, l2_normalize_rows 21 | 22 | def rel_error(x,y): 23 | """ returns relative error """ 24 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 25 | 26 | def test_normalize(): 27 | """ Original normalization test defined in q3_word2vec.py; """ 28 | x = np.array([[3.0,4.0],[1, 2]]) 29 | norm_x = normalizeRows(x) 30 | y = np.array([[0.6, 0.8], [0.4472, 0.8944]]) 31 | assert rel_error(norm_x, y) <= 1e-4 32 | 33 | def test_l2_normalize(): 34 | x = np.array([[3.0,4.0],[1, 2]]) 35 | norm_x = l2_normalize_rows(x) 36 | y = np.array([[0.6, 0.8], [0.4472, 0.8944]]) 37 | assert rel_error(norm_x, y) <= 1e-4 38 | 39 | @pytest.fixture(scope='module') 40 | def test_array(): 41 | def functor(in_dim_1 = 10, in_dim_2 = 10): 42 | assert in_dim_1 > 0 and in_dim_2 > 0 43 | return np.random.uniform(low=0.,high=10.,size=(in_dim_1,in_dim_2)) 44 | return functor 45 | 46 | def test_l2_against_sklearn(test_array): 47 | try: 48 | from sklearn.preprocessing import normalize 49 | in_array = test_array() 50 | assert rel_error(l2_normalize_rows(in_array), normalize(in_array, axis=1, norm='l2')) <= 1e-8 51 | except ImportError: 52 | assert 1 53 | print("ImportError (sklearn) on current node!") 54 | 55 | def test_l1_against_sklearn(test_array): 56 | try: 57 | from sklearn.preprocessing import normalize 58 | in_array = test_array() 59 | assert rel_error(l1_normalize_rows(in_array), normalize(in_array, axis=1, norm='l1')) <= 1e-8 60 | except ImportError: 61 | assert 1 62 | print("ImportError (sklearn) on current node!") 63 | -------------------------------------------------------------------------------- /assignment1/tests/test_sgd.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sgd.py -vv -s -q 4 | python -m py.test tests/test_sgd.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs231n/ tests/test_sgd.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | import random 18 | 19 | from collections import 
defaultdict, OrderedDict, Counter 20 | from q3_sgd import sgd 21 | 22 | def rel_error(x,y): 23 | """ returns relative error """ 24 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 25 | 26 | @pytest.fixture(scope='module') 27 | def quad(): 28 | return lambda x: (np.sum(x**2), x * 2) 29 | 30 | def test_sgd_1(quad): 31 | """ Original normalization test defined in q3_word2vec.py; """ 32 | 33 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None) 34 | assert abs(t1) <= 1e-6 35 | 36 | def test_sgd_2(quad): 37 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None) 38 | assert abs(t2) <= 1e-6 39 | 40 | def test_sgd_3(quad): 41 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None) 42 | assert abs(t3) <= 1e-6 43 | -------------------------------------------------------------------------------- /assignment1/tests/test_sigmoid.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sigmoid.py -vv -s -q 4 | python -m py.test tests/test_sigmoid.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_sigmoid.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q2_sigmoid import sigmoid, sigmoid_grad 18 | 19 | import random 20 | 21 | from collections import defaultdict, OrderedDict, Counter 22 | 23 | COUNT=5 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-7, np.abs(x) + np.abs(y)))) 28 | 29 | def test_sigmoid(): 30 | """ Original sigmoid test defined in q2_sigmoid.py; """ 31 | x = np.array([[1, 2], [-1, -2]]) 32 | f = sigmoid(x) 33 | assert rel_error(f, np.array([[0.73105858, 0.88079708], 34 | [0.26894142, 0.11920292]])) <= 1e-7 35 | 36 | def test_sigmoidgrad(): 37 | """ Original sigmoid gradient test defined in q2_sigmoid.py; """ 38 | x = np.array([[1, 2], [-1, -2]]) 39 | f = sigmoid(x) 40 | g = sigmoid_grad(f) 41 | assert rel_error(g, np.array([[0.19661193, 0.10499359], 42 | [0.19661193, 0.10499359]])) <= 1e-7 43 | 44 | @pytest.mark.parametrize("dim", list(range(1,8))) 45 | def test_sigmoid_shape(dim): 46 | testing_shape = [] 47 | for y in range(0,dim): 48 | testing_shape.append(np.random.randint(3,8)) 49 | shape = tuple(testing_shape) 50 | #z = np.random.randn(*testing_shape) 51 | x = np.random.standard_normal(shape) 52 | y = np.copy(x) 53 | assert x.shape == sigmoid(y).shape 54 | assert x.shape == sigmoid_grad(sigmoid(y)).shape 55 | 56 | def test_sigmoid_minus_z(count=100): 57 | z = np.random.normal(loc=0., scale=100., size=count) 58 | y = -z 59 | assert rel_error(1 - sigmoid(y), sigmoid(z)) <= 1e-7 60 | 61 | def test_sigmoid_monotone(count=100): 62 | z = np.random.normal(loc=0., scale=100., size=count) 63 | shift = np.random.uniform(low=0., high=10., size=count) 64 | assert np.all(sigmoid(z + shift) - sigmoid(z)) >= 0 65 | assert np.all(sigmoid(z - shift) - sigmoid(z)) <= 0 66 | 67 | def test_sigmoid_range(count=100): 68 | z = np.random.normal(loc=0., scale=100., size=count) 69 | assert np.max(sigmoid(z)) <= 1. 70 | assert np.max(sigmoid(z)) >= 0. 
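# For reference (an assumption about q2_sigmoid.py, which is not shown here),
# the functions exercised by these tests are expected to behave like
#   sigmoid(x)      = 1.0 / (1.0 + np.exp(-x))   # element-wise, values in (0, 1)
#   sigmoid_grad(f) = f * (1.0 - f)              # where f = sigmoid(x)
# so that sigmoid(-x) == 1 - sigmoid(x) and the finite-difference check in
# test_sigmoid_gradient below agrees with sigmoid_grad(sigmoid(x)).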
71 | 72 | @pytest.mark.parametrize('execution_number', list(range(COUNT))) 73 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 74 | def test_sigmoid_permutation_axis0(dim_1, execution_number): 75 | """ sigmoid needs to be applied element-wise;""" 76 | a1 = np.random.normal(size=(dim_1,1)) 77 | s1 = sigmoid(a1) 78 | 79 | permutation = np.random.permutation(dim_1) 80 | inverse_permutation = np.argsort(permutation) 81 | 82 | s1_perm = sigmoid(a1[permutation]) 83 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 84 | 85 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 86 | def test_sigmoid_permutation_axis1(dim_1): 87 | a1 = np.random.normal(size=(1,dim_1)) 88 | s1 = sigmoid(a1) 89 | 90 | permutation = np.random.permutation(dim_1) 91 | inverse_permutation = np.argsort(permutation) 92 | 93 | s1_perm = sigmoid(a1.ravel()[permutation]) 94 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 95 | #note: permutation(sigmoid(x)) = sigmoid(permutation(x)) 96 | 97 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 98 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 99 | def test_sigmoid_gradient(dim_1, dim_2): 100 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 101 | shift = np.random.uniform(low=1e-9, high=1e-5, size=(dim_1,dim_2)) 102 | ap = a1 + shift 103 | am = a1 - shift 104 | 105 | dsigmoid = (sigmoid(ap) - sigmoid(am)) / (2*shift) 106 | assert np.abs(np.max(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 107 | assert np.abs(np.min(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 108 | -------------------------------------------------------------------------------- /assignment1/tests/test_sigmoid_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_sigmoid_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_sigmoid_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_sigmoid_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q2_sigmoid import sigmoid, sigmoid_grad 18 | from q2_sigmoid_sol import sigmoid_sol, sigmoid_grad_sol 19 | 20 | import random 21 | 22 | from collections import defaultdict, OrderedDict, Counter 23 | 24 | COUNT=5 25 | 26 | def rel_error(x,y): 27 | """ returns relative error """ 28 | return np.max(np.abs(x - y) / (np.maximum(1e-7, np.abs(x) + np.abs(y)))) 29 | 30 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 31 | def test_sigmoid(sigmoid_f): 32 | """ Original sigmoid test defined in q2_sigmoid.py; """ 33 | x = np.array([[1, 2], [-1, -2]]) 34 | f = sigmoid_f(x) 35 | assert rel_error(f, np.array([[0.73105858, 0.88079708], 36 | [0.26894142, 0.11920292]])) <= 1e-7 37 | 38 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 39 | def test_sigmoidgrad(sigmoid_f): 40 | """ Original sigmoid gradient test defined in q2_sigmoid.py; """ 41 | x = np.array([[1, 2], [-1, -2]]) 42 | f = sigmoid(x) 43 | g = sigmoid_grad(f) 44 | assert rel_error(g, np.array([[0.19661193, 0.10499359], 45 | [0.19661193, 0.10499359]])) <= 1e-7 46 | 47 | @pytest.mark.parametrize("dim", list(range(1,8))) 48 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 49 | def 
test_sigmoid_shape(dim, sigmoid_f): 50 | testing_shape = [] 51 | for y in range(0,dim): 52 | testing_shape.append(np.random.randint(3,8)) 53 | shape = tuple(testing_shape) 54 | #z = np.random.randn(*testing_shape) 55 | x = np.random.standard_normal(shape) 56 | y = np.copy(x) 57 | assert x.shape == sigmoid(y).shape 58 | assert x.shape == sigmoid_grad(sigmoid(y)).shape 59 | 60 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 61 | def test_sigmoid_minus_z(sigmoid_f, count=100): 62 | z = np.random.normal(loc=0., scale=100., size=count) 63 | y = -z 64 | assert rel_error(1 - sigmoid(y), sigmoid(z)) <= 1e-7 65 | 66 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 67 | def test_sigmoid_monotone(sigmoid_f, count=100): 68 | z = np.random.normal(loc=0., scale=100., size=count) 69 | shift = np.random.uniform(low=0., high=10., size=count) 70 | assert np.all(sigmoid(z + shift) - sigmoid(z)) >= 0 71 | assert np.all(sigmoid(z - shift) - sigmoid(z)) <= 0 72 | 73 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 74 | def test_sigmoid_range(sigmoid_f, count=100): 75 | z = np.random.normal(loc=0., scale=100., size=count) 76 | assert np.max(sigmoid(z)) <= 1. 77 | assert np.max(sigmoid(z)) >= 0. 78 | 79 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 80 | @pytest.mark.parametrize('execution_number', list(range(COUNT))) 81 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 82 | def test_sigmoid_permutation_axis0(dim_1, execution_number, sigmoid_f): 83 | """ sigmoid needs to be applied element-wise;""" 84 | a1 = np.random.normal(size=(dim_1,1)) 85 | s1 = sigmoid(a1) 86 | 87 | permutation = np.random.permutation(dim_1) 88 | inverse_permutation = np.argsort(permutation) 89 | 90 | s1_perm = sigmoid(a1[permutation]) 91 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 92 | 93 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 94 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 95 | def test_sigmoid_permutation_axis1(dim_1, sigmoid_f): 96 | a1 = np.random.normal(size=(1,dim_1)) 97 | s1 = sigmoid(a1) 98 | 99 | permutation = np.random.permutation(dim_1) 100 | inverse_permutation = np.argsort(permutation) 101 | 102 | s1_perm = sigmoid(a1.ravel()[permutation]) 103 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 104 | #note: permutation(sigmoid(x)) = sigmoid(permutation(x)) 105 | 106 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 107 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 108 | @pytest.mark.parametrize("sigmoid_f", [sigmoid, sigmoid_sol]) 109 | def test_sigmoid_gradient(dim_1, dim_2, sigmoid_f): 110 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 111 | shift = np.random.uniform(low=1e-9, high=1e-5, size=(dim_1,dim_2)) 112 | ap = a1 + shift 113 | am = a1 - shift 114 | 115 | dsigmoid = (sigmoid(ap) - sigmoid(am)) / (2*shift) 116 | assert np.abs(np.max(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 117 | assert np.abs(np.min(dsigmoid - sigmoid_grad(sigmoid(a1)))) <= 1e-7 118 | 119 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 120 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 121 | def test_sigmoid(dim_1, dim_2): 122 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 123 | assert rel_error(sigmoid(a1), sigmoid_sol(a1)) <= 1e-10 124 | 125 | 126 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 127 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 128 | def test_sigmoid(dim_1, dim_2): 129 | a1 = np.random.normal(loc=0., scale=20., 
size=(dim_1,dim_2)) 130 | a1_copy = a1.copy() 131 | 132 | s_a1 = sigmoid(a1) 133 | s_sol_a1 = sigmoid_sol(a1_copy) 134 | 135 | assert rel_error(sigmoid_grad(s_a1), sigmoid_grad_sol(s_sol_a1)) <= 1e-10 136 | 137 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 138 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 139 | def test_sigmoid(dim_1, dim_2): 140 | a1 = np.random.normal(loc=0., scale=20., size=(dim_1,dim_2)) 141 | a1_copy = a1.copy() 142 | 143 | assert rel_error(sigmoid_grad(a1), sigmoid_grad_sol(a1_copy)) <= 1e-10 144 | 145 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax.py -vv -s -q 4 | python -m py.test tests/test_softmax.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | 19 | import random 20 | 21 | from collections import defaultdict, OrderedDict, Counter 22 | 23 | def rel_error(x,y): 24 | """ returns relative error """ 25 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 26 | 27 | @pytest.fixture(scope='module') 28 | def array_1(): 29 | return np.array([1,2]) 30 | 31 | @pytest.fixture(scope='module') 32 | def array_2(): 33 | return np.array([1001,1002]) 34 | 35 | @pytest.fixture(scope='module') 36 | def array_3(): 37 | return np.array([-1001,-1002]) 38 | 39 | @pytest.fixture(scope='module') 40 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 41 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 42 | 43 | @pytest.fixture(scope='module') 44 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 45 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 46 | 47 | @pytest.fixture(scope='module') 48 | def linear_shift(low=-100, high=100.): 49 | return np.random.uniform(low,high) 50 | 51 | @pytest.fixture(scope='module') 52 | def vector_shift(in_dim, low=-100., high=100.): 53 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 54 | 55 | #starting with some simple fixed test 56 | def test_softmax_array_1(array_1): 57 | """ Original softmax test defined in q2_softmax.py; """ 58 | assert rel_error(softmax(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 59 | 60 | def test_softmax_array_2(array_1, array_2): 61 | """ Original softmax test defined in q2_softmax.py; """ 62 | assert rel_error(softmax(array_2), softmax(array_1)) < 1e-8 63 | 64 | def test_softmax_array_3(array_3): 65 | """ Original softmax test defined in q2_softmax.py; """ 66 | assert rel_error(softmax(array_3), np.array( 67 | [0.73105858, 0.26894142])) 68 | 69 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 70 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 71 | def test_softmax_shape(dim_1, dim_2): 72 | a1 = np.random.normal(size=(dim_1,dim_2)) 73 | assert a1.shape == softmax(a1).shape 74 | 75 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 76 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 77 | def test_softmax_linearity(dim_1, dim_2): 78 | shift = 
linear_shift(-100,100) 79 | a1 = np.random.normal(size=(dim_1,dim_2)) 80 | a2 = a1 + shift 81 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 82 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 83 | 84 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 85 | def test_softmax_permutation_axis0(dim_1): 86 | a1 = np.random.normal(size=(dim_1,1)) 87 | s1 = softmax(a1) 88 | 89 | permutation = np.random.permutation(dim_1) 90 | inverse_permutation = np.argsort(permutation) 91 | 92 | s1_perm = softmax(a1[permutation]) 93 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 94 | 95 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 96 | def test_softmax_permutation_axis1(dim_1): 97 | a1 = np.random.normal(size=(1,dim_1)) 98 | s1 = softmax(a1) 99 | 100 | permutation = np.random.permutation(dim_1) 101 | inverse_permutation = np.argsort(permutation) 102 | 103 | s1_perm = softmax(a1.ravel()[permutation]) 104 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 105 | #note: permutation(softmax(x)) = softmax(permutation(x)) 106 | 107 | #probably can move this to a 'fake' data call 108 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 109 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 110 | def test_softmax_linearity_rowwise(dim_1, dim_2): 111 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 112 | #print(shift) 113 | a1 = np.random.normal(size=(dim_1,dim_2)) 114 | a2 = a1 + shift 115 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 116 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 117 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax_regression.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax_regression.py -vv -s -q 4 | python -m py.test tests/test_softmax_regression.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax_regression.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 19 | from q4_softmaxreg import softmaxRegression, accuracy, softmax_wrapper, getSentenceFeature 20 | 21 | import random 22 | 23 | from collections import defaultdict, OrderedDict, Counter 24 | 25 | def rel_error(x,y): 26 | """ returns relative error """ 27 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 28 | 29 | @pytest.fixture(scope='module') 30 | def array_1(): 31 | return np.array([1,2]) 32 | 33 | @pytest.fixture(scope='module') 34 | def array_2(): 35 | return np.array([1001,1002]) 36 | 37 | @pytest.fixture(scope='module') 38 | def array_3(): 39 | return np.array([-1001,-1002]) 40 | 41 | @pytest.fixture(scope='module') 42 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 43 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 44 | 45 | @pytest.fixture(scope='module') 46 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 47 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 48 | 49 | @pytest.fixture(scope='module') 50 | def linear_shift(low=-100, high=100.): 51 
| return np.random.uniform(low,high) 52 | 53 | @pytest.fixture(scope='module') 54 | def vector_shift(in_dim, low=-100., high=100.): 55 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 56 | 57 | #starting with some simple fixed test 58 | def test_softmax_array_1(array_1): 59 | """ Original softmax test defined in q2_softmax.py; """ 60 | assert rel_error(softmax(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 61 | 62 | def test_softmax_array_2(array_1, array_2): 63 | """ Original softmax test defined in q2_softmax.py; """ 64 | assert rel_error(softmax(array_2), softmax(array_1)) < 1e-8 65 | 66 | def test_softmax_array_3(array_3): 67 | """ Original softmax test defined in q2_softmax.py; """ 68 | assert rel_error(softmax(array_3), np.array( 69 | [0.73105858, 0.26894142])) 70 | 71 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 72 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 73 | def test_softmax_shape(dim_1, dim_2): 74 | a1 = np.random.normal(size=(dim_1,dim_2)) 75 | assert a1.shape == softmax(a1).shape 76 | 77 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 78 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 79 | def test_softmax_linearity(dim_1, dim_2): 80 | shift = linear_shift(-100,100) 81 | a1 = np.random.normal(size=(dim_1,dim_2)) 82 | a2 = a1 + shift 83 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 84 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 85 | 86 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 87 | def test_softmax_permutation_axis0(dim_1): 88 | a1 = np.random.normal(size=(dim_1,1)) 89 | s1 = softmax(a1) 90 | 91 | permutation = np.random.permutation(dim_1) 92 | inverse_permutation = np.argsort(permutation) 93 | 94 | s1_perm = softmax(a1[permutation]) 95 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 96 | 97 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 98 | def test_softmax_permutation_axis1(dim_1): 99 | a1 = np.random.normal(size=(1,dim_1)) 100 | s1 = softmax(a1) 101 | 102 | permutation = np.random.permutation(dim_1) 103 | inverse_permutation = np.argsort(permutation) 104 | 105 | s1_perm = softmax(a1.ravel()[permutation]) 106 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 107 | #note: permutation(softmax(x)) = softmax(permutation(x)) 108 | 109 | #probably can move this to a 'fake' data call 110 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 111 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 112 | def test_softmax_linearity_rowwise(dim_1, dim_2): 113 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 114 | #print(shift) 115 | a1 = np.random.normal(size=(dim_1,dim_2)) 116 | a2 = a1 + shift 117 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 118 | assert rel_error(softmax(a1),softmax(a2)) < 1e-8 119 | 120 | @pytest.mark.parametrize("samples", [5000, 10000]) 121 | @pytest.mark.parametrize("features", list(range(20, 41, 5))) 122 | @pytest.mark.parametrize("classes", [100]) 123 | def test_softmax_regression(samples, features, classes): 124 | dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0 125 | dummy_features = np.random.randn(samples, features) 126 | dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1) 127 | #np.sqrt(p * (1 - p) / N ) 128 | #n/N 129 | #rough estimate of how many should be correct (assuming random: -np.log(p), where p is the probability of guessing correctly) 130 | assert np.abs(-np.log(1./classes) - (softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) / 
-np.log(1./classes) <= 0.15 131 | 132 | @pytest.mark.parametrize("samples", [50, 60]) 133 | @pytest.mark.parametrize("features", list(range(20, 41, 5))) 134 | @pytest.mark.parametrize("classes", [10]) 135 | def test_softmax_gradient(samples, features, classes, check_count=20): 136 | dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0 137 | dummy_features = np.random.randn(samples, features) 138 | dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1) 139 | #rough estimate of how many should be correct (assuming random: -np.log(p), where p is the probability of guessing correctly) 140 | #assert np.abs(-np.log(1./classes) - (softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0])) / -np.log(1./classes) <= 0.15 141 | 142 | f = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[0] 143 | g = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[1] 144 | 145 | W = dummy_weights 146 | grad_analytic = g(W) 147 | 148 | for i in range(check_count): 149 | ix = tuple([random.randrange(m) for m in W.shape]) 150 | shift = np.zeros(W.shape) 151 | shift[ix] = 1e-7 152 | grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7) 153 | assert( abs(grad_numerical - grad_analytic[ix]) / (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0002) 154 | #only evaluating at one point (expect the majority of the deviation at this point) 155 | -------------------------------------------------------------------------------- /assignment1/tests/test_softmax_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_softmax.py -vv -s -q 4 | python -m py.test tests/test_softmax.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_softmax.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | from q1_softmax import softmax 18 | from q1_softmax_sol import softmax_sol 19 | 20 | import random 21 | 22 | from collections import defaultdict, OrderedDict, Counter 23 | 24 | def rel_error(x,y): 25 | """ returns relative error """ 26 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 27 | 28 | @pytest.fixture(scope='module') 29 | def array_1(): 30 | return np.array([1,2]) 31 | 32 | @pytest.fixture(scope='module') 33 | def array_2(): 34 | return np.array([1001,1002]) 35 | 36 | @pytest.fixture(scope='module') 37 | def array_3(): 38 | return np.array([-1001,-1002]) 39 | 40 | @pytest.fixture(scope='module') 41 | def fake_data_normal(in_dim_1, in_dim_2, mean=0., sigma=1.): 42 | return np.random.normal(loc=mean, scale=sigma, size=(in_dim_1,in_dim_2)) 43 | 44 | @pytest.fixture(scope='module') 45 | def fake_data_uniform(in_dim_1, in_dim_2, low=-1000., high=1000.): 46 | return np.random.uniform(low=low, high=high, size=(in_dim_1, in_dim_2)) 47 | 48 | @pytest.fixture(scope='module') 49 | def linear_shift(low=-100, high=100.): 50 | return np.random.uniform(low,high) 51 | 52 | @pytest.fixture(scope='module') 53 | def vector_shift(in_dim, low=-100., high=100.): 54 | return np.random.uniform(low=low,high=high,size=(in_dim,1)) 55 | 56 | #starting with some simple fixed test 57 | 
@pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 58 | def test_softmax_array_1(array_1, softmax_f): 59 | """ Original softmax test defined in q2_softmax.py; """ 60 | assert rel_error(softmax_f(array_1), np.array([0.26894142, 0.73105858])) < 1e-8 61 | 62 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 63 | def test_softmax_array_2(array_1, array_2, softmax_f): 64 | """ Original softmax test defined in q2_softmax.py; """ 65 | assert rel_error(softmax_f(array_2), softmax_f(array_1)) < 1e-8 66 | 67 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 68 | def test_softmax_array_3(array_3, softmax_f): 69 | """ Original softmax test defined in q2_softmax.py; """ 70 | assert rel_error(softmax_f(array_3), np.array( 71 | [0.73105858, 0.26894142])) 72 | 73 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 74 | @pytest.mark.parametrize("dim_2", list(range(1,20))) 75 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 76 | def test_softmax_shape(dim_1, dim_2, softmax_f): 77 | a1 = np.random.normal(size=(dim_1,dim_2)) 78 | assert a1.shape == softmax_f(a1).shape 79 | 80 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 81 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 82 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 83 | def test_softmax_linearity(dim_1, dim_2, softmax_f): 84 | shift = linear_shift(-100,100) 85 | a1 = np.random.normal(size=(dim_1,dim_2)) 86 | a2 = a1 + shift 87 | assert rel_error(np.max(shift), np.min(shift)) <1e-8 88 | assert rel_error(softmax_f(a1), softmax_f(a2)) < 1e-8 89 | 90 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 91 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 92 | def test_softmax_permutation_axis0(dim_1, softmax_f): 93 | a1 = np.random.normal(size=(dim_1,1)) 94 | s1 = softmax_f(a1) 95 | 96 | permutation = np.random.permutation(dim_1) 97 | inverse_permutation = np.argsort(permutation) 98 | 99 | s1_perm = softmax_f(a1[permutation]) 100 | assert rel_error(s1_perm[inverse_permutation], s1) <= 1e-8 101 | 102 | @pytest.mark.parametrize("dim_1", list(range(1,20))) 103 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 104 | def test_softmax_permutation_axis1(dim_1, softmax_f): 105 | a1 = np.random.normal(size=(1,dim_1)) 106 | s1 = softmax_f(a1) 107 | 108 | permutation = np.random.permutation(dim_1) 109 | inverse_permutation = np.argsort(permutation) 110 | 111 | s1_perm = softmax_f(a1.ravel()[permutation]) 112 | assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8 113 | #note: permutation(softmax(x)) = softmax(permutation(x)) 114 | 115 | #probably can move this to a 'fake' data call 116 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 117 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 118 | @pytest.mark.parametrize("softmax_f", [softmax, softmax_sol]) 119 | def test_softmax_linearity_rowwise(dim_1, dim_2, softmax_f): 120 | shift = np.random.uniform(low=-100,high=100,size=(dim_1,1)) 121 | #print(shift) 122 | a1 = np.random.normal(size=(dim_1,dim_2)) 123 | a2 = a1 + shift 124 | assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8 125 | assert rel_error(softmax_f(a1), softmax_f(a2)) < 1e-8 126 | 127 | #ABOVE tests both implementations; 128 | #Now comparisons 129 | @pytest.mark.parametrize("dim_1", list(range(1,20,3))) 130 | @pytest.mark.parametrize("dim_2", list(range(1,20,3))) 131 | def test_softmax_vs_softmax_sol(dim_1, dim_2): 132 | distribution = np.random.uniform(low=-100,high=100, size=(dim_1, dim_2)) 133 | 
assert rel_error(softmax_sol(distribution), softmax(distribution)) < 1e-10 134 | -------------------------------------------------------------------------------- /assignment1/tests/test_word2vec_to_solutions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | HOW TO RUN THIS CODE (if tests are within the assignment 1 root): 3 | python -m py.test tests/test_word2vec_to_solutions.py -vv -s -q 4 | python -m py.test tests/test_word2vec_to_solutions.py -vv -s -q --cov 5 | 6 | py.test.exe --cov=cs224d/ tests/test_word2vec_to_solutions.py --cov-report html 7 | 8 | (if the tests are within the subfolder tests) 9 | PYTHONPATH=${PWD} py.test.exe tests/ -v --cov-report html 10 | python -m pytest tests -v --cov-report html 11 | 12 | Open index.html contained within htmlcov 13 | ''' 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | import random 19 | from collections import defaultdict, OrderedDict, Counter 20 | from q2_gradcheck import grad_numerical, eval_numerical_gradient_array 21 | 22 | from q3_word2vec import normalizeRows 23 | from q3_word2vec import softmaxCostAndGradient, negSamplingCostAndGradient 24 | from q3_word2vec import skipgram, cbow 25 | 26 | from q3_word2vec_sol import normalizeRows_sol 27 | from q3_word2vec_sol import softmaxCostAndGradient_sol, negSamplingCostAndGradient_sol 28 | from q3_word2vec_sol import skipgram_sol, cbow_sol 29 | 30 | from q3_word2vec import word2vec_sgd_wrapper 31 | 32 | def rel_error(x,y): 33 | """ returns relative error """ 34 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 35 | 36 | @pytest.fixture(scope='module') 37 | def dataset_default(): 38 | dataset = type('dummy', (), {})() 39 | def dummySampleTokenIdx(): 40 | return random.randint(0, 4) 41 | 42 | def getRandomContext(C): 43 | tokens = ["a", "b", "c", "d", "e"] 44 | return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \ 45 | for i in range(2*C)] 46 | dataset.sampleTokenIdx = dummySampleTokenIdx 47 | dataset.getRandomContext = getRandomContext 48 | dataset.genTokens = ["a", "b", "c", "d", "e"] 49 | dataset.dummy_tokens = dict((i,j) for j,i in enumerate(dataset.genTokens())) 50 | dataset.dummy_vectors = normalizeRows(np.random(10,3)) 51 | return dataset 52 | 53 | @pytest.fixture(scope='module') 54 | def dataset_large(size = 10): 55 | assert size < 26 56 | dataset = type('dummy', (), {})() 57 | 58 | def dummySampleTokenIdx(): 59 | return random.randint(0, size) 60 | 61 | def gen_tokens(): 62 | tokens = [chr(i + ord('a')) for i in range(0, size)] 63 | return tokens 64 | 65 | def getRandomContext(C = size): 66 | tokens = gen_tokens() 67 | return tokens[random.randint(0,size-1)], [tokens[random.randint(0,size-1)] \ 68 | for i in range(2*C)] 69 | 70 | dataset.size = size 71 | dataset.sampleTokenIdx = dummySampleTokenIdx 72 | dataset.getRandomContext = getRandomContext 73 | dataset.genTokens = gen_tokens() 74 | dataset.dummy_tokens = dict((i,j) for j,i in enumerate(dataset.genTokens)) 75 | dataset.dummy_vectors = normalizeRows(np.random.randn(size * 2, 5)) 76 | return dataset 77 | 78 | 79 | def test_skipgram_to_solutions(dataset_large): 80 | word2vec_sgd_wrapper(skipgram, dataset_large.dummy_tokens, dataset_large.dummy_vectors, dataset_large, 5) 81 | #this might be harder than it looks (since the number of calls to random have to be identical;) 82 | #will probably need to re-work this -------------------------------------------------------------------------------- /assignment2/README.md: 
-------------------------------------------------------------------------------- 1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/) 2 | ==================================================================================== 3 | 4 | Assignment #2: Deep and Recurrent Neural Networks 5 | ------------------------------------------------- 6 | 7 | **Due Date: 5/5/2016 (Thu) 11:59 PM PST.** 8 | 9 | In this assignment you will learn how to use TensorFlow to solve problems in NLP. In particular, you'll use TensorFlow to implement feed-forward neural networks and recurrent neural networks (RNNs), and apply them to the tasks of Named Entity Recognition (NER) and Language Modeling (LM). 10 | 11 | As with Assignment #1, you're limited to a maximum of three late days on this assignment. Don't forget that the in-class midterm is scheduled for May 10, so we recommend starting this one early! 12 | 13 | Setup 14 | ----- 15 | 16 | **Note:** *Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help.* 17 | 18 | **Get the code (updated!):** [**Download the starter code here**](http://cs224d.stanford.edu/assignment2/assignment2.zip) and [**the assignment handout here**](http://cs224d.stanford.edu/assignment2/assignment2.pdf). 19 | 20 | **Python package requirements:** The core requirements for this assignment are 21 | * tensorflow 22 | * numpy 23 | 24 | If you have a recent Linux (**Ubuntu 14.04** or later) install or Mac OS X, the default TensorFlow installation directions will work well for you. If not, we recommend using the installation on the [**corn clusters**](https://web.stanford.edu/group/farmshare/cgi-bin/wiki/index.php/Main_Page). Note that you will need to use the system default Python, not a local Anaconda Python. 25 | 26 | The corn clusters don't provide GPU support. If you'd like to use GPUs, we recommend using AWS. We've put together a [**brief tutorial**](http://cs224d.stanford.edu/supplementary/aws-tutorial-2.pdf) with directions on how to get started with TensorFlow on AWS. 27 | 28 | Submitting your work 29 | -------------------- 30 | 31 | Once you are done working, run the `collectSubmission.sh` script; this will produce a file called `assignment2.zip`. Rename this file to `<your SUNet ID>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be `jdoe.zip`. 32 | 33 | For the written component, please upload a PDF file of your solutions to `Gradescope`. If you are enrolled in the class you should have been signed up automatically. If you added the class late or are not signed up, post privately to Piazza and we will add you to the roster. When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. This is crucial so that we can provide accurate feedback. If a question has no written component (completely programmatic), map it to the same page as the previous or next section. 34 | 35 | Please upload your programming submission below. 36 | 37 | 38 | Assignment Overview (Tasks) 39 | --------------------------- 40 | 41 | There will be three parts to this assignment. Each part has written and code components. The assignment is designed to be completed in order, as later sections leverage solutions to earlier parts. We recommend reading the assignment carefully and starting early, as some parts may take significant time to run.
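To give a flavor of the TensorFlow workflow these parts build on, here is a minimal, illustrative sketch of a softmax classifier trained through the placeholder/`feed_dict` pattern. Everything in it (names, sizes, hyperparameters) is made up for the example and is **not** part of the starter code:

```python
import numpy as np
import tensorflow as tf

n_features, n_classes, batch_size = 50, 5, 64  # illustrative sizes only

# Placeholders mark where data enters the graph; values are supplied at run time.
x = tf.placeholder(tf.float32, shape=(None, n_features))
y = tf.placeholder(tf.float32, shape=(None, n_classes))

# A linear layer followed by a softmax over the classes.
W = tf.Variable(tf.zeros((n_features, n_classes)))
b = tf.Variable(tf.zeros((n_classes,)))
pred = tf.nn.softmax(tf.matmul(x, W) + b)

# Mean cross-entropy loss and a plain gradient-descent training op.
loss = -tf.reduce_mean(tf.reduce_sum(y * tf.log(pred + 1e-12), reduction_indices=1))
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    # One training step on a random batch, fed in through feed_dict.
    batch_x = np.random.randn(batch_size, n_features).astype(np.float32)
    batch_y = np.eye(n_classes)[np.random.randint(n_classes, size=batch_size)].astype(np.float32)
    _, batch_loss = sess.run([train_op, loss], feed_dict={x: batch_x, y: batch_y})
```

The starter code's `Model` class (see `model.py`) organizes the same ingredients, placeholder creation and a `create_feed_dict` helper, that Q1, Q2, and Q3 build on.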
42 | 43 | Q1: TensorFlow Softmax (20 points) 44 | ---------------------------------- 45 | 46 | Q2: TensorFlow NER Window Model (35 points) 47 | ------------------------------------------- 48 | 49 | Q3: TensorFlow RNN Language Model (45 points) 50 | --------------------------------------------- 51 | -------------------------------------------------------------------------------- /assignment2/assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment2/assignment2.pdf -------------------------------------------------------------------------------- /assignment2/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment2/data_utils/__init__.py -------------------------------------------------------------------------------- /assignment2/data_utils/ner.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Utility functions for NER assignment 3 | # Assigment 2, part 1 for CS224D 4 | ## 5 | 6 | from .utils import invert_dict 7 | from numpy import * 8 | 9 | def load_wv(vocabfile, wvfile): 10 | wv = loadtxt(wvfile, dtype=float) 11 | with open(vocabfile) as fd: 12 | words = [line.strip() for line in fd] 13 | num_to_word = dict(enumerate(words)) 14 | word_to_num = invert_dict(num_to_word) 15 | return wv, word_to_num, num_to_word 16 | 17 | 18 | def save_predictions(y, filename): 19 | """Save predictions, one per line.""" 20 | with open(filename, 'w') as fd: 21 | fd.write("\n".join(map(str, y))) 22 | fd.write("\n") -------------------------------------------------------------------------------- /assignment2/data_utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys, os, re, json 2 | import itertools 3 | from collections import Counter 4 | import time 5 | from numpy import * 6 | 7 | import pandas as pd 8 | 9 | 10 | def invert_dict(d): 11 | return {v:k for k,v in d.items()} 12 | 13 | def flatten1(lst): 14 | return list(itertools.chain.from_iterable(lst)) 15 | 16 | def load_wv_pandas(fname): 17 | return pd.read_hdf(fname, 'data') 18 | 19 | def extract_wv(df): 20 | num_to_word = dict(enumerate(df.index)) 21 | word_to_num = invert_dict(num_to_word) 22 | wv = df.as_matrix() 23 | return wv, word_to_num, num_to_word 24 | 25 | def canonicalize_digits(word): 26 | if any([c.isalpha() for c in word]): return word 27 | word = re.sub("\d", "DG", word) 28 | if word.startswith("DG"): 29 | word = word.replace(",", "") # remove thousands separator 30 | return word 31 | 32 | def canonicalize_word(word, wordset=None, digits=True): 33 | word = word.lower() 34 | if digits: 35 | if (wordset != None) and (word in wordset): return word 36 | word = canonicalize_digits(word) # try to canonicalize numbers 37 | if (wordset == None) or (word in wordset): return word 38 | else: return "UUUNKKK" # unknown token 39 | 40 | 41 | ## 42 | # Utility functions used to create dataset 43 | ## 44 | def augment_wv(df, extra=["UUUNKKK"]): 45 | for e in extra: 46 | df.loc[e] = zeros(len(df.columns)) 47 | 48 | def prune_wv(df, vocab, extra=["UUUNKKK"]): 49 | """Prune word vectors to vocabulary.""" 50 | items = set(vocab).union(set(extra)) 51 | return df.filter(items=items, axis='index') 52 | 53 | def load_wv_raw(fname): 54 | return pd.read_table(fname, 
sep="\s+", 55 | header=None, 56 | index_col=0, 57 | quoting=3) 58 | 59 | def load_dataset(fname): 60 | docs = [] 61 | with open(fname, encoding="utf-8") as fd: 62 | cur = [] 63 | for line in fd: 64 | # new sentence on -DOCSTART- or blank line 65 | if re.match(r"-DOCSTART-.+", line) or (len(line.strip()) == 0): 66 | if len(cur) > 0: 67 | docs.append(cur) 68 | cur = [] 69 | else: # read in tokens 70 | cur.append(line.strip().split("\t",1)) 71 | # flush running buffer 72 | docs.append(cur) 73 | return docs 74 | 75 | def extract_tag_set(docs): 76 | tags = set(flatten1([[t[1].split("|")[0] for t in d] for d in docs])) 77 | return tags 78 | 79 | def extract_word_set(docs): 80 | words = set(flatten1([[t[0] for t in d] for d in docs])) 81 | return words 82 | 83 | def pad_sequence(seq, left=1, right=1): 84 | return left*[("", "")] + seq + right*[("", "")] 85 | 86 | ## 87 | # For window models 88 | def seq_to_windows(words, tags, word_to_num, tag_to_num, left=1, right=1): 89 | ns = len(words) 90 | X = [] 91 | y = [] 92 | for i in range(ns): 93 | if words[i] == "" or words[i] == "": 94 | continue # skip sentence delimiters 95 | tagn = tag_to_num[tags[i]] 96 | idxs = [word_to_num[words[ii]] 97 | for ii in range(i - left, i + right + 1)] 98 | X.append(idxs) 99 | y.append(tagn) 100 | return array(X), array(y) 101 | 102 | def docs_to_windows(docs, word_to_num, tag_to_num, wsize=3): 103 | pad = (wsize - 1)//2 104 | docs = flatten1([pad_sequence(seq, left=pad, right=pad) for seq in docs]) 105 | 106 | words, tags = zip(*docs) 107 | words = [canonicalize_word(w, word_to_num) for w in words] 108 | tags = [t.split("|")[0] for t in tags] 109 | return seq_to_windows(words, tags, word_to_num, tag_to_num, pad, pad) 110 | 111 | def window_to_vec(window, L): 112 | """Concatenate word vectors for a given window.""" 113 | return concatenate([L[i] for i in window]) 114 | 115 | ## 116 | # For fixed-window LM: 117 | # each row of X is a list of word indices 118 | # each entry of y is the word index to predict 119 | def seq_to_lm_windows(words, word_to_num, ngram=2): 120 | ns = len(words) 121 | X = [] 122 | y = [] 123 | for i in range(ns): 124 | if words[i] == "": 125 | continue # skip sentence begin, but do predict end 126 | idxs = [word_to_num[words[ii]] 127 | for ii in range(i - ngram + 1, i + 1)] 128 | X.append(idxs[:-1]) 129 | y.append(idxs[-1]) 130 | return array(X), array(y) 131 | 132 | def docs_to_lm_windows(docs, word_to_num, ngram=2): 133 | docs = flatten1([pad_sequence(seq, left=(ngram-1), right=1) 134 | for seq in docs]) 135 | words = [canonicalize_word(wt[0], word_to_num) for wt in docs] 136 | return seq_to_lm_windows(words, word_to_num, ngram) 137 | 138 | 139 | ## 140 | # For RNN LM 141 | # just convert each sentence to a list of indices 142 | # after padding each with ... 
tokens 143 | def seq_to_indices(words, word_to_num): 144 | return array([word_to_num[w] for w in words]) 145 | 146 | def docs_to_indices(docs, word_to_num): 147 | docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 148 | ret = [] 149 | for seq in docs: 150 | words = [canonicalize_word(wt[0], word_to_num) for wt in seq] 151 | ret.append(seq_to_indices(words, word_to_num)) 152 | 153 | # return as numpy array for fancier slicing 154 | return array(ret, dtype=object) 155 | 156 | def offset_seq(seq): 157 | return seq[:-1], seq[1:] 158 | 159 | def seqs_to_lmXY(seqs): 160 | X, Y = zip(*[offset_seq(s) for s in seqs]) 161 | return array(X, dtype=object), array(Y, dtype=object) 162 | 163 | ## 164 | # For RNN tagger 165 | # return X, Y as lists 166 | # where X[i] is indices, Y[i] is tags for a sequence 167 | # NOTE: this does not use padding tokens! 168 | # (RNN should natively handle begin/end) 169 | def docs_to_tag_sequence(docs, word_to_num, tag_to_num): 170 | # docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 171 | X = [] 172 | Y = [] 173 | for seq in docs: 174 | if len(seq) < 1: continue 175 | words, tags = zip(*seq) 176 | 177 | words = [canonicalize_word(w, word_to_num) for w in words] 178 | x = seq_to_indices(words, word_to_num) 179 | X.append(x) 180 | 181 | tags = [t.split("|")[0] for t in tags] 182 | y = seq_to_indices(tags, tag_to_num) 183 | Y.append(y) 184 | 185 | # return as numpy array for fancier slicing 186 | return array(X, dtype=object), array(Y, dtype=object) 187 | 188 | def idxs_to_matrix(idxs, L): 189 | """Return a matrix X with each row 190 | as a word vector for the corresponding 191 | index in idxs.""" 192 | return vstack([L[i] for i in idxs]) 193 | -------------------------------------------------------------------------------- /assignment2/model.py: -------------------------------------------------------------------------------- 1 | class Model(object): 2 | """Abstracts a Tensorflow graph for a learning task. 3 | 4 | We use various Model classes as usual abstractions to encapsulate tensorflow 5 | computational graphs. Each algorithm you will construct in this homework will 6 | inherit from a Model object. 7 | """ 8 | def __init__(self): 9 | self.input_data = None 10 | 11 | def load_data(self): 12 | """Loads data from disk and stores it in memory. 13 | 14 | Feel free to add instance variables to Model object that store loaded data. 15 | """ 16 | raise NotImplementedError("Each Model must re-implement this method.") 17 | 18 | def add_placeholders(self): 19 | """Adds placeholder variables to tensorflow computational graph. 20 | 21 | Tensorflow uses placeholder variables to represent locations in a 22 | computational graph where data is inserted. These placeholders are used as 23 | inputs by the rest of the model building code and will be fed data during 24 | training. 25 | 26 | See for more information: 27 | 28 | https://www.tensorflow.org/versions/r0.7/api_docs/python/io_ops.html#placeholders 29 | """ 30 | raise NotImplementedError("Each Model must re-implement this method.") 31 | 32 | def create_feed_dict(self, input_batch, label_batch): 33 | """Creates the feed_dict for training the given step. 34 | 35 | A feed_dict takes the form of: 36 | 37 | feed_dict = { 38 | : , 39 | .... 40 | } 41 | 42 | If label_batch is None, then no labels are added to feed_dict. 43 | 44 | Hint: The keys for the feed_dict should be a subset of the placeholder 45 | tensors created in add_placeholders. 46 | 47 | Args: 48 | input_batch: A batch of input data. 
49 | label_batch: A batch of label data. 50 | Returns: 51 | feed_dict: The feed dictionary mapping from placeholders to values. 52 | """ 53 | raise NotImplementedError("Each Model must re-implement this method.") 54 | 55 | def add_model(self, input_data): 56 | """Implements core of model that transforms input_data into predictions. 57 | 58 | The core transformation for this model which transforms a batch of input 59 | data into a batch of predictions. 60 | 61 | Args: 62 | input_data: A tensor of shape (batch_size, n_features). 63 | Returns: 64 | out: A tensor of shape (batch_size, n_classes) 65 | """ 66 | raise NotImplementedError("Each Model must re-implement this method.") 67 | 68 | def add_loss_op(self, pred): 69 | """Adds ops for loss to the computational graph. 70 | 71 | Args: 72 | pred: A tensor of shape (batch_size, n_classes) 73 | Returns: 74 | loss: A 0-d tensor (scalar) output 75 | """ 76 | raise NotImplementedError("Each Model must re-implement this method.") 77 | 78 | def run_epoch(self, sess, input_data, input_labels): 79 | """Runs an epoch of training. 80 | 81 | Trains the model for one-epoch. 82 | 83 | Args: 84 | sess: tf.Session() object 85 | input_data: np.ndarray of shape (n_samples, n_features) 86 | input_labels: np.ndarray of shape (n_samples, n_classes) 87 | Returns: 88 | average_loss: scalar. Average minibatch loss of model on epoch. 89 | """ 90 | raise NotImplementedError("Each Model must re-implement this method.") 91 | 92 | def fit(self, sess, input_data, input_labels): 93 | """Fit model on provided data. 94 | 95 | Args: 96 | sess: tf.Session() 97 | input_data: np.ndarray of shape (n_samples, n_features) 98 | input_labels: np.ndarray of shape (n_samples, n_classes) 99 | Returns: 100 | losses: list of loss per epoch 101 | """ 102 | raise NotImplementedError("Each Model must re-implement this method.") 103 | 104 | def predict(self, sess, input_data, input_labels=None): 105 | """Make predictions from the provided model. 106 | Args: 107 | sess: tf.Session() 108 | input_data: np.ndarray of shape (n_samples, n_features) 109 | input_labels: np.ndarray of shape (n_samples, n_classes) 110 | Returns: 111 | average_loss: Average loss of model. 112 | predictions: Predictions of model on input_data 113 | """ 114 | return None, None 115 | raise NotImplementedError("Each Model must re-implement this method.") 116 | 117 | class LanguageModel(Model): 118 | """Abstracts a Tensorflow graph for learning language models. 119 | 120 | Adds ability to do embedding. 121 | """ 122 | def add_embedding(self): 123 | """Add embedding layer. that maps from vocabulary to vectors. 124 | """ 125 | raise NotImplementedError("Each Model must re-implement this method.") 126 | 127 | def predict(self, sess, input_data, input_labels=None): 128 | return None, None 129 | -------------------------------------------------------------------------------- /assignment2/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def softmax(x): 5 | """ 6 | Compute the softmax function in tensorflow. 7 | 8 | You might find the tensorflow functions tf.exp, tf.reduce_max, 9 | tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may 10 | not need to use all of these functions). Recall also that many common 11 | tensorflow operations are sugared (e.g. x * y does a tensor multiplication 12 | if x and y are both tensors). 
Make sure to implement the numerical stability 13 | fixes as in the previous homework! 14 | 15 | Args: 16 | x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are 17 | represented by row-vectors. (For simplicity, no need to handle 1-d 18 | input as in the previous homework) 19 | Returns: 20 | out: tf.Tensor with shape (n_sample, n_features). You need to construct this 21 | tensor in this problem. 22 | """ 23 | 24 | ### YOUR CODE HERE 25 | log_c = tf.reduce_max(x, reduction_indices=[len(x.get_shape()) - 1], keep_dims=True) 26 | y = tf.reduce_sum(tf.exp(x - log_c), reduction_indices=[len(x.get_shape()) - 1], keep_dims=True) 27 | out = tf.exp(x - log_c) / y 28 | ### END YOUR CODE 29 | 30 | return out 31 | 32 | def cross_entropy_loss(y, yhat): 33 | """ 34 | Compute the cross entropy loss in tensorflow. 35 | 36 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 37 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 38 | be of dtype tf.float32. 39 | 40 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 41 | solutions are possible, so you may not need to use all of these functions). 42 | 43 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 44 | functions. 45 | 46 | Args: 47 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 48 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 49 | probability distribution and should sum to 1. 50 | Returns: 51 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 52 | tensor in the problem. 53 | """ 54 | ### YOUR CODE HERE 55 | out = - tf.reduce_sum(y * tf.log(yhat + 1e-12)) 56 | # out = tf.reduce_sum(- tf.reduce_sum(y * tf.log(yhat + 1e-12), reduction_indices=[len(yhat.get_shape()) - 1])) 57 | 58 | ### END YOUR CODE 59 | return out 60 | 61 | def cross_entropy_mean_loss(y, yhat): 62 | """ 63 | Compute the cross entropy loss in tensorflow. 64 | 65 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 66 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 67 | be of dtype tf.float32. 68 | 69 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 70 | solutions are possible, so you may not need to use all of these functions). 71 | 72 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 73 | functions. 74 | 75 | Args: 76 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 77 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 78 | probability distribution and should sum to 1. 79 | Returns: 80 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 81 | tensor in the problem. 82 | """ 83 | ### YOUR CODE HERE 84 | # tf.reduce_mean(- tf.reduce_sum(y * tf.log(yhat + 1e-12), reduction_indices=[len(yhat.get_shape()) - 1])) 85 | out = -tf.reduce_sum(y * tf.log(yhat + 1e-12)) 86 | ### END YOUR CODE 87 | return out 88 | 89 | 90 | 91 | def test_softmax_basic(): 92 | """ 93 | Some simple tests to get you started. 94 | Warning: these are not exhaustive. 
95 | """ 96 | print("Running basic tests...") 97 | test1 = softmax(tf.convert_to_tensor( 98 | np.array([[1001,1002],[3,4]]), dtype=tf.float32)) 99 | with tf.Session(): 100 | test1 = test1.eval() 101 | assert np.amax(np.fabs(test1 - np.array( 102 | [0.26894142, 0.73105858]))) <= 1e-6 103 | 104 | test2 = softmax(tf.convert_to_tensor( 105 | np.array([[-1001,-1002]]), dtype=tf.float32)) 106 | with tf.Session(): 107 | test2 = test2.eval() 108 | assert np.amax(np.fabs(test2 - np.array( 109 | [0.73105858, 0.26894142]))) <= 1e-6 110 | 111 | print( "Basic (non-exhaustive) softmax tests pass\n") 112 | 113 | def test_cross_entropy_loss_basic(): 114 | """ 115 | Some simple tests to get you started. 116 | Warning: these are not exhaustive. 117 | """ 118 | y = np.array([[0, 1], [1, 0], [1, 0]], dtype=np.int32) 119 | yhat = np.array([[.5, .5], [.5, .5], [.5, .5]]) 120 | 121 | #implicit dtype conversion passes in 0.7.1 not in 0.8.0 122 | test1 = cross_entropy_loss( 123 | tf.convert_to_tensor(y, dtype=tf.float32), 124 | tf.convert_to_tensor(yhat, dtype=tf.float32)) 125 | with tf.Session(): 126 | test1 = test1.eval() 127 | result = -3 * np.log(.5) 128 | assert np.amax(np.fabs(test1 - result)) <= 1e-6 129 | print( "Basic (non-exhaustive) cross-entropy tests pass\n") 130 | 131 | if __name__ == "__main__": 132 | test_softmax_basic() 133 | test_cross_entropy_loss_basic() 134 | -------------------------------------------------------------------------------- /assignment2/q2_initialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def xavier_weight_init(): 5 | """ 6 | Returns function that creates random tensor. 7 | 8 | The specified function will take in a shape (tuple or 1-d array) and must 9 | return a random tensor of the specified shape and must be drawn from the 10 | Xavier initialization distribution. 11 | 12 | Hint: You might find tf.random_uniform useful. 13 | """ 14 | def _xavier_initializer(shape, **kwargs): 15 | """Defines an initializer for the Xavier distribution. 16 | 17 | This function will be used as a variable scope initializer. 18 | 19 | https://www.tensorflow.org/versions/r0.7/how_tos/variable_scope/index.html#initializers-in-variable-scope 20 | 21 | Args: 22 | shape: Tuple or 1-d array that species dimensions of requested tensor. 23 | Returns: 24 | out: tf.Tensor of specified shape sampled from Xavier distribution. 25 | """ 26 | ### YOUR CODE HERE 27 | eps = 4 * np.sqrt(6 / np.sum(shape)) 28 | out = tf.random_uniform(shape=shape, minval=-eps, maxval=eps, dtype=tf.float32) 29 | ### END YOUR CODE 30 | return out 31 | # Returns defined initializer function. 32 | return _xavier_initializer 33 | 34 | def test_initialization_basic(): 35 | """ 36 | Some simple tests for the initialization. 37 | """ 38 | print( "Running basic tests...") 39 | xavier_initializer = xavier_weight_init() 40 | shape = (1,) 41 | xavier_mat = xavier_initializer(shape) 42 | assert xavier_mat.get_shape() == shape 43 | 44 | shape = (1, 2, 3) 45 | xavier_mat = xavier_initializer(shape) 46 | assert xavier_mat.get_shape() == shape 47 | print( "Basic (non-exhaustive) Xavier initialization tests pass\n") 48 | 49 | def test_initialization(): 50 | """ 51 | Use this space to test your Xavier initialization code by running: 52 | python q1_initialization.py 53 | This function will not be called by the autograder, nor will 54 | your tests be graded. 
55 | """ 56 | print( "Running your tests...") 57 | ### YOUR CODE HERE 58 | xavier_initializer = xavier_weight_init() 59 | sess = tf.Session() 60 | 61 | shape = (100,100) 62 | tf_xavier = xavier_initializer(shape) 63 | sess.run(tf_xavier.initializer) 64 | xavier = sess.run(tf_xavier) 65 | # print(np.mean(xavier)) 66 | # print(np.max(xavier)) 67 | # print(np.min(xavier)) 68 | # print(np.std(xavier)) 69 | # eps = np.sqrt(6/np.sum((100,100))) 70 | # print((2 * eps)/np.sqrt(12)) 71 | # expect min to roughly be -np.sqrt(6)/sqrt(200) 72 | # expect max to roughly be np.sqrt(6)/sqrt(200) 73 | # expect mean to be roughly 0. 74 | # expect variance to be (b - a) ** 2 / 12 75 | 76 | ### END YOUR CODE 77 | 78 | if __name__ == "__main__": 79 | test_initialization_basic() 80 | test_initialization() 81 | -------------------------------------------------------------------------------- /assignment2/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | *.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | 
.texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /assignment2/test_confusion.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib as mpl 3 | import matplotlib.pyplot as plt 4 | 5 | import seaborn as sns 6 | 7 | import tensorflow as tf 8 | 9 | confusion = np.array([[42452, 27, 45, 175, 60], 10 | [ 255, 1636, 12, 152, 39], 11 | [ 317, 26, 863, 42, 20], 12 | [ 598, 73, 31, 1319, 71], 13 | [ 546, 24, 3, 49, 2527]], dtype=np.int32) 14 | 15 | cm = confusion.copy() 16 | cm = cm.astype('float') / cm.sum(axis=1, keepdims=True) 17 | # cm *= 255 18 | # cm = cm.astype('uint8') 19 | cm = cm[:, :] 20 | print(cm) 21 | 22 | plt.figure() 23 | cmap1 = mpl.colors.ListedColormap(sns.color_palette("coolwarm", 100)) 24 | # print(sns.color_palette(sns.color_palette("coolwarm", 100))) 25 | 26 | #plt.cm.Blues 27 | plt.imshow(cm, interpolation='nearest', cmap=cmap1) 28 | plt.title("Confusion Matrix") 29 | plt.colorbar() 30 | tick_marks = np.arange(len(["a", "b", "c", "d", "e"])) 31 | plt.xticks(tick_marks, ["a", "b", "c", "d", "e"], rotation=45) 32 | plt.yticks(tick_marks, ["a", "b", "c", "d", "e"]) 33 | plt.gca().xaxis.grid(b=False) 34 | plt.gca().yaxis.grid(b=False) 35 | plt.tight_layout() 36 | plt.ylabel('True label') 37 | plt.xlabel('Predicted label') 38 | plt.savefig('cm.png') 39 | plt.show() 40 | 41 | 42 | 43 | 44 | # conf_matrix = tf.image_summary("confusion_matrix" + str(epoch), tf.convert_to_tensor(confusion.astype(np.float32))) 45 | # conf_summary = session.run(conf_matrix) 46 | # model.summary_writer.add_summary(conf_summary, epoch) 47 | -------------------------------------------------------------------------------- /assignment2/tests/test_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | import pytest 5 | 6 | from q1_softmax import softmax, cross_entropy_loss 7 | 8 | def rel_error(x,y): 9 | """ returns relative error """ 10 | return np.max(np.abs(x-y) / np.maximum(1e-8, np.abs(x) + np.abs(y))) 11 | 12 | @pytest.fixture(scope='module') 13 | def array_1(): 14 | return np.array([1,2], dtype=np.float32) 15 | 16 | @pytest.fixture(scope='module') 17 | def array_2(): 18 | return np.array([1001,1002], dtype=np.float32) 19 | 20 | @pytest.fixture(scope='module') 21 | def array_3(array_2 = array_2()): 22 | return np.array([array_2], dtype=np.float32) 23 | 24 | @pytest.fixture(scope='module') 25 | def array_4(array_1 = array_1(), array_2 = array_2()): 26 | return np.array([array_1, array_2], dtype=np.float32) 27 | 28 | @pytest.fixture(scope='module') 29 | def CE_arrays(): 30 | return np.array([[0, 1], [1, 0], [1, 0]]), np.array([[.5, .5], [.5, .5], [.5, .5]]) 31 | 32 | #this should construct a single tf session per function call 33 | @pytest.fixture(scope='function') 34 | def sess(): 35 | return tf.Session() 36 | 37 | def test_softmax_array_1(array_1): 38 | """ Original softmax test defined in q2_softmax.py; """ 39 | with tf.Session(): 40 | input_array = tf.convert_to_tensor(array_1) 41 | assert rel_error(softmax(input_array).eval(), 42 | np.array([0.26894142, 0.73105858])) < 1e-7 43 | 44 | def test_softmax_array_alt(sess, array_1): 45 | input_array = tf.convert_to_tensor(array_1) 46 | output_array = sess.run(softmax(input_array)) 47 | assert rel_error(output_array, 48 | np.array([0.26894142, 0.73105858])) < 1e-7 49 | 50 
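# The parametrized test below only checks that softmax() runs to completion on the
# 1-d and 2-d fixture arrays; the cross-entropy tests that follow run the loss and,
# in test_CE_loss_validation, compare its value against the expected -3 * log(0.5).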
| @pytest.mark.parametrize("input_array", [array_1(), array_2(), array_3(), array_4()]) 51 | def test_get_session(sess, input_array): 52 | sess.run(softmax(tf.convert_to_tensor(input_array))) 53 | assert 1 54 | print("softmax ran to completion") 55 | 56 | def test_CE_loss(sess, CE_arrays): 57 | y, y_hat = CE_arrays 58 | y = tf.convert_to_tensor(y, dtype=tf.float64) 59 | y_hat = tf.convert_to_tensor(y_hat, dtype=tf.float64) 60 | sess.run(cross_entropy_loss(y,y_hat)) 61 | assert 1 62 | print("CE_loss ran to completion") 63 | 64 | def test_CE_loss_validation(sess, CE_arrays): 65 | y, y_hat = CE_arrays 66 | y = tf.convert_to_tensor(y, dtype=tf.float64) 67 | y_hat = tf.convert_to_tensor(y_hat, dtype=tf.float64) 68 | value = sess.run(cross_entropy_loss(y,y_hat)) 69 | assert rel_error(value, -3 * np.log(0.5)) <= 1e-7 70 | 71 | -------------------------------------------------------------------------------- /assignment2/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import numpy as np 4 | 5 | class Vocab(object): 6 | def __init__(self): 7 | self.word_to_index = {} 8 | self.index_to_word = {} 9 | self.word_freq = defaultdict(int) 10 | self.total_words = 0 11 | self.unknown = '' 12 | self.add_word(self.unknown, count=0) 13 | 14 | def add_word(self, word, count=1): 15 | if word not in self.word_to_index: 16 | index = len(self.word_to_index) 17 | self.word_to_index[word] = index 18 | self.index_to_word[index] = word 19 | self.word_freq[word] += count 20 | 21 | def construct(self, words): 22 | for word in words: 23 | self.add_word(word) 24 | self.total_words = float(sum(self.word_freq.values())) 25 | print( '{} total words with {} uniques'.format(self.total_words, len(self.word_freq))) 26 | 27 | def encode(self, word): 28 | if word not in self.word_to_index: 29 | word = self.unknown 30 | return self.word_to_index[word] 31 | 32 | def decode(self, index): 33 | return self.index_to_word[index] 34 | 35 | def __len__(self): 36 | return len(self.word_freq) 37 | 38 | def calculate_perplexity(log_probs): 39 | # https://web.stanford.edu/class/cs124/lec/languagemodeling.pdf 40 | perp = 0 41 | for p in log_probs: 42 | perp += -p 43 | return np.exp(perp / len(log_probs)) 44 | 45 | def get_ptb_dataset(dataset='train'): 46 | fn = 'data/ptb/ptb.{}.txt' 47 | for line in open(fn.format(dataset), encoding="utf-8"): 48 | for word in line.split(): 49 | yield word 50 | # Add token to the end of the line 51 | # Equivalent to in: 52 | # https://github.com/wojzaremba/lstm/blob/master/data.lua#L32 53 | # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L31 54 | yield '' 55 | 56 | def ptb_iterator(raw_data, batch_size, num_steps): 57 | # Pulled from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py#L82 58 | raw_data = np.array(raw_data, dtype=np.int32) 59 | data_len = len(raw_data) 60 | batch_len = data_len // batch_size 61 | data = np.zeros([batch_size, batch_len], dtype=np.int32) 62 | for i in range(batch_size): 63 | data[i] = raw_data[batch_len * i:batch_len * (i + 1)] 64 | epoch_size = (batch_len - 1) // num_steps 65 | if epoch_size == 0: 66 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 67 | for i in range(epoch_size): 68 | x = data[:, i * num_steps:(i + 1) * num_steps] 69 | y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] 70 | yield (x, y) 71 | 72 | def sample(a, temperature=1.0): 73 | # helper function to sample an index 
from a probability array 74 | # from https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py 75 | a = np.log(a) / temperature 76 | a = np.exp(a) / np.sum(np.exp(a)) 77 | return np.argmax(np.random.multinomial(1, a, 1)) 78 | 79 | def data_iterator(orig_X, orig_y=None, batch_size=32, label_size=2, shuffle=False): 80 | # Optionally shuffle the data before training 81 | if shuffle: 82 | indices = np.random.permutation(len(orig_X)) 83 | data_X = orig_X[indices] 84 | data_y = orig_y[indices] if np.any(orig_y) else None 85 | else: 86 | data_X = orig_X 87 | data_y = orig_y 88 | ### 89 | total_processed_examples = 0 90 | total_steps = int(np.ceil(len(data_X) / float(batch_size))) 91 | for step in range(total_steps): 92 | # Create the batch by selecting up to batch_size elements 93 | batch_start = step * batch_size 94 | x = data_X[batch_start:batch_start + batch_size] 95 | # Convert our target from the class index to a one hot vector 96 | y = None 97 | if np.any(data_y): 98 | y_indices = data_y[batch_start:batch_start + batch_size] 99 | y = np.zeros((len(x), label_size), dtype=np.int32) 100 | y[np.arange(len(y_indices)), y_indices] = 1 101 | ### 102 | yield x, y 103 | total_processed_examples += len(x) 104 | # Sanity check to make sure we iterated over all the dataset as intended 105 | assert total_processed_examples == len(data_X), 'Expected {} and processed {}'.format(len(data_X), total_processed_examples) 106 | -------------------------------------------------------------------------------- /assignment3/assignment3_2016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/assignment3_2016.pdf -------------------------------------------------------------------------------- /assignment3/codebase_release/loss_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/codebase_release/loss_history.png -------------------------------------------------------------------------------- /assignment3/codebase_release/prepare_submission.sh: -------------------------------------------------------------------------------- 1 | echo "Please enter yout sunetid: " 2 | read sunetid 3 | 4 | zip -r $sunetid.zip rnn.py weights 5 | -------------------------------------------------------------------------------- /assignment3/codebase_release/rnn_pytorch.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import math 8 | import time 9 | import itertools 10 | import shutil 11 | 12 | import torch 13 | import torch.nn as nn 14 | from torch.autograd import Variable 15 | import torch.nn.functional as F 16 | 17 | from torch.nn.utils import clip_grad_norm 18 | 19 | import tree as tr 20 | from utils import Vocab 21 | 22 | from collections import OrderedDict 23 | 24 | import seaborn as sns 25 | 26 | from random import shuffle 27 | 28 | sns.set_style('whitegrid') 29 | 30 | embed_size = 100 31 | label_size = 2 32 | early_stopping = 2 33 | anneal_threshold = 0.99 34 | anneal_by = 1.5 35 | max_epochs = 30 36 | lr = 0.01 37 | l2 = 0.02 38 | average_over = 700 39 | train_size = 800 40 | 41 | 42 | class RNN_Model(nn.Module): 43 | def __init__(self, vocab, embed_size=100, label_size=2): 44 | 
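        # Tree-structured RNN (recursive neural network) for binary sentiment over parse
        # trees: a word-embedding lookup, left/right composition layers (fcl, fcr) mapping
        # embed_size -> embed_size, and a linear projection from each node's hidden state
        # to label_size logits; walk_tree() composes children recursively and records
        # every node's projected logits in self.node_list.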
super(RNN_Model, self).__init__() 45 | self.embed_size = embed_size 46 | self.label_size = label_size 47 | self.vocab = vocab 48 | self.embedding = nn.Embedding(int(self.vocab.total_words), self.embed_size) 49 | self.fcl = nn.Linear(self.embed_size, self.embed_size, bias=True) 50 | self.fcr = nn.Linear(self.embed_size, self.embed_size, bias=True) 51 | self.projection = nn.Linear(self.embed_size, self.label_size , bias=True) 52 | self.activation = F.relu 53 | self.node_list = [] 54 | 55 | def init_variables(self): 56 | print("total_words = ", self.vocab.total_words) 57 | 58 | def walk_tree(self, in_node): 59 | if in_node.isLeaf: 60 | word_id = torch.LongTensor((self.vocab.encode(in_node.word), )) 61 | current_node = self.embedding(Variable(word_id)) 62 | self.node_list.append(self.projection(current_node).unsqueeze(0)) 63 | else: 64 | left = self.walk_tree(in_node.left) 65 | right = self.walk_tree(in_node.right) 66 | current_node = self.activation(self.fcl(left) + self.fcl(right)) 67 | self.node_list.append(self.projection(current_node).unsqueeze(0)) 68 | return current_node 69 | 70 | def forward(self, x): 71 | """ 72 | Forward function accepts input data and returns a Variable of output data 73 | """ 74 | self.node_list = [] 75 | root_node = self.walk_tree(x.root) 76 | all_nodes = torch.cat(self.node_list) 77 | #now I need to project out 78 | return all_nodes 79 | 80 | def main(): 81 | print("do nothing") 82 | 83 | 84 | if __name__ == '__main__': 85 | train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200) 86 | vocab = Vocab() 87 | train_sents = [t.get_words() for t in train_data] 88 | vocab.construct(list(itertools.chain.from_iterable(train_sents))) 89 | model = RNN_Model(vocab, embed_size=50) 90 | main() 91 | 92 | lr = 0.01 93 | loss_history = [] 94 | optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0) 95 | # params (iterable): iterable of parameters to optimize or dicts defining 96 | # parameter groups 97 | # lr (float): learning rate 98 | # momentum (float, optional): momentum factor (default: 0) 99 | # weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 100 | #torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0) 101 | # print(model.fcl._parameters['weight']) 102 | 103 | for epoch in range(max_epochs): 104 | print("epoch = ", epoch) 105 | shuffle(train_data) 106 | total_root_prediction = 0. 107 | total_summed_accuracy = 0. 
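        # Learning-rate annealing: every 10th epoch (after the first), halve the
        # learning rate in each of the optimizer's parameter groups.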
108 | if (epoch % 10 == 0) and epoch > 0: 109 | for param_group in optimizer.param_groups: 110 | #update learning rate 111 | print("Droping learning from %f to %f"%(param_group['lr'], 0.5 * param_group['lr'])) 112 | param_group['lr'] = 0.5 * param_group['lr'] 113 | for step, tree in enumerate(train_data): 114 | # if step == 0: 115 | # optimizer.zero_grad() 116 | # objective_loss.backward() 117 | # if step == len(train_data) - 1: 118 | # optimizer.step() 119 | 120 | all_nodes = model(tree) 121 | 122 | labels = [] 123 | indices = [] 124 | for x,y in enumerate(tree.labels): 125 | if y != 2: 126 | labels.append(y) 127 | indices.append(x) 128 | 129 | torch_labels = torch.LongTensor([l for l in labels if l != 2]) 130 | logits = all_nodes.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 131 | logits_squeezed = logits.squeeze() 132 | predictions = logits.max(dim=2)[1].squeeze() 133 | 134 | correct = predictions.data == torch_labels 135 | #so correctly predicted (root); 136 | total_root_prediction += float(correct[-1]) 137 | total_summed_accuracy += float(correct.sum()) / len(labels) 138 | 139 | objective_loss = F.cross_entropy(input=logits_squeezed, target=Variable(torch_labels)) 140 | if objective_loss.data[0] > 5 and epoch > 10: 141 | #interested in phrase that have large loss (i.e. incorrectly classified) 142 | print(' '.join(tree.get_words())) 143 | 144 | loss_history.append(objective_loss.data[0]) 145 | if step % 20 == 0 and step > 0: 146 | print("step %3d, last loss %0.3f, mean loss (%d steps) %0.3f" % (step, objective_loss.data[0], average_over, np.mean(loss_history[-average_over:]))) 147 | optimizer.zero_grad() 148 | 149 | if np.isnan(objective_loss.data[0]): 150 | print("object_loss was not a number") 151 | sys.exit(1) 152 | else: 153 | objective_loss.backward() 154 | clip_grad_norm(model.parameters(), 5, norm_type=2.) 155 | #temp_grad += model.fcl._parameters['weight'].grad.data 156 | # # Update weights using gradient descent; w1.data and w2.data are Tensors, 157 | # # w1.grad and w2.grad are Variables and w1.grad.data and w2.grad.data are 158 | # # Tensors. 159 | # loss.backward() 160 | # w1.data -= learning_rate * w1.grad.data 161 | # w2.data -= learning_rate * w2.grad.data 162 | optimizer.step() 163 | print("total root predicted correctly = ", total_root_prediction/ float(train_size)) 164 | print("total node (including root) predicted correctly = ", total_summed_accuracy / float(train_size)) 165 | 166 | total_dev_loss = 0. 167 | dev_correct_at_root = 0. 168 | dev_correct_all = 0. 
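    # Dev-set evaluation: accumulate the cross-entropy loss over dev trees and count
    # correct predictions at the root node as well as, per tree, the fraction of
    # correctly labeled non-neutral nodes.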
169 | for step, dev_example in enumerate(dev_data): 170 | all_nodes = model(dev_example) 171 | 172 | labels = [] 173 | indices = [] 174 | for x,y in enumerate(dev_example.labels): 175 | if y != 2: 176 | labels.append(y) 177 | indices.append(x) 178 | torch_labels = torch.LongTensor([l for l in labels if l != 2]) 179 | logits = all_nodes.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 180 | logits_squeezed = logits.squeeze() 181 | predictions = logits.max(dim=2)[1].squeeze() 182 | 183 | correct = predictions.data == torch_labels 184 | #so correctly predicted (root); 185 | dev_correct_at_root += float(correct[-1]) 186 | dev_correct_all += float(correct.sum()) / len(labels) 187 | objective_loss = F.cross_entropy(input=logits_squeezed, target=Variable(torch_labels)) 188 | total_dev_loss += objective_loss.data[0] 189 | print("total_dev_loss = ", total_dev_loss) 190 | print("correct (root) = ", dev_correct_at_root) 191 | print("correct (all)= ", dev_correct_all) 192 | # logits = logits.index_select(dim=0, index=Variable(torch.LongTensor(indices))) 193 | plt.figure() 194 | plt.plot(loss_history) 195 | plt.show() 196 | print("DONE!") -------------------------------------------------------------------------------- /assignment3/codebase_release/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get trees 4 | data=trainDevTestTrees_PTB.zip 5 | curl -O http://nlp.stanford.edu/sentiment/$data 6 | unzip $data 7 | rm -f $data 8 | 9 | -------------------------------------------------------------------------------- /assignment3/codebase_release/tree.py: -------------------------------------------------------------------------------- 1 | import random 2 | UNK = 'UNK' 3 | # This file contains the dataset in a useful way. We populate a list of 4 | # Trees to train/test our Neural Nets such that each Tree contains any 5 | # number of Node objects. 6 | 7 | # The best way to get a feel for how these objects are used in the program is to drop pdb.set_trace() in a few places throughout the codebase 8 | # to see how the trees are used.. look where loadtrees() is called etc.. 9 | 10 | 11 | class Node: # a node in the tree 12 | def __init__(self, label, word=None): 13 | self.label = label 14 | self.word = word 15 | self.parent = None # reference to parent 16 | self.left = None # reference to left child 17 | self.right = None # reference to right child 18 | # true if I am a leaf (could have probably derived this from if I have 19 | # a word) 20 | self.isLeaf = False 21 | # true if we have finished performing fowardprop on this node (note, 22 | # there are many ways to implement the recursion.. 
some might not 23 | # require this flag) 24 | 25 | 26 | class Tree: 27 | 28 | def __init__(self, treeString, openChar='(', closeChar=')'): 29 | tokens = [] 30 | self.open = '(' 31 | self.close = ')' 32 | for toks in treeString.strip().split(): 33 | tokens += list(toks) 34 | self.root = self.parse(tokens) 35 | # get list of labels as obtained through a post-order traversal 36 | self.labels = get_labels(self.root) 37 | self.num_words = len(self.labels) 38 | 39 | def parse(self, tokens, parent=None): 40 | assert tokens[0] == self.open, "Malformed tree" 41 | assert tokens[-1] == self.close, "Malformed tree" 42 | 43 | split = 2 # position after open and label 44 | countOpen = countClose = 0 45 | 46 | if tokens[split] == self.open: 47 | countOpen += 1 48 | split += 1 49 | # Find where left child and right child split 50 | while countOpen != countClose: 51 | if tokens[split] == self.open: 52 | countOpen += 1 53 | if tokens[split] == self.close: 54 | countClose += 1 55 | split += 1 56 | 57 | # New node 58 | node = Node(int(tokens[1])) # zero index labels 59 | 60 | node.parent = parent 61 | 62 | # leaf Node 63 | if countOpen == 0: 64 | node.word = ''.join(tokens[2:-1]).lower() # lower case? 65 | node.isLeaf = True 66 | return node 67 | 68 | node.left = self.parse(tokens[2:split], parent=node) 69 | node.right = self.parse(tokens[split:-1], parent=node) 70 | 71 | return node 72 | 73 | def get_words(self): 74 | leaves = getLeaves(self.root) 75 | words = [node.word for node in leaves] 76 | return words 77 | 78 | 79 | def leftTraverse(node, nodeFn=None, args=None): 80 | """ 81 | Recursive function traverses tree 82 | from left to right. 83 | Calls nodeFn at each node 84 | """ 85 | if node is None: 86 | return 87 | leftTraverse(node.left, nodeFn, args) 88 | leftTraverse(node.right, nodeFn, args) 89 | nodeFn(node, args) 90 | 91 | 92 | def getLeaves(node): 93 | if node is None: 94 | return [] 95 | if node.isLeaf: 96 | return [node] 97 | else: 98 | return getLeaves(node.left) + getLeaves(node.right) 99 | 100 | 101 | def get_labels(node): 102 | if node is None: 103 | return [] 104 | return get_labels(node.left) + get_labels(node.right) + [node.label] 105 | 106 | 107 | def clearFprop(node, words): 108 | node.fprop = False 109 | 110 | 111 | def loadTrees(dataSet='train'): 112 | """ 113 | Loads training trees. Maps leaf node words to word ids. 114 | """ 115 | file = 'trees/%s.txt' % dataSet 116 | print("Loading %s trees.." 
% dataSet) 117 | with open(file, 'r') as fid: 118 | trees = [Tree(l) for l in fid.readlines()] 119 | 120 | return trees 121 | 122 | def simplified_data(num_train, num_dev, num_test): 123 | rndstate = random.getstate() 124 | random.seed(0) 125 | trees = loadTrees('train') + loadTrees('dev') + loadTrees('test') 126 | 127 | #filter extreme trees 128 | pos_trees = [t for t in trees if t.root.label==4] 129 | neg_trees = [t for t in trees if t.root.label==0] 130 | 131 | #binarize labels 132 | binarize_labels(pos_trees) 133 | binarize_labels(neg_trees) 134 | 135 | #split into train, dev, test 136 | print(len(pos_trees), len(neg_trees)) 137 | pos_trees = sorted(pos_trees, key=lambda t: len(t.get_words())) 138 | neg_trees = sorted(neg_trees, key=lambda t: len(t.get_words())) 139 | num_train = num_train // 2 140 | num_dev = num_dev // 2 141 | num_test = num_test // 2 142 | train = pos_trees[:num_train] + neg_trees[:num_train] 143 | dev = pos_trees[num_train : num_train+num_dev] + neg_trees[num_train : num_train+num_dev] 144 | test = pos_trees[num_train+num_dev : num_train+num_dev+num_test] + neg_trees[num_train+num_dev : num_train+num_dev+num_test] 145 | random.shuffle(train) 146 | random.shuffle(dev) 147 | random.shuffle(test) 148 | random.setstate(rndstate) 149 | 150 | 151 | return train, dev, test 152 | 153 | 154 | def binarize_labels(trees): 155 | def binarize_node(node, _): 156 | if node.label<2: 157 | node.label = 0 158 | elif node.label>2: 159 | node.label = 1 160 | for tree in trees: 161 | leftTraverse(tree.root, binarize_node, None) 162 | tree.labels = get_labels(tree.root) 163 | -------------------------------------------------------------------------------- /assignment3/codebase_release/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | 4 | class Vocab(object): 5 | def __init__(self): 6 | self.word_to_index = {} 7 | self.index_to_word = {} 8 | self.word_freq = defaultdict(int) 9 | self.total_words = 0 10 | self.unknown = '' 11 | self.add_word(self.unknown, count=0) 12 | 13 | def add_word(self, word, count=1): 14 | if word not in self.word_to_index: 15 | index = len(self.word_to_index) 16 | self.word_to_index[word] = index 17 | self.index_to_word[index] = word 18 | self.word_freq[word] += count 19 | 20 | def construct(self, words): 21 | for word in words: 22 | self.add_word(word) 23 | self.total_words = float(sum(self.word_freq.values())) 24 | print('{} total words with {} uniques'.format(self.total_words, len(self.word_freq))) 25 | 26 | def encode(self, word): 27 | if word not in self.word_to_index: 28 | word = self.unknown 29 | return self.word_to_index[word] 30 | 31 | def decode(self, index): 32 | return self.index_to_word[index] 33 | 34 | def __len__(self): 35 | return len(self.word_freq) 36 | -------------------------------------------------------------------------------- /assignment3/recursive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/assignment3/recursive.png -------------------------------------------------------------------------------- /class_notebooks/tensorflow_scan.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Scan\n", 8 | "`tensorflow.scan` allows for loops to be written inside a computation graph (which 
using explicit loop structures like `for`) -- backpropagation is handled implicitly by `tensorflow`. Explicitly unrolling the loops requires the creation of new graph nodes for each loop body iteration (although the number of iterations is fixed)." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Cumulative Sum" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "[ 1. 3. 5. 7.]\n" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "import tensorflow as tf\n", 35 | "\n", 36 | "def fn(previous_output, current_input):\n", 37 | " return previous_output + current_input\n", 38 | "\n", 39 | "elems = tf.Variable([1.0, 2.0, 2.0, 2.0])\n", 40 | "elems = tf.identity(elems)\n", 41 | "#required otherwise it will fail;\n", 42 | "\n", 43 | "initializer = tf.constant(0.0)\n", 44 | "out = tf.scan(fn, elems, initializer=initializer)\n", 45 | "\n", 46 | "with tf.Session() as session:\n", 47 | " init_op = tf.initialize_all_variables()\n", 48 | " session.run(init_op)\n", 49 | " value = session.run(out)\n", 50 | " print(value)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "source": [ 59 | "# Loop Equivalence" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 18, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "[ 7.]\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "import tensorflow as tf\n", 79 | "\n", 80 | "def fn(previous_output, current_input):\n", 81 | " return previous_output + current_input\n", 82 | "\n", 83 | "elems = tf.Variable([1.0, 2.0, 2.0, 2.0])\n", 84 | "elems = tf.identity(elems)\n", 85 | "#required otherwise it will fail;\n", 86 | "\n", 87 | "initializer = tf.constant(0.0)\n", 88 | "cum_sum = tf.Variable(0.0)\n", 89 | "\n", 90 | "for x in tf.split(0, elems.get_shape()[0], elems):\n", 91 | " cum_sum += x\n", 92 | " \n", 93 | "with tf.Session() as session:\n", 94 | " init_op = tf.initialize_all_variables()\n", 95 | " session.run(init_op)\n", 96 | " value = session.run(cum_sum)\n", 97 | " print(value)\n", 98 | " #like a reduce operation (but it scans over elements)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": true 106 | }, 107 | "outputs": [], 108 | "source": [] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.4.3+" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 0 132 | } 133 | -------------------------------------------------------------------------------- /old_assignments/assignment1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment1.pdf -------------------------------------------------------------------------------- /old_assignments/assignment1/README.md: 
-------------------------------------------------------------------------------- 1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/) 2 | ==================================================================================== 3 | 4 | ** Due Date: 4/16/2015 (Thursday) 11:59 PM PST. ** 5 | 6 | In this assignment we will familiarize you with basic concepts of neural networks, word vectors, and their application to sentiment analysis. 7 | 8 | Setup 9 | ----- 10 | 11 | *Note: Please be sure you have Python 2.7.x installed on your system. The following instructions should work on Mac or Linux. If you have any trouble getting set up, please come to office hours and the TAs will be happy to help.* 12 | 13 | **Get the code**: [Download the starter code here](http://cs224d.stanford.edu/assignment1/assignment1.zip) and [the complementary written problems here](http://cs224d.stanford.edu/assignment1/assignment1.pdf). 14 | 15 | **[Optional] virtual environment:** Once you have unzipped the starter code, you might want to create a [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed on your machine. To set up a virtual environment, run the following: 16 | 17 | ``` 18 | cd assignment1 19 | sudo pip install virtualenv # This may already be installed 20 | virtualenv .env # Create a virtual environment 21 | source .env/bin/activate # Activate the virtual environment 22 | pip install -r requirements.txt # Install dependencies 23 | # Work on the assignment for a while ... 24 | deactivate # Exit the virtual environment 25 | ``` 26 | 27 | **Install requirements (without a virtual environment):** To install the required packages locally without setting up a virtual environment, run the following: 28 | 29 | ``` 30 | cd assignment1 31 | pip install -r requirements.txt # Install dependencies 32 | ``` 33 | 34 | **Download data:** Once you have the starter code, you will need to download the Stanford Sentiment Treebank dataset. Run the following from the assignment1 directory: 35 | 36 | ``` 37 | cd cs224d/datasets 38 | ./get_datasets.sh 39 | ``` 40 | 41 | **Start IPython:** After you have the Stanford Sentiment data, you should start the IPython notebook server from the `assignment1` directory. If you are unfamiliar with IPython, you should read this [IPython tutorial](http://cs231n.github.io/ipython-tutorial). 42 | 43 | Submitting your work 44 | -------------------- 45 | 46 | Once you are done working, put the written part in the same directory as your IPython notebook file, and run the `collectSubmission.sh` script; this will produce a file called `assignment1.zip`. Rename this file to `.zip`, for instance if your stanford email is `jdoe@stanford.edu`, your file name should be 47 | 48 | ``` 49 | cd cs224d/datasets 50 | jdoe.zip 51 | ``` 52 | 53 | Upload this file to [the Box for this assignment](https://stanford.box.com/signup/collablink/d_3367429916/116c2072133f72). 54 | For the written component, please upload a PDF file of your solutions to [`Scoryst`](https://scoryst.com/course/67/submit/). Please [sign up](https://scoryst.com/enroll/MUPJ5J2xd9/) with your stanford email and SUNet ID (letter ID) if applicable. When asked to map question parts to your PDF, please map the parts accordingly as courtesy to your TAs. 
The last part of each problem is a placeholder for the programming component, you could just map it to the page of the last part in your written assignment. 55 | 56 | Tasks 57 | ----- 58 | 59 | There will be four parts to this assignment, the first three comprise of a written component and a programming component in the IPython notebook. The fourth part is purely programming-based, and we also give you an opportunity to earn extra credits by doing a programming-based optional part. For all of the tasks, you will be using the IPython notebook `wordvec_sentiment.ipynb`. 60 | 61 | Q1: Softmax (10 points) 62 | ----------------------- 63 | 64 | Q2: Neural Network Basics (30 points) 65 | ------------------------------------- 66 | 67 | Q3: word2vec (40 points) 68 | ------------------------ 69 | 70 | Q4: Sentiment Analysis (20 points) 71 | ---------------------------------- 72 | 73 | For these four parts, please try to finish the written component before writing code. We designed the written component to help you think through the details in your code implementation. For each part, the written component is worth 40% the points of that part, and programming is 60%. 74 | 75 | Extra Credit (optional): Improve Your Sentiment Analysis Model (+10 points) 76 | --------------------------------------------------------------------------- 77 | 78 | For this optional part, please follow the instructions in the IPython notebook to finish your implementation and report results. Extra credit will be awarded based on relative progress. -------------------------------------------------------------------------------- /old_assignments/assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip . 
-x "*.git*" "*cs224d/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" \ 3 | "saved_params_1000.npy" "saved_params_2000.npy" "saved_params_3000.npy" "saved_params_4000.npy" "saved_params_5000.npy" \ 4 | "saved_params_6000.npy" "saved_params_7000.npy" "saved_params_8000.npy" "saved_params_9000.npy" "saved_params_10000.npy" \ 5 | "saved_params_11000.npy" "saved_params_12000.npy" "saved_params_13000.npy" "saved_params_14000.npy" "saved_params_15000.npy" \ 6 | "saved_params_16000.npy" "saved_params_17000.npy" "saved_params_18000.npy" "saved_params_19000.npy" "saved_params_20000.npy" \ 7 | "saved_params_21000.npy" "saved_params_22000.npy" "saved_params_23000.npy" "saved_params_24000.npy" "saved_params_25000.npy" \ 8 | "saved_params_26000.npy" "saved_params_27000.npy" "saved_params_28000.npy" "saved_params_29000.npy" "saved_params_30000.npy" \ 9 | "saved_params_31000.npy" "saved_params_32000.npy" "saved_params_33000.npy" "saved_params_34000.npy" "saved_params_35000.npy" \ 10 | "saved_params_36000.npy" "saved_params_37000.npy" "saved_params_38000.npy" "saved_params_39000.npy" -------------------------------------------------------------------------------- /old_assignments/assignment1/cs224d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment1/cs224d/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment1/cs224d/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get Stanford Sentiment Treebank 2 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 3 | unzip stanfordSentimentTreebank.zip 4 | rm stanfordSentimentTreebank.zip 5 | -------------------------------------------------------------------------------- /old_assignments/assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==3.0.0 7 | matplotlib==1.4.3 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.2 11 | pyparsing==2.0.3 12 | python-dateutil==2.4.0 13 | pytz==2014.10 14 | pyzmq==14.4.1 15 | scipy==0.14.1 16 | six==1.9.0 17 | tornado==4.0.2 18 | wsgiref==0.1.2 19 | -------------------------------------------------------------------------------- /old_assignments/assignment1/solutions/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | 14 | ## Intermediate documents: 15 | *.dvi 16 | *-converted-to.* 17 | # these rules might exclude image files for figures etc. 
18 | # *.ps 19 | # *.eps 20 | # *.pdf 21 | 22 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 23 | *.bbl 24 | *.bcf 25 | *.blg 26 | *-blx.aux 27 | *-blx.bib 28 | *.brf 29 | *.run.xml 30 | 31 | ## Build tool auxiliary files: 32 | *.fdb_latexmk 33 | *.synctex 34 | *.synctex.gz 35 | *.synctex.gz(busy) 36 | *.pdfsync 37 | 38 | ## Auxiliary and intermediate files from other packages: 39 | # algorithms 40 | *.alg 41 | *.loa 42 | 43 | # achemso 44 | acs-*.bib 45 | 46 | # amsthm 47 | *.thm 48 | 49 | # beamer 50 | *.nav 51 | *.snm 52 | *.vrb 53 | 54 | # cprotect 55 | *.cpt 56 | 57 | # fixme 58 | *.lox 59 | 60 | #(r)(e)ledmac/(r)(e)ledpar 61 | *.end 62 | *.?end 63 | *.[1-9] 64 | *.[1-9][0-9] 65 | *.[1-9][0-9][0-9] 66 | *.[1-9]R 67 | *.[1-9][0-9]R 68 | *.[1-9][0-9][0-9]R 69 | *.eledsec[1-9] 70 | *.eledsec[1-9]R 71 | *.eledsec[1-9][0-9] 72 | *.eledsec[1-9][0-9]R 73 | *.eledsec[1-9][0-9][0-9] 74 | *.eledsec[1-9][0-9][0-9]R 75 | 76 | # glossaries 77 | *.acn 78 | *.acr 79 | *.glg 80 | *.glo 81 | *.gls 82 | *.glsdefs 83 | 84 | # gnuplottex 85 | *-gnuplottex-* 86 | 87 | # hyperref 88 | *.brf 89 | 90 | # knitr 91 | *-concordance.tex 92 | # TODO Comment the next line if you want to keep your tikz graphics files 93 | *.tikz 94 | *-tikzDictionary 95 | 96 | # listings 97 | *.lol 98 | 99 | # makeidx 100 | *.idx 101 | *.ilg 102 | *.ind 103 | *.ist 104 | 105 | # minitoc 106 | *.maf 107 | *.mlf 108 | *.mlt 109 | *.mtc 110 | *.mtc[0-9] 111 | *.mtc[1-9][0-9] 112 | 113 | # minted 114 | _minted* 115 | *.pyg 116 | 117 | # morewrites 118 | *.mw 119 | 120 | # mylatexformat 121 | *.fmt 122 | 123 | # nomencl 124 | *.nlo 125 | 126 | # sagetex 127 | *.sagetex.sage 128 | *.sagetex.py 129 | *.sagetex.scmd 130 | 131 | # sympy 132 | *.sout 133 | *.sympy 134 | sympy-plots-for-*.tex/ 135 | 136 | # pdfcomment 137 | *.upa 138 | *.upb 139 | 140 | # pythontex 141 | *.pytxcode 142 | pythontex-files-*/ 143 | 144 | # TikZ & PGF 145 | *.dpth 146 | *.md5 147 | *.auxlock 148 | 149 | # todonotes 150 | *.tdo 151 | 152 | # xindy 153 | *.xdy 154 | 155 | # xypic precompiled matrices 156 | *.xyc 157 | 158 | # endfloat 159 | *.ttt 160 | *.fff 161 | 162 | # Latexian 163 | TSWLatexianTemp* 164 | 165 | ## Editors: 166 | # WinEdt 167 | *.bak 168 | *.sav 169 | 170 | # Texpad 171 | .texpadtmp 172 | 173 | # Kile 174 | *.backup 175 | 176 | # KBibTeX 177 | *~[0-9]* 178 | -------------------------------------------------------------------------------- /old_assignments/assignment1/updateAssignment.sh: -------------------------------------------------------------------------------- 1 | ASSIGNMENT=assignment1 2 | wget http://cs224d.stanford.edu/${ASSIGNMENT}/${ASSIGNMENT}.zip 3 | rm -f ${ASSIGNMENT}.pdf 4 | wget http://cs224d.stanford.edu/${ASSIGNMENT}/${ASSIGNMENT}.pdf 5 | unzip ${ASSIGNMENT}.zip 6 | rm -f ${ASSIGNMENT}.zip 7 | wget https://raw.githubusercontent.com/qipeng/nbutils/master/updateAssignment.py 8 | 9 | echo Update in progress... 10 | python updateAssignment.py . ${ASSIGNMENT} 11 | 12 | rm -rf updateAssignment.py 13 | rm -rf ${ASSIGNMENT} 14 | rm -rf __MACOSX 15 | echo Done! 
-------------------------------------------------------------------------------- /old_assignments/assignment2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2.pdf -------------------------------------------------------------------------------- /old_assignments/assignment2/collectSubmission.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, os, re, json 4 | import glob, shutil 5 | import time 6 | 7 | 8 | ################ 9 | # Sanity check # 10 | ################ 11 | import numpy as np 12 | 13 | fail = 0 14 | counter = 0 15 | testcases = [] 16 | 17 | from functools import wraps 18 | import traceback 19 | 20 | def prompt(msg): 21 | yn = input(msg + " [y/n]: ") 22 | return yn.lower().startswith('y') 23 | 24 | class testcase(object): 25 | def __init__(self, name): 26 | self.name = name 27 | 28 | def __call__(self, func): 29 | global testcases 30 | 31 | @wraps(func) 32 | def wrapper(): 33 | global counter 34 | global fail 35 | counter += 1 36 | print(">> Test %d (%s)" % (counter, self.name)) 37 | try: 38 | func() 39 | print("[ok] Passed test %d (%s)" % (counter, self.name)) 40 | except Exception as e: 41 | fail += 1 42 | print("[!!] Error on test %d (%s):" % (counter, self.name)) 43 | traceback.print_exc() 44 | 45 | testcases.append(wrapper) 46 | return wrapper 47 | 48 | ## 49 | # Part 0 50 | 51 | ## 52 | # Part 1 53 | @testcase("Part1: test random_weight_matrix") 54 | def test_random_weight_matrix(): 55 | from misc import random_weight_matrix 56 | A = random_weight_matrix(100,100) 57 | assert(A.shape == (100,100)) 58 | 59 | @testcase("Part1: initialize window model") 60 | def ner_init(): 61 | from nerwindow import WindowMLP 62 | np.random.seed(10) 63 | wv = np.random.randn(20,10) 64 | clf = WindowMLP(wv, windowsize=3, 65 | dims = [None, 15, 3], rseed=10) 66 | 67 | @testcase("Part1: test predict_proba()") 68 | def ner_predict_proba(): 69 | from nerwindow import WindowMLP 70 | np.random.seed(10) 71 | wv = np.random.randn(20,10) 72 | clf = WindowMLP(wv, windowsize=3, 73 | dims = [None, 15, 3], rseed=10) 74 | p = clf.predict_proba([1,2,3]) 75 | assert(len(p.flatten()) == 3) 76 | p = clf.predict_proba([[1,2,3], [2,3,4]]) 77 | assert(np.ndim(p) == 2) 78 | assert(p.shape == (2,3)) 79 | 80 | @testcase("Part1: test compute_loss()") 81 | def ner_predict_proba(): 82 | from nerwindow import WindowMLP 83 | np.random.seed(10) 84 | wv = np.random.randn(20,10) 85 | clf = WindowMLP(wv, windowsize=3, 86 | dims = [None, 15, 3], rseed=10) 87 | J = clf.compute_loss([1,2,3], 1) 88 | print(" dummy: J = %g" % J) 89 | J = clf.compute_loss([[1,2,3], [2,3,4]], [0,1]) 90 | print(" dummy: J = %g" % J) 91 | 92 | @testcase("Part1: NER prediction - dev set") 93 | def ner_pred_dev(): 94 | devpred = np.loadtxt("dev.predicted", dtype=int) 95 | assert(len(devpred) == 51362) # dev set length 96 | 97 | @testcase("Part1: NER prediction - test set") 98 | def ner_pred_test(): 99 | testpred = np.loadtxt("test.predicted", dtype=int) 100 | assert(len(testpred) == 46435) 101 | 102 | def setup_probing(): 103 | num_to_word = dict(enumerate( 104 | ["hello", "world", "i", "am", "a", "banana", 105 | "there", "is", "no", "spoon"])) 106 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 107 | num_to_tag = dict(enumerate(tagnames)) 108 | 109 | from nerwindow import WindowMLP 110 | np.random.seed(10) 111 | wv = 
np.random.randn(10,50) 112 | clf = WindowMLP(wv, windowsize=3, 113 | dims = [None, 100, 5], rseed=10) 114 | return clf, num_to_word, num_to_tag 115 | 116 | @testcase("Part1.1 (a): verify output format") 117 | def ner_probe_a(): 118 | from part11probing import part_a, part_b, part_c 119 | clf, num_to_word, num_to_tag = setup_probing() 120 | s,w = part_a(clf, num_to_word, verbose=False) 121 | assert(len(s) == len(w)) 122 | if type(s) == dict: # some students may have done this 123 | for k in list(s.keys()): assert(k in w) 124 | for k in list(w.keys()): assert(k in s) 125 | assert(len(s) >= 5) 126 | else: # list 127 | assert(len(s[0]) == len(w[0])) 128 | assert(len(s[0]) == 10) 129 | assert(type(w[0][0]) == str) 130 | 131 | 132 | @testcase("Part1.1 (b): verify output format") 133 | def ner_probe_b(): 134 | from part11probing import part_a, part_b, part_c 135 | clf, num_to_word, num_to_tag = setup_probing() 136 | s,w = part_b(clf, num_to_word, num_to_tag, verbose=False) 137 | assert(len(s) == len(w)) 138 | assert(len(s) == 5) 139 | assert(len(s[0]) == len(w[0])) 140 | assert(len(s[0]) == 10) 141 | assert(type(w[0][0]) == str) 142 | 143 | 144 | @testcase("Part1.1 (c): verify output format") 145 | def ner_probe_b(): 146 | from part11probing import part_a, part_b, part_c 147 | clf, num_to_word, num_to_tag = setup_probing() 148 | s,w = part_c(clf, num_to_word, num_to_tag, verbose=False) 149 | assert(len(s) == len(w)) 150 | assert(len(s) == 5) 151 | assert(len(s[0]) == len(w[0])) 152 | assert(len(s[0]) == 10) 153 | assert(type(w[0][0]) == str) 154 | 155 | 156 | ## 157 | # Part 2 158 | @testcase("Part2: initialize RNNLM") 159 | def rnnlm_init(): 160 | from rnnlm import RNNLM 161 | np.random.seed(10) 162 | L = np.random.randn(50,10) 163 | model = RNNLM(L0 = L) 164 | 165 | @testcase("Part2: load RNNLM params") 166 | def rnnlm_load(): 167 | from rnnlm import RNNLM 168 | L = np.load('rnnlm.L.npy') 169 | print(" loaded L: %s" % str(L.shape)) 170 | H = np.load('rnnlm.H.npy') 171 | print(" loaded H: %s" % str(H.shape)) 172 | U = np.load('rnnlm.U.npy') 173 | print(" loaded U: %s" % str(U.shape)) 174 | assert(L.shape[0] == U.shape[0]) 175 | assert(L.shape[1] == H.shape[1]) 176 | assert(H.shape[0] == U.shape[1]) 177 | model = RNNLM(L0 = L, U0 = U) 178 | model.params.H[:] = H 179 | 180 | @testcase("Part2: test generate_sequence") 181 | def rnnlm_generate_sequence(): 182 | from rnnlm import RNNLM 183 | np.random.seed(10) 184 | L = np.random.randn(20,10) 185 | model = RNNLM(L0 = L) 186 | model.H = np.random.randn(20,20) 187 | s, J = model.generate_sequence(0,1, maxlen=15) 188 | print("dummy J: %g" % J) 189 | print("dummy seq: len(s) = %d" % len(s)) 190 | assert(len(s) <= 15+1) 191 | assert(s[0] == 0) 192 | assert(J > 0) 193 | 194 | ## 195 | # Execute sanity check 196 | print("=== Running sanity check ===") 197 | for f in testcases: 198 | f() 199 | 200 | if fail <= 0: 201 | print("=== Sanity check passed! 
===") 202 | else: 203 | print("=== Sanity check failed %d tests :( ===" % fail) 204 | if not prompt("Continue submission anyway?"): 205 | sys.exit(1) 206 | 207 | 208 | ## 209 | # List of files for submission 210 | filelist = [ 211 | 'part0-XOR.ipynb', 212 | 'part1-NER.ipynb', 213 | 'misc.py', 214 | 'nerwindow.py', 215 | 'ner.learningcurve.best.png', 216 | 'ner.learningcurve.comparison.png', 217 | 'dev.predicted', 218 | 'test.predicted', 219 | 'part11probing.py', 220 | 'part2-RNNLM.ipynb', 221 | 'rnnlm.py', 222 | 'rnnlm.H.npy', 223 | 'rnnlm.L.npy', 224 | 'rnnlm.U.npy', 225 | ] 226 | files_ok = [] 227 | files_missing = [] 228 | 229 | # Verify required files present 230 | print("=== Verifying file list ===") 231 | for fname in filelist: 232 | print(("File: %s ? -" % fname), end=' ') 233 | if os.path.isfile(fname): 234 | print("ok"); files_ok.append(fname) 235 | else: 236 | print("NOT FOUND"); files_missing.append(fname) 237 | if len(files_missing) > 0: 238 | print("== Error: missing files ==") 239 | print(" ".join(files_missing)) 240 | if not prompt("Continue submission anyway?"): 241 | sys.exit(1) 242 | 243 | ## 244 | # Prepare submission zip 245 | from zipfile import ZipFile 246 | 247 | # Get SUNet ID 248 | sunetid = "" 249 | fail = -1 250 | while not re.match(r'[\w\d]+', sunetid): 251 | fail += 1 252 | sunetid = input("=== Please enter your SUNet ID ===\nSUNet ID: ").lower() 253 | if fail > 3: print("Error: invalid ID"); sys.exit(1) 254 | 255 | # Pack in files 256 | zipname = "%s.zip" % sunetid 257 | with ZipFile(zipname, 'w') as zf: 258 | print("=== Generating submission file '%s' ===" % zipname) 259 | for fname in files_ok: 260 | print((" %s" % fname), end=' ') 261 | zf.write(fname) 262 | print(("(%.02f kB)" % ((1.0/1024) * zf.getinfo(fname).file_size))) 263 | 264 | # Check size 265 | fsize = os.path.getsize(zipname) 266 | SIZE_LIMIT = 3*(2**30) # 30 MB 267 | print("Submission size: %.02f kB -" % ((1.0/1024) * fsize), end=' ') 268 | if fsize < SIZE_LIMIT: 269 | print("ok!") 270 | else: 271 | print("too large! (limit = %.02f kB" % ((1.0/1024) * SIZE_LIMIT)) 272 | sys.exit(1) 273 | 274 | print("=== Successfully generated submission zipfile! ===") 275 | print("Please upload '%s' to Box, and don't forget to submit your writeup PDF via Scoryst!" 
% zipname) -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2/data_utils/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/ner.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Utility functions for NER assignment 3 | # Assigment 2, part 1 for CS224D 4 | ## 5 | 6 | from .utils import invert_dict 7 | from numpy import * 8 | 9 | def load_wv(vocabfile, wvfile): 10 | wv = loadtxt(wvfile, dtype=float) 11 | with open(vocabfile) as fd: 12 | words = [line.strip() for line in fd] 13 | num_to_word = dict(enumerate(words)) 14 | word_to_num = invert_dict(num_to_word) 15 | return wv, word_to_num, num_to_word 16 | 17 | 18 | def save_predictions(y, filename): 19 | """Save predictions, one per line.""" 20 | with open(filename, 'w') as fd: 21 | fd.write("\n".join(map(str, y))) 22 | fd.write("\n") -------------------------------------------------------------------------------- /old_assignments/assignment2/data_utils/utils.py: -------------------------------------------------------------------------------- 1 | import sys, os, re, json 2 | import itertools 3 | from collections import Counter 4 | import time 5 | from numpy import * 6 | 7 | import pandas as pd 8 | 9 | 10 | def invert_dict(d): 11 | return {v:k for k,v in d.items()} 12 | 13 | def flatten1(lst): 14 | return list(itertools.chain.from_iterable(lst)) 15 | 16 | def load_wv_pandas(fname): 17 | return pd.read_hdf(fname, 'data') 18 | 19 | def extract_wv(df): 20 | num_to_word = dict(enumerate(df.index)) 21 | word_to_num = invert_dict(num_to_word) 22 | wv = df.as_matrix() 23 | return wv, word_to_num, num_to_word 24 | 25 | def canonicalize_digits(word): 26 | if any([c.isalpha() for c in word]): return word 27 | word = re.sub("\d", "DG", word) 28 | if word.startswith("DG"): 29 | word = word.replace(",", "") # remove thousands separator 30 | return word 31 | 32 | def canonicalize_word(word, wordset=None, digits=True): 33 | word = word.lower() 34 | if digits: 35 | if (wordset != None) and (word in wordset): return word 36 | word = canonicalize_digits(word) # try to canonicalize numbers 37 | if (wordset == None) or (word in wordset): return word 38 | else: return "UUUNKKK" # unknown token 39 | 40 | 41 | ## 42 | # Utility functions used to create dataset 43 | ## 44 | def augment_wv(df, extra=["UUUNKKK"]): 45 | for e in extra: 46 | df.loc[e] = zeros(len(df.columns)) 47 | 48 | def prune_wv(df, vocab, extra=["UUUNKKK"]): 49 | """Prune word vectors to vocabulary.""" 50 | items = set(vocab).union(set(extra)) 51 | return df.filter(items=items, axis='index') 52 | 53 | def load_wv_raw(fname): 54 | return pd.read_table(fname, sep="\s+", 55 | header=None, 56 | index_col=0, 57 | quoting=3) 58 | 59 | def load_dataset(fname): 60 | docs = [] 61 | with open(fname) as fd: 62 | cur = [] 63 | for line in fd: 64 | # new sentence on -DOCSTART- or blank line 65 | if re.match(r"-DOCSTART-.+", line) or (len(line.strip()) == 0): 66 | if len(cur) > 0: 67 | docs.append(cur) 68 | cur = [] 69 | else: # read in tokens 70 | cur.append(line.strip().split("\t",1)) 71 | # flush running buffer 72 | docs.append(cur) 73 | return docs 74 | 75 | def extract_tag_set(docs): 76 | tags = 
set(flatten1([[t[1].split("|")[0] for t in d] for d in docs])) 77 | return tags 78 | 79 | def extract_word_set(docs): 80 | words = set(flatten1([[t[0] for t in d] for d in docs])) 81 | return words 82 | 83 | def pad_sequence(seq, left=1, right=1): 84 | return left*[("", "")] + seq + right*[("", "")] 85 | 86 | ## 87 | # For window models 88 | def seq_to_windows(words, tags, word_to_num, tag_to_num, left=1, right=1): 89 | ns = len(words) 90 | X = [] 91 | y = [] 92 | for i in range(ns): 93 | if words[i] == "" or words[i] == "": 94 | continue # skip sentence delimiters 95 | tagn = tag_to_num[tags[i]] 96 | idxs = [word_to_num[words[ii]] 97 | for ii in range(i - left, i + right + 1)] 98 | X.append(idxs) 99 | y.append(tagn) 100 | return array(X), array(y) 101 | 102 | def docs_to_windows(docs, word_to_num, tag_to_num, wsize=3): 103 | pad = (wsize - 1)//2 104 | docs = flatten1([pad_sequence(seq, left=pad, right=pad) for seq in docs]) 105 | 106 | words, tags = zip(*docs) 107 | words = [canonicalize_word(w, word_to_num) for w in words] 108 | tags = [t.split("|")[0] for t in tags] 109 | return seq_to_windows(words, tags, word_to_num, tag_to_num, pad, pad) 110 | 111 | def window_to_vec(window, L): 112 | """Concatenate word vectors for a given window.""" 113 | return concatenate([L[i] for i in window]) 114 | 115 | ## 116 | # For fixed-window LM: 117 | # each row of X is a list of word indices 118 | # each entry of y is the word index to predict 119 | def seq_to_lm_windows(words, word_to_num, ngram=2): 120 | ns = len(words) 121 | X = [] 122 | y = [] 123 | for i in range(ns): 124 | if words[i] == "": 125 | continue # skip sentence begin, but do predict end 126 | idxs = [word_to_num[words[ii]] 127 | for ii in range(i - ngram + 1, i + 1)] 128 | X.append(idxs[:-1]) 129 | y.append(idxs[-1]) 130 | return array(X), array(y) 131 | 132 | def docs_to_lm_windows(docs, word_to_num, ngram=2): 133 | docs = flatten1([pad_sequence(seq, left=(ngram-1), right=1) 134 | for seq in docs]) 135 | words = [canonicalize_word(wt[0], word_to_num) for wt in docs] 136 | return seq_to_lm_windows(words, word_to_num, ngram) 137 | 138 | 139 | ## 140 | # For RNN LM 141 | # just convert each sentence to a list of indices 142 | # after padding each with ... tokens 143 | def seq_to_indices(words, word_to_num): 144 | return array([word_to_num[w] for w in words]) 145 | 146 | def docs_to_indices(docs, word_to_num): 147 | docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 148 | ret = [] 149 | for seq in docs: 150 | words = [canonicalize_word(wt[0], word_to_num) for wt in seq] 151 | ret.append(seq_to_indices(words, word_to_num)) 152 | 153 | # return as numpy array for fancier slicing 154 | return array(ret, dtype=object) 155 | 156 | def offset_seq(seq): 157 | return seq[:-1], seq[1:] 158 | 159 | def seqs_to_lmXY(seqs): 160 | X, Y = zip(*[offset_seq(s) for s in seqs]) 161 | return array(X, dtype=object), array(Y, dtype=object) 162 | 163 | ## 164 | # For RNN tagger 165 | # return X, Y as lists 166 | # where X[i] is indices, Y[i] is tags for a sequence 167 | # NOTE: this does not use padding tokens! 
168 | # (RNN should natively handle begin/end) 169 | def docs_to_tag_sequence(docs, word_to_num, tag_to_num): 170 | # docs = [pad_sequence(seq, left=1, right=1) for seq in docs] 171 | X = [] 172 | Y = [] 173 | for seq in docs: 174 | if len(seq) < 1: continue 175 | words, tags = zip(*seq) 176 | 177 | words = [canonicalize_word(w, word_to_num) for w in words] 178 | x = seq_to_indices(words, word_to_num) 179 | X.append(x) 180 | 181 | tags = [t.split("|")[0] for t in tags] 182 | y = seq_to_indices(tags, tag_to_num) 183 | Y.append(y) 184 | 185 | # return as numpy array for fancier slicing 186 | return array(X, dtype=object), array(Y, dtype=object) 187 | 188 | def idxs_to_matrix(idxs, L): 189 | """Return a matrix X with each row 190 | as a word vector for the corresponding 191 | index in idxs.""" 192 | return vstack([L[i] for i in idxs]) -------------------------------------------------------------------------------- /old_assignments/assignment2/misc.py: -------------------------------------------------------------------------------- 1 | ## 2 | # Miscellaneous helper functions 3 | ## 4 | 5 | from numpy import * 6 | 7 | def random_weight_matrix(m, n): 8 | #### YOUR CODE HERE #### 9 | eps = sqrt(6/(m+n)) 10 | A0 = random.uniform(low=-eps, high=eps, size=(m,n)) 11 | #### END YOUR CODE #### 12 | assert A0.shape == (m,n) 13 | return A0 -------------------------------------------------------------------------------- /old_assignments/assignment2/nerwindow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nn.base import NNBase 3 | from nn.math import softmax, make_onehot 4 | from misc import random_weight_matrix 5 | 6 | 7 | ## 8 | # Evaluation code; do not change this 9 | ## 10 | from sklearn import metrics 11 | def full_report(y_true, y_pred, tagnames): 12 | cr = metrics.classification_report(y_true, y_pred, 13 | target_names=tagnames) 14 | print(cr) 15 | 16 | def eval_performance(y_true, y_pred, tagnames): 17 | pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred) 18 | print("=== Performance (omitting 'O' class) ===") 19 | print("Mean precision: %.02f%%" % (100*sum(pre[1:] * support[1:])/sum(support[1:]))) 20 | print("Mean recall: %.02f%%" % (100*sum(rec[1:] * support[1:])/sum(support[1:]))) 21 | print("Mean F1: %.02f%%" % (100*sum(f1[1:] * support[1:])/sum(support[1:]))) 22 | 23 | 24 | ## 25 | # Implement this! 26 | ## 27 | class WindowMLP(NNBase): 28 | """Single hidden layer, plus representation learning.""" 29 | 30 | def __init__(self, wv, windowsize=3, 31 | dims=[None, 100, 5], 32 | reg=0.001, alpha=0.01, rseed=10): 33 | """ 34 | Initialize classifier model. 35 | 36 | Arguments: 37 | wv : initial word vectors (array |V| x n) 38 | note that this is the transpose of the n x |V| matrix L 39 | described in the handout; you'll want to keep it in 40 | this |V| x n form for efficiency reasons, since numpy 41 | stores matrix rows continguously. 
42 |         windowsize : int, size of context window
43 |         dims : dimensions of [input, hidden, output]
44 |                input dimension can be computed from wv.shape
45 |         reg : regularization strength (lambda)
46 |         alpha : default learning rate
47 |         rseed : random initialization seed
48 |         """
49 | 
50 |         # Set regularization
51 |         self.lreg = float(reg)
52 |         self.alpha = alpha # default training rate
53 | 
54 |         dims[0] = windowsize * wv.shape[1] # input dimension
55 |         param_dims = dict(W=(dims[1], dims[0]),
56 |                           b1=(dims[1],),
57 |                           U=(dims[2], dims[1]),
58 |                           b2=(dims[2],),
59 |                           )
60 |         param_dims_sparse = dict(L=wv.shape)
61 | 
62 |         # initialize parameters: don't change this line
63 |         NNBase.__init__(self, param_dims, param_dims_sparse)
64 | 
65 |         np.random.seed(rseed) # be sure to seed this for repeatability!
66 |         #### YOUR CODE HERE ####
67 |         # any other initialization you need
68 |         self.params.W = random_weight_matrix(*self.params.W.shape)
69 |         self.params.U = random_weight_matrix(*self.params.U.shape)
70 |         self.sparams.L = wv.copy()
71 |         #### END YOUR CODE ####
72 | 
73 | 
74 | 
75 |     def _acc_grads(self, window, label):
76 |         """
77 |         Accumulate gradients, given a training point
78 |         (window, label) of the format
79 | 
80 |         window = [x_{i-1} x_{i} x_{i+1}] # three ints
81 |         label = {0,1,2,3,4} # single int, gives class
82 | 
83 |         Your code should update self.grads and self.sgrads,
84 |         in order for gradient_check and training to work.
85 | 
86 |         So, for example:
87 |         self.grads.U += (your gradient dJ/dU)
88 |         self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
89 |         """
90 |         #### YOUR CODE HERE ####
91 |         ##
92 |         # Forward propagation
93 |         words = np.array([self.sparams.L[x] for x in window])
94 |         x = np.reshape(words, -1)
95 |         layer1 = np.tanh(self.params.W.dot(x) + self.params.b1)
96 |         probs = softmax(self.params.U.dot(layer1) + self.params.b2)
97 |         ##
98 |         # Backpropagation
99 |         y = make_onehot(label, len(probs))
100 |         dx = probs - y
101 |         dU = np.outer(dx, layer1)
102 |         delta2 = np.multiply((1 - np.square(layer1)), # tanh'(z) = 1 - tanh(z)^2
103 |                              self.params.U.T.dot(dx))
104 |         dW = np.outer(delta2, x)
105 |         db1 = delta2
106 |         dL = self.params.W.T.dot(delta2)
107 |         dL = np.reshape(dL, (3, self.sparams.L.shape[1]))
108 | 
109 |         dW += self.lreg * self.params.W
110 |         dU += self.lreg * self.params.U
111 | 
112 |         self.grads.U += dU
113 |         self.grads.W += dW
114 |         self.grads.b2 += dx
115 |         self.grads.b1 += delta2
116 | 
117 |         self.sgrads.L[window[0]] = dL[0]
118 |         self.sgrads.L[window[1]] = dL[1]
119 |         self.sgrads.L[window[2]] = dL[2]
120 |         #### END YOUR CODE ####
121 | 
122 | 
123 |     def predict_proba(self, windows):
124 |         """
125 |         Predict class probabilities.
126 | 
127 |         Should return a matrix P of probabilities,
128 |         with each row corresponding to a row of X.
129 | 
130 |         windows = array (n x windowsize),
131 |                   each row is a window of indices
132 |         """
133 |         # handle singleton input by making sure we have
134 |         # a list-of-lists
135 |         if not hasattr(windows[0], "__iter__"):
136 |             windows = [windows]
137 | 
138 |         #### YOUR CODE HERE ####
139 |         x = self.sparams.L[np.array(windows)].reshape(len(windows), -1) # concatenated window vectors
140 |         hidden = np.tanh(x.dot(self.params.W.T) + self.params.b1)
141 |         P = np.array([softmax(row) for row in hidden.dot(self.params.U.T) + self.params.b2])
142 |         #### END YOUR CODE ####
143 |         return P # rows are output for each input
144 | 
145 | 
146 |     def predict(self, windows):
147 |         """
148 |         Predict most likely class.
149 | Returns a list of predicted class indices; 150 | input is same as to predict_proba 151 | """ 152 | 153 | #### YOUR CODE HERE #### 154 | probs = self.predict_proba(windows) 155 | c = np.argmax(probs, axis=1) 156 | #### END YOUR CODE #### 157 | return c # list of predicted classes 158 | 159 | 160 | def compute_loss(self, windows, labels): 161 | """ 162 | Compute the loss for a given dataset. 163 | windows = same as for predict_proba 164 | labels = list of class labels, for each row of windows 165 | """ 166 | 167 | #### YOUR CODE HERE #### 168 | 169 | 170 | #### END YOUR CODE #### 171 | return J -------------------------------------------------------------------------------- /old_assignments/assignment2/nn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment2/nn/__init__.py -------------------------------------------------------------------------------- /old_assignments/assignment2/nn/math.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | 3 | def sigmoid(x): 4 | return 1.0/(1.0 + exp(-x)) 5 | 6 | def softmax(x): 7 | xt = exp(x - max(x)) 8 | return xt / sum(xt) 9 | 10 | def make_onehot(i, n): 11 | y = zeros(n) 12 | y[i] = 1 13 | return y 14 | 15 | 16 | class MultinomialSampler(object): 17 | """ 18 | Fast (O(log n)) sampling from a discrete probability 19 | distribution, with O(n) set-up time. 20 | """ 21 | 22 | def __init__(self, p, verbose=False): 23 | n = len(p) 24 | p = p.astype(float) / sum(p) 25 | self._cdf = cumsum(p) 26 | 27 | def sample(self, k=1): 28 | rs = random.random(k) 29 | # binary search to get indices 30 | return searchsorted(self._cdf, rs) 31 | 32 | def __call__(self, **kwargs): 33 | return self.sample(**kwargs) 34 | 35 | def reconstruct_p(self): 36 | """ 37 | Return the original probability vector. 38 | Helpful for debugging. 39 | """ 40 | n = len(self._cdf) 41 | p = zeros(n) 42 | p[0] = self._cdf[0] 43 | p[1:] = (self._cdf[1:] - self._cdf[:-1]) 44 | return p 45 | 46 | 47 | def multinomial_sample(p): 48 | """ 49 | Wrapper to generate a single sample, 50 | using the above class. 
51 | """ 52 | return MultinomialSampler(p).sample(1)[0] -------------------------------------------------------------------------------- /old_assignments/assignment2/part11probing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys, os 4 | from numpy import * 5 | 6 | def print_scores(scores, words): 7 | for i in range(len(scores)): 8 | print("[%d]: (%.03f) %s" % (i, scores[i], words[i])) 9 | 10 | 11 | def part_a(clf, num_to_word, verbose=True): 12 | """ 13 | Code for 1.1 part (a): 14 | Hidden Layer, Center Word 15 | 16 | clf: instance of WindowMLP, 17 | trained on data 18 | num_to_word: dict {int:string} 19 | 20 | You need to create: 21 | - topscores : list of lists of 10 scores (float) 22 | - topwords : list of lists of 10 words (string) 23 | You should generate these lists for each neuron 24 | (so for hdim = 100, you'll have lists of 100 lists of 10) 25 | then fill in neurons = [] to print 26 | """ 27 | #### YOUR CODE HERE #### 28 | 29 | 30 | 31 | 32 | 33 | neurons = [1,3,4,6,8] # change this to your chosen neurons 34 | 35 | #### END YOUR CODE #### 36 | # topscores[i]: list of floats 37 | # topwords[i]: list of words 38 | if verbose == True: 39 | for i in neurons: 40 | print("Neuron %d" % i) 41 | print_scores(topscores[i], topwords[i]) 42 | 43 | return topscores, topwords 44 | 45 | 46 | def part_b(clf, num_to_word, num_to_tag, verbose=True): 47 | """ 48 | Code for 1.1 part (b): 49 | Model Output, Center Word 50 | 51 | clf: instance of WindowMLP, 52 | trained on data 53 | num_to_word: dict {int:string} 54 | 55 | You need to create: 56 | - topscores : list of 5 lists of 10 probability scores (float) 57 | - topwords : list of 5 lists of 10 words (string) 58 | where indices 0,1,2,3,4 correspond to num_to_tag, i.e. 59 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 60 | """ 61 | #### YOUR CODE HERE #### 62 | 63 | 64 | 65 | 66 | 67 | 68 | #### END YOUR CODE #### 69 | # topscores[i]: list of floats 70 | # topwords[i]: list of words 71 | if verbose == True: 72 | for i in range(1,5): 73 | print("Output neuron %d: %s" % (i, num_to_tag[i])) 74 | print_scores(topscores[i], topwords[i]) 75 | print("") 76 | 77 | return topscores, topwords 78 | 79 | 80 | def part_c(clf, num_to_word, num_to_tag, verbose=True): 81 | """ 82 | Code for 1.1 part (c): 83 | Model Output, Preceding Word 84 | 85 | clf: instance of WindowMLP, 86 | trained on data 87 | num_to_word: dict {int:string} 88 | 89 | You need to create: 90 | - topscores : list of 5 lists of 10 probability scores (float) 91 | - topwords : list of 5 lists of 10 words (string) 92 | where indices 0,1,2,3,4 correspond to num_to_tag, i.e. 
93 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 94 | """ 95 | #### YOUR CODE HERE #### 96 | 97 | 98 | 99 | 100 | 101 | 102 | #### END YOUR CODE #### 103 | # topscores[i]: list of floats 104 | # topwords[i]: list of words 105 | if verbose == True: 106 | for i in range(1,5): 107 | print("Output neuron %d: %s" % (i, num_to_tag[i])) 108 | print_scores(topscores[i], topwords[i]) 109 | print("") 110 | 111 | return topscores, topwords 112 | 113 | 114 | ## 115 | # Dummy test code 116 | # run this script, and make sure nothing crashes 117 | # (this is the same as sanity check for part 1.1) 118 | if __name__ == '__main__': 119 | num_to_word = dict(enumerate( 120 | ["hello", "world", "i", "am", "a", "banana", 121 | "there", "is", "no", "spoon"])) 122 | tagnames = ["O", "LOC", "MISC", "ORG", "PER"] 123 | num_to_tag = dict(enumerate(tagnames)) 124 | 125 | from nerwindow import WindowMLP 126 | random.seed(10) 127 | wv = random.randn(10,50) 128 | clf = WindowMLP(wv, windowsize=3, 129 | dims = [None, 100, 5], rseed=10) 130 | 131 | print("\n=== Testing Part (a) ===\n") 132 | s,w = part_a(clf, num_to_word, verbose=True) 133 | assert(len(s) == len(w)) 134 | if type(s) == dict: # some students may have done this 135 | for k in list(s.keys()): assert(k in w) 136 | for k in list(w.keys()): assert(k in s) 137 | assert(len(s) >= 5) 138 | else: # list 139 | assert(len(s[0]) == len(w[0])) 140 | assert(len(s[0]) == 10) 141 | assert(type(w[0][0]) == str) 142 | 143 | print("\n=== Testing Part (b) ===\n") 144 | s,w = part_b(clf, num_to_word, num_to_tag, verbose=True) 145 | assert(len(s) == len(w)) 146 | assert(len(s) == 5) 147 | assert(len(s[0]) == len(w[0])) 148 | assert(len(s[0]) == 10) 149 | assert(type(w[0][0]) == str) 150 | 151 | print("\n=== Testing Part (c) ===\n") 152 | s,w = part_c(clf, num_to_word, num_to_tag, verbose=True) 153 | assert(len(s) == len(w)) 154 | assert(len(s) == 5) 155 | assert(len(s[0]) == len(w[0])) 156 | assert(len(s[0]) == 10) 157 | assert(type(w[0][0]) == str) -------------------------------------------------------------------------------- /old_assignments/assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.7.3 2 | MarkupSafe==0.23 3 | backports.ssl-match-hostname==3.4.0.2 4 | certifi==14.05.14 5 | gnureadline==6.3.3 6 | ipython==3.0.0 7 | matplotlib==1.4.3 8 | mock==1.0.1 9 | nose==1.3.4 10 | numpy==1.9.2 11 | scikit-learn==0.16.0 12 | pandas==0.15.2 13 | pyparsing==2.0.3 14 | python-dateutil==2.4.0 15 | pytz==2014.10 16 | pyzmq==14.4.1 17 | scipy==0.14.1 18 | six==1.9.0 19 | tornado==4.0.2 20 | wsgiref==0.1.2 21 | -------------------------------------------------------------------------------- /old_assignments/assignment2/rnnlm.py: -------------------------------------------------------------------------------- 1 | from numpy import * 2 | import itertools 3 | import time 4 | import sys 5 | 6 | # Import NN utils 7 | from nn.base import NNBase 8 | from nn.math import softmax, sigmoid 9 | from nn.math import MultinomialSampler, multinomial_sample 10 | from misc import random_weight_matrix 11 | 12 | 13 | class RNNLM(NNBase): 14 | """ 15 | Implements an RNN language model of the form: 16 | h(t) = sigmoid(H * h(t-1) + L[x(t)]) 17 | y(t) = softmax(U * h(t)) 18 | where y(t) predicts the next word in the sequence 19 | 20 | U = |V| * dim(h) as output vectors 21 | L = |V| * dim(h) as input vectors 22 | 23 | You should initialize each U[i,j] and L[i,j] 24 | as Gaussian noise with mean 0 and variance 0.1 25 | 26 | Arguments: 
27 | L0 : initial input word vectors 28 | U0 : initial output word vectors 29 | alpha : default learning rate 30 | bptt : number of backprop timesteps 31 | """ 32 | 33 | def __init__(self, L0, U0=None, 34 | alpha=0.005, rseed=10, bptt=1): 35 | 36 | self.hdim = L0.shape[1] # word vector dimensions 37 | self.vdim = L0.shape[0] # vocab size 38 | param_dims = dict(H = (self.hdim, self.hdim), 39 | U = L0.shape) 40 | # note that only L gets sparse updates 41 | param_dims_sparse = dict(L = L0.shape) 42 | NNBase.__init__(self, param_dims, param_dims_sparse) 43 | 44 | #### YOUR CODE HERE #### 45 | 46 | 47 | # Initialize word vectors 48 | # either copy the passed L0 and U0 (and initialize in your notebook) 49 | # or initialize with gaussian noise here 50 | 51 | # Initialize H matrix, as with W and U in part 1 52 | 53 | #### END YOUR CODE #### 54 | 55 | 56 | def _acc_grads(self, xs, ys): 57 | """ 58 | Accumulate gradients, given a pair of training sequences: 59 | xs = [] # input words 60 | ys = [] # output words (to predict) 61 | 62 | Your code should update self.grads and self.sgrads, 63 | in order for gradient_check and training to work. 64 | 65 | So, for example: 66 | self.grads.H += (your gradient dJ/dH) 67 | self.sgrads.L[i] = (gradient dJ/dL[i]) # update row 68 | 69 | Per the handout, you should: 70 | - make predictions by running forward in time 71 | through the entire input sequence 72 | - for *each* output word in ys, compute the 73 | gradients with respect to the cross-entropy 74 | loss for that output word 75 | - run backpropagation-through-time for self.bptt 76 | timesteps, storing grads in self.grads (for H, U) 77 | and self.sgrads (for L) 78 | 79 | You'll want to store your predictions \hat{y}(t) 80 | and the hidden layer values h(t) as you run forward, 81 | so that you can access them during backpropagation. 82 | 83 | At time 0, you should initialize the hidden layer to 84 | be a vector of zeros. 85 | """ 86 | 87 | # Expect xs as list of indices 88 | ns = len(xs) 89 | 90 | # make matrix here of corresponding h(t) 91 | # hs[-1] = initial hidden state (zeros) 92 | hs = zeros((ns+1, self.hdim)) 93 | # predicted probas 94 | ps = zeros((ns, self.vdim)) 95 | 96 | #### YOUR CODE HERE #### 97 | 98 | ## 99 | # Forward propagation 100 | 101 | 102 | ## 103 | # Backward propagation through time 104 | 105 | 106 | 107 | #### END YOUR CODE #### 108 | 109 | 110 | 111 | def grad_check(self, x, y, outfd=sys.stderr, **kwargs): 112 | """ 113 | Wrapper for gradient check on RNNs; 114 | ensures that backprop-through-time is run to completion, 115 | computing the full gradient for the loss as summed over 116 | the input sequence and predictions. 117 | 118 | Do not modify this function! 119 | """ 120 | bptt_old = self.bptt 121 | self.bptt = len(y) 122 | print("NOTE: temporarily setting self.bptt = len(y) = %d to compute true gradient." % self.bptt, file=outfd) 123 | NNBase.grad_check(self, x, y, outfd=outfd, **kwargs) 124 | self.bptt = bptt_old 125 | print("Reset self.bptt = %d" % self.bptt, file=outfd) 126 | 127 | 128 | def compute_seq_loss(self, xs, ys): 129 | """ 130 | Compute the total cross-entropy loss 131 | for an input sequence xs and output 132 | sequence (labels) ys. 133 | 134 | You should run the RNN forward, 135 | compute cross-entropy loss at each timestep, 136 | and return the sum of the point losses. 
137 | """ 138 | 139 | J = 0 140 | #### YOUR CODE HERE #### 141 | 142 | 143 | #### END YOUR CODE #### 144 | return J 145 | 146 | 147 | def compute_loss(self, X, Y): 148 | """ 149 | Compute total loss over a dataset. 150 | (wrapper for compute_seq_loss) 151 | 152 | Do not modify this function! 153 | """ 154 | if not isinstance(X[0], ndarray): # single example 155 | return self.compute_seq_loss(X, Y) 156 | else: # multiple examples 157 | return sum([self.compute_seq_loss(xs,ys) 158 | for xs,ys in zip(X, Y)]) 159 | 160 | def compute_mean_loss(self, X, Y): 161 | """ 162 | Normalize loss by total number of points. 163 | 164 | Do not modify this function! 165 | """ 166 | J = self.compute_loss(X, Y) 167 | ntot = sum(map(len,Y)) 168 | return J / float(ntot) 169 | 170 | 171 | def generate_sequence(self, init, end, maxlen=100): 172 | """ 173 | Generate a sequence from the language model, 174 | by running the RNN forward and selecting, 175 | at each timestep, a random word from the 176 | a word from the emitted probability distribution. 177 | 178 | The MultinomialSampler class (in nn.math) may be helpful 179 | here for sampling a word. Use as: 180 | 181 | y = multinomial_sample(p) 182 | 183 | to sample an index y from the vector of probabilities p. 184 | 185 | 186 | Arguments: 187 | init = index of start word (word_to_num['']) 188 | end = index of end word (word_to_num['']) 189 | maxlen = maximum length to generate 190 | 191 | Returns: 192 | ys = sequence of indices 193 | J = total cross-entropy loss of generated sequence 194 | """ 195 | 196 | J = 0 # total loss 197 | ys = [init] # emitted sequence 198 | 199 | #### YOUR CODE HERE #### 200 | 201 | 202 | #### YOUR CODE HERE #### 203 | return ys, J 204 | 205 | 206 | 207 | class ExtraCreditRNNLM(RNNLM): 208 | """ 209 | Implements an improved RNN language model, 210 | for better speed and/or performance. 211 | 212 | We're not going to place any constraints on you 213 | for this part, but we do recommend that you still 214 | use the starter code (NNBase) framework that 215 | you've been using for the NER and RNNLM models. 216 | """ 217 | 218 | def __init__(self, *args, **kwargs): 219 | #### YOUR CODE HERE #### 220 | raise NotImplementedError("__init__() not yet implemented.") 221 | #### END YOUR CODE #### 222 | 223 | def _acc_grads(self, xs, ys): 224 | #### YOUR CODE HERE #### 225 | raise NotImplementedError("_acc_grads() not yet implemented.") 226 | #### END YOUR CODE #### 227 | 228 | def compute_seq_loss(self, xs, ys): 229 | #### YOUR CODE HERE #### 230 | raise NotImplementedError("compute_seq_loss() not yet implemented.") 231 | #### END YOUR CODE #### 232 | 233 | def generate_sequence(self, init, end, maxlen=100): 234 | #### YOUR CODE HERE #### 235 | raise NotImplementedError("generate_sequence() not yet implemented.") 236 | #### END YOUR CODE #### -------------------------------------------------------------------------------- /old_assignments/assignment2/softmax_example.py: -------------------------------------------------------------------------------- 1 | from nn.base import NNBase 2 | from nn.math import softmax, make_onehot 3 | from misc import random_weight_matrix 4 | from numpy import * 5 | 6 | class SoftmaxRegression(NNBase): 7 | """ 8 | Dummy example, to show how to implement a network. 9 | This implements softmax regression, trained by SGD. 
10 | """ 11 | 12 | def __init__(self, wv, dims=[100, 5], 13 | reg=0.1, alpha=0.001, 14 | rseed=10): 15 | """ 16 | Set up classifier: parameters, hyperparameters 17 | """ 18 | ## 19 | # Store hyperparameters 20 | self.lreg = reg # regularization 21 | self.alpha = alpha # default learning rate 22 | self.nclass = dims[1] # number of output classes 23 | 24 | ## 25 | # NNBase stores parameters in a special format 26 | # for efficiency reasons, and to allow the code 27 | # to automatically implement gradient checks 28 | # and training algorithms, independent of the 29 | # specific model architecture 30 | # To initialize, give shapes as if to np.array((m,n)) 31 | param_dims = dict(W = (dims[1], dims[0]), # 5x100 matrix 32 | b = (dims[1])) # column vector 33 | # These parameters have sparse gradients, 34 | # which is *much* more efficient if only a row 35 | # at a time gets updated (e.g. word representations) 36 | param_dims_sparse = dict(L=wv.shape) 37 | NNBase.__init__(self, param_dims, param_dims_sparse) 38 | 39 | ## 40 | # Now we can access the parameters using 41 | # self.params. for normal parameters 42 | # self.sparams. for params with sparse gradients 43 | # and get access to normal NumPy arrays 44 | self.sparams.L = wv.copy() # store own representations 45 | self.params.W = random_weight_matrix(*self.params.W.shape) 46 | # self.params.b1 = zeros((self.nclass,1)) # done automatically! 47 | 48 | def _acc_grads(self, idx, label): 49 | """ 50 | Accumulate gradients from a training example. 51 | """ 52 | ## 53 | # Forward propagation 54 | x = self.sparams.L[idx] # extract representation 55 | p = softmax(self.params.W.dot(x) + self.params.b) 56 | 57 | ## 58 | # Compute gradients w.r.t cross-entropy loss 59 | y = make_onehot(label, len(p)) 60 | delta = p - y 61 | # dJ/dW, dJ/db1 62 | self.grads.W += outer(delta, x) + self.lreg * self.params.W 63 | self.grads.b += delta 64 | # dJ/dL, sparse update: use sgrads 65 | # this stores an update to the row L[idx] 66 | self.sgrads.L[idx] = self.params.W.T.dot(delta) 67 | # note that the syntax is overloaded here; L[idx] = 68 | # works like +=, so if you update the same index 69 | # twice, it'll store *BOTH* updates. For example: 70 | # self.sgrads.L[idx] = ones(50) 71 | # self.sgrads.L[idx] = ones(50) 72 | # will add -2*alpha to that row when gradients are applied! 73 | 74 | ## 75 | # We don't need to do the update ourself, as NNBase 76 | # calls that during training. See NNBase.train_sgd 77 | # in nn/base.py to see how this is done, if interested. 78 | ## 79 | 80 | def compute_loss(self, idx, label): 81 | """ 82 | Compute the cost function for a single example. 83 | """ 84 | ## 85 | # Forward propagation 86 | x = self.sparams.L[idx] 87 | p = softmax(self.params.W.dot(x) + self.params.b) 88 | J = -1*log(p[label]) # cross-entropy loss 89 | Jreg = (self.lreg / 2.0) * sum(self.params.W**2.0) 90 | return J + Jreg 91 | 92 | def predict_proba(self, idx): 93 | """ 94 | Predict class probabilities. 
95 |         """
96 |         x = self.sparams.L[idx]
97 |         p = softmax(self.params.W.dot(x) + self.params.b)
98 |         return p
99 | 
100 |     def predict(self, idx):
101 |         """Predict most likely class."""
102 |         P = self.predict_proba(idx)
103 |         return argmax(P, axis=1)
--------------------------------------------------------------------------------
/old_assignments/assignment3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kingtaurus/cs224d/10ad33f6bafeeaacae456fc48ef530edbfe5444a/old_assignments/assignment3.pdf
--------------------------------------------------------------------------------
/old_assignments/assignment3/README.md:
--------------------------------------------------------------------------------
1 | [CS224d: Deep Learning for Natural Language Processing](http://cs224d.stanford.edu/)
2 | ====================================================================================
3 | 
4 | **Due Date: 5/21/2015 (Thursday) 11:59 PM PST.**
5 | 
6 | In this assignment we will treat you as a real professional data scientist. This assignment should give you experience running, training, optimizing, debugging, and augmenting your neural nets, then rinsing and repeating. To reach state-of-the-art results, you must learn to look at your errors to gain insights, then augment your model and retrain it.
7 | 
8 | Setup
9 | -----
10 | 
11 | By now all of you should have a functioning `Python 2.7.x` environment; all remaining setup instructions are in the handout.
12 | 
13 | **Get the code:** [Download the starter code here](http://cs224d.stanford.edu/assignment3/assignment3.zip) and the [complementary written problems here](http://cs224d.stanford.edu/assignment3/assignment3.pdf).
14 | 
15 | Submitting your work
16 | --------------------
17 | 
18 | Once you are done working, zip your code base up and call it `<your-sunet-id>.zip`; for instance, if your Stanford email is `jdoe@stanford.edu`, your file name should be
19 | 
20 | `jdoe.zip`
21 | 
22 | Upload this file to [the Box for this assignment](https://stanford.box.com/signup/collablink/d_3641065535/110477b9efa36c).
23 | For the written component, which includes derivations and plots, please upload a PDF file of your solutions to [Scoryst](https://scoryst.com/course/67/submit/). When asked to map question parts to your PDF, please map the parts accordingly as a courtesy to your TAs. The last part of each problem is a placeholder for the programming component (b); you can just map it to the page of the last part in your written assignment.
24 | 
25 | Tasks
26 | -----
27 | 
28 | There are two parts to this assignment. The first is a very easy Recursive Neural Network implementation that you will train and test multiple times. The second is an augmented version of the plain RNN that adds one additional layer; your job is to see how much the model improves. There is a lot of extra credit in this PSet to give you all the opportunity to really show us how capable a data scientist you are! But we also want you focusing on your projects, so this PSet is intentionally short.
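
To make the two models concrete, here is a rough sketch of the core computation. This is illustrative only, not the assignment's starter code: the parameter names (`W`, `b`, `W2`, `b2`, `U`, `bs`) and all dimensions are made up, and the handout's exact equations take precedence. A plain recursive network builds a parent vector from its two children with a single affine-plus-nonlinearity step, and the 2-layer variant inserts one extra hidden layer before the softmax classifier at each node.

```python
import numpy as np

def compose(h_left, h_right, W, b):
    # Plain recursive step: parent vector from the concatenated children.
    return np.tanh(W.dot(np.concatenate([h_left, h_right])) + b)

def classify(h, W2, b2, U, bs):
    # "Deep" variant: one extra hidden layer before the per-node softmax.
    a = np.tanh(W2.dot(h) + b2)
    z = U.dot(a) + bs
    e = np.exp(z - z.max())
    return e / e.sum()

# Tiny smoke test with random parameters (dimensions are arbitrary).
d, d2, n_classes = 10, 8, 5
rng = np.random.RandomState(0)
W, b = 0.1 * rng.randn(d, 2 * d), np.zeros(d)
W2, b2 = 0.1 * rng.randn(d2, d), np.zeros(d2)
U, bs = 0.1 * rng.randn(n_classes, d2), np.zeros(n_classes)
parent = compose(rng.randn(d), rng.randn(d), W, b)
print(classify(parent, W2, b2, U, bs))  # probabilities summing to 1
```

In the assignment itself the child vectors come from parse trees and all parameters are trained by backpropagation through the tree structure; see the handout for the exact model and loss.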
29 | 
30 | ### Q1: Recursive Neural Network (30 points)
31 | 
32 | * (a): 5 points
33 | * (b): 5 points
34 | * (c): 15 points
35 | * (d): 5 points
36 | 
37 | ### Q2: 2-Layer Deep RNN (70 points and 30 Extra Credit points)
38 | 
39 | * (a): 15 points
40 | * (b): 15 points
41 | * (c): 30 points
42 | * (d): 10 points
43 | * (e): 15 extra points
44 | * (f): 15 extra points
45 | 
46 | ### Q3: RNTN (20 extra points)
47 | 
48 | * (a): 5 points (correct derivations)
49 | * (b): 5 points (correct implementation)
50 | * (c): 10 points (found optimal hyperparameters)
51 | 
52 | **Please try to finish the written component before writing code. We designed the written component to help you think through the details of your code implementation.**
53 | 
--------------------------------------------------------------------------------