├── Assignment1
    ├── FuZhichao
    │   ├── Assignment1
    │   │   ├── q1_softmax.py
    │   │   ├── q2_gradcheck.py
    │   │   ├── q2_neural.py
    │   │   ├── q2_sigmoid.py
    │   │   ├── q3_run.py
    │   │   ├── q3_sgd.py
    │   │   ├── q3_word2vec.py
    │   │   └── q4_sentiment.py
    │   └── assignment1-solution.pdf
    ├── README.md
    ├── TaoJi
    │   ├── assignment1
    │   │   ├── q1_softmax.py
    │   │   ├── q2_gradcheck.py
    │   │   ├── q2_neural.py
    │   │   ├── q2_sigmoid.py
    │   │   ├── q3_run.py
    │   │   ├── q3_sgd.py
    │   │   ├── q3_word2vec.py
    │   │   ├── q3_word_vectors.png
    │   │   ├── q4_dev_conf.png
    │   │   ├── q4_dev_pred.txt
    │   │   ├── q4_reg_v_acc.png
    │   │   └── q4_sentiment.py
    │   ├── solution.md
    │   └── solution.pdf
    ├── WeiYang
    │   ├── assignment1
    │   │   ├── q1_softmax.py
    │   │   ├── q2_gradcheck.py
    │   │   ├── q2_neural.py
    │   │   ├── q2_sigmoid.py
    │   │   ├── q3_run.py
    │   │   ├── q3_sgd.py
    │   │   ├── q3_word2vec.py
    │   │   └── q4_sentiment.py
    │   └── solution.pdf
    └── ZiyinHuang
    │   ├── Assignment1
    │       ├── Makefile
    │       ├── collect_submission.sh
    │       ├── get_datasets.sh
    │       ├── q1_softmax.py
    │       ├── q2_gradcheck.py
    │       ├── q2_neural.py
    │       ├── q2_sigmoid.py
    │       ├── q3_run.py
    │       ├── q3_sgd.py
    │       ├── q3_word2vec.py
    │       ├── q3_word_vectors.png
    │       ├── q4_dev_conf.png
    │       ├── q4_reg_v_acc.png
    │       └── q4_sentiment.py
    │   ├── assignment1_writen.pdf
    │   └── readme
├── Assignment2
    ├── README.md
    ├── TaoJi
    │   ├── assignment2
    │   │   ├── model.py
    │   │   ├── q1_classifier.py
    │   │   ├── q1_softmax.py
    │   │   ├── q2_initialization.py
    │   │   ├── q2_parser_model.py
    │   │   ├── q2_parser_transitions.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── __init__.pyc
    │   │   │   ├── general_utils.py
    │   │   │   ├── general_utils.pyc
    │   │   │   ├── parser_utils.py
    │   │   │   └── parser_utils.pyc
    │   ├── solution.md
    │   └── solution.pdf
    ├── WeiYang
    │   ├── assignment2
    │   │   ├── .idea
    │   │   │   ├── .name
    │   │   │   ├── assignment2.iml
    │   │   │   ├── misc.xml
    │   │   │   ├── modules.xml
    │   │   │   ├── vcs.xml
    │   │   │   └── workspace.xml
    │   │   ├── model.py
    │   │   ├── q1_classifier.py
    │   │   ├── q1_softmax.py
    │   │   ├── q2_initialization.py
    │   │   ├── q2_parser_model.py
    │   │   ├── q2_parser_transitions.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── __init__.pyc
    │   │   │   ├── general_utils.py
    │   │   │   ├── general_utils.pyc
    │   │   │   ├── parser_utils.py
    │   │   │   └── parser_utils.pyc
    │   └── solution.pdf
    └── ZhichaoFu
    │   ├── a.txt
    │   └── assignment2
    │       ├── model.py
    │       ├── q1_classifier.py
    │       ├── q1_softmax.py
    │       ├── q2_initialization.py
    │       ├── q2_parser_model.py
    │       ├── q2_parser_transitions.py
    │       └── utils
    │           ├── __init__.py
    │           ├── __init__.pyc
    │           ├── general_utils.py
    │           ├── general_utils.pyc
    │           ├── parser_utils.py
    │           └── parser_utils.pyc
├── Lecture11
    ├── 1611.04558.pdf
    ├── Lecture11.pdf
    ├── Lecture11_highlight.pdf
    └── README.md
├── Lecture12
    ├── 1611.05358.pdf
    ├── Lecture12.pdf
    ├── Lecture12_highlight.pdf
    └── README.md
├── Lecture13
    ├── Lecture13.pdf
    └── README.md
├── Lecture14
    ├── 1508.06615.pdf
    ├── D14-1181.pdf
    ├── P14-1062.pdf
    ├── README.md
    ├── cs224n-2017-lecture13-CNNs.pdf
    └── cs224n-2017-lecture13-highlight.pdf
├── Lecture2
    ├── 2016 Arora.pdf
    ├── CBOW.png
    ├── HS.png
    ├── Lecture2.pdf
    ├── Lecture2_highlight.pdf
    ├── Lecture2_supplement.ipynb
    ├── README.md
    ├── arXiv 2013 Mikolov-1.pdf
    ├── arXiv 2013 Mikolov.pdf
    ├── arXiv 2014 Goldberg.pdf
    └── word2vec.md
├── Lecture3
    ├── 2014 Pennington.pdf
    ├── Lecture3.pdf
    ├── Lecture3_highlight.pdf
    └── README.md
├── Lecture4
    ├── Lecture4.pdf
    └── README.md
├── Lecture5
    ├── A Primer on Neural Network Models.pdf
    ├── README.md
    └── lecture5.pdf
├── Lecture6
    ├── Lecture6.pdf
    ├── Lecture6_highlight.pdf
    ├── README.md
    └── improving-distributional-similarity-tacl-2015.pdf
├── Lecture8
    ├── Lecture8.pdf
    ├── Lecture8_highlight.pdf
    ├── README.md
    └── acl15.pdf
├── Lecture9
    ├── 1602.02410.pdf
    ├── 1703.02573.pdf
    ├── Lecture9.pdf
    ├── Lecture9_highlight.pdf
    ├── README.md
    └── char.pdf
└── README.md


/Assignment1/FuZhichao/Assignment1/q1_softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def softmax(x):
 5 |     """Compute the softmax function for each row of the input x.
 6 | 
 7 |     It is crucial that this function is optimized for speed because
 8 |     it will be used frequently in later code. You might find numpy
 9 |     functions np.exp, np.sum, np.reshape, np.max, and numpy
10 |     broadcasting useful for this task.
11 | 
12 |     Numpy broadcasting documentation:
13 |     http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
14 | 
15 |     You should also make sure that your code works for a single
16 |     N-dimensional vector (treat the vector as a single row) and
17 |     for M x N matrices. This may be useful for testing later. Also,
18 |     make sure that the dimensions of the output match the input.
19 | 
20 |     You must implement the optimization in problem 1(a) of the
21 |     written assignment!
22 | 
23 |     Arguments:
24 |     x -- A N dimensional vector or M x N dimensional numpy matrix.
25 | 
26 |     Return:
27 |     x -- You are allowed to modify x in-place
28 |     """
29 |     orig_shape = x.shape
30 | 
31 |     if len(x.shape) > 1:
32 |         # Matrix
33 |         x -= np.max(x, axis=1, keepdims=True)
34 |         x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
35 |         ### YOUR CODE HERE
36 |         #raise NotImplementedError
37 |         ### END YOUR CODE
38 |     else:
39 |         # Vector
40 |         x -= np.max(x)
41 |         x = np.exp(x) / np.sum(np.exp(x))
42 |         ### YOUR CODE HERE
43 |         #raise NotImplementedError
44 |         ### END YOUR CODE
45 | 
46 |     assert x.shape == orig_shape
47 |     return x
48 | 
49 | 
50 | def test_softmax_basic():
51 |     """
52 |     Some simple tests to get you started.
53 |     Warning: these are not exhaustive.
54 |     """
55 |     print "Running basic tests..."
56 |     test1 = softmax(np.array([1,2]))
57 |     print test1
58 |     ans1 = np.array([0.26894142,  0.73105858])
59 |     assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)
60 | 
61 |     test2 = softmax(np.array([[1001,1002],[3,4]]))
62 |     print test2
63 |     ans2 = np.array([
64 |         [0.26894142, 0.73105858],
65 |         [0.26894142, 0.73105858]])
66 |     assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)
67 | 
68 |     test3 = softmax(np.array([[-1001,-1002]]))
69 |     print test3
70 |     ans3 = np.array([0.73105858, 0.26894142])
71 |     assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)
72 | 
73 |     print "You should be able to verify these results by hand!\n"
74 | 
75 | 
76 | def test_softmax():
77 |     """
78 |     Use this space to test your softmax implementation by running:
79 |         python q1_softmax.py
80 |     This function will not be called by the autograder, nor will
81 |     your tests be graded.
82 |     """
83 |     print "Running your tests..."
84 |     ### YOUR CODE HERE
85 |     #raise NotImplementedError
86 |     ### END YOUR CODE
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     test_softmax_basic()
91 |     test_softmax()
92 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/Assignment1/q2_gradcheck.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import random
 5 | 
 6 | 
 7 | # First implement a gradient checker by filling in the following functions
 8 | def gradcheck_naive(f, x):
 9 |     """ Gradient check for a function f.
10 | 
11 |     Arguments:
12 |     f -- a function that takes a single argument and outputs the
13 |          cost and its gradients
14 |     x -- the point (numpy array) to check the gradient at
15 |     """
16 | 
17 |     rndstate = random.getstate()
18 |     random.setstate(rndstate)
19 |     fx, grad = f(x) # Evaluate function value at original point
20 |     h = 1e-4        # Do not change this!
21 | 
22 |     # Iterate over all indexes in x
23 |     it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
24 |     while not it.finished:
25 |         ix = it.multi_index
26 | 
27 |         # Try modifying x[ix] with h defined above to compute
28 |         # numerical gradients. Make sure you call random.setstate(rndstate)
29 |         # before calling f(x) each time. This will make it possible
30 |         # to test cost functions with built in randomness later.
31 | 
32 |         ### YOUR CODE HERE:
33 |         x[ix] += h
34 |         random.setstate(rndstate)
35 |         new_f1 = f(x)[0]
36 |         x[ix] -= 2*h
37 |         random.setstate(rndstate)
38 |         new_f2 = f(x)[0]
39 |         x[ix] += h
40 |         numgrad = (new_f1 - new_f2) / (2 * h)
41 |         #raise NotImplementedError
42 |         ### END YOUR CODE
43 | 
44 |         # Compare gradients
45 |         reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
46 |         if reldiff > 1e-5:
47 |             print "Gradient check failed."
48 |             print "First gradient error found at index %s" % str(ix)
49 |             print "Your gradient: %f \t Numerical gradient: %f" % (
50 |                 grad[ix], numgrad)
51 |             return
52 | 
53 |         it.iternext() # Step to next dimension
54 | 
55 |     print "Gradient check passed!"
56 | 
57 | 
58 | def sanity_check():
59 |     """
60 |     Some basic sanity checks.
61 |     """
62 |     quad = lambda x: (np.sum(x ** 2), x * 2)
63 | 
64 |     print "Running sanity checks..."
65 |     gradcheck_naive(quad, np.array(123.456))      # scalar test
66 |     gradcheck_naive(quad, np.random.randn(3,))    # 1-D test
67 |     gradcheck_naive(quad, np.random.randn(4,5))   # 2-D test
68 |     print ""
69 | 
70 | 
71 | def your_sanity_checks():
72 |     """
73 |     Use this space add any additional sanity checks by running:
74 |         python q2_gradcheck.py
75 |     This function will not be called by the autograder, nor will
76 |     your additional tests be graded.
77 |     """
78 |     print "Running your sanity checks..."
79 |     ### YOUR CODE HERE
80 |     #raise NotImplementedError
81 |     ### END YOUR CODE
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     sanity_check()
86 |     your_sanity_checks()
87 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/Assignment1/q2_neural.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import numpy as np
  4 | import random
  5 | 
  6 | from q1_softmax import softmax
  7 | from q2_sigmoid import sigmoid, sigmoid_grad
  8 | from q2_gradcheck import gradcheck_naive
  9 | 
 10 | 
 11 | def forward_backward_prop(data, labels, params, dimensions):
 12 |     """
 13 |     Forward and backward propagation for a two-layer sigmoidal network
 14 | 
 15 |     Compute the forward propagation and for the cross entropy cost,
 16 |     and backward propagation for the gradients for all parameters.
 17 | 
 18 |     Arguments:
 19 |     data -- M x Dx matrix, where each row is a training example.
 20 |     labels -- M x Dy matrix, where each row is a one-hot vector.
 21 |     params -- Model parameters, these are unpacked for you.
 22 |     dimensions -- A tuple of input dimension, number of hidden units
 23 |                   and output dimension
 24 |     """
 25 | 
 26 |     ### Unpack network parameters (do not modify)
 27 |     if len(data.shape) >= 2:
 28 |         (N, _) = data.shape
 29 | 
 30 |     ofs = 0
 31 |     Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
 32 | 
 33 |     W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
 34 |     ofs += Dx * H
 35 |     b1 = np.reshape(params[ofs:ofs + H], (1, H))
 36 |     ofs += H
 37 |     W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
 38 |     ofs += H * Dy
 39 |     b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
 40 | 
 41 |     ### YOUR CODE HERE: forward propagation
 42 |     a1 = sigmoid(data.dot(W1) + b1)
 43 |     a2 = softmax(a1.dot(W2) + b2)
 44 | 
 45 |     cost = -np.sum(np.log(a2[labels == 1])) / N
 46 |     #raise NotImplementedError
 47 |     ### END YOUR CODE
 48 | 
 49 |     ### YOUR CODE HERE: backward propagation
 50 |     grad_a2 = ( a2 - labels ) 
 51 | 
 52 |     gradW2 = np.dot( a1.T, grad_a2 ) * (1.0/N)
 53 |     gradb2 = np.sum( grad_a2, axis=0, keepdims=True ) * (1.0/N)
 54 | 
 55 |     grad_a1 = np.dot( grad_a2, W2.T ) * sigmoid_grad(a1)
 56 | 
 57 |     gradW1 = np.dot( data.T, grad_a1 ) * (1.0/N)
 58 |     gradb1 = np.sum( grad_a1, axis=0, keepdims=True ) * (1.0/N)
 59 | 
 60 |     #raise NotImplementedError
 61 |     ### END YOUR CODE
 62 | 
 63 |     ### Stack gradients (do not modify)
 64 |     grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
 65 |         gradW2.flatten(), gradb2.flatten()))
 66 | 
 67 |     return cost, grad
 68 | 
 69 | 
 70 | def sanity_check():
 71 |     """
 72 |     Set up fake data and parameters for the neural network, and test using
 73 |     gradcheck.
 74 |     """
 75 |     print "Running sanity check..."
 76 | 
 77 |     N = 20
 78 |     dimensions = [10, 5, 10]
 79 |     data = np.random.randn(N, dimensions[0])   # each row will be a datum
 80 |     labels = np.zeros((N, dimensions[2]))
 81 |     for i in xrange(N):
 82 |         labels[i, random.randint(0,dimensions[2]-1)] = 1
 83 | 
 84 |     params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
 85 |         dimensions[1] + 1) * dimensions[2], )
 86 | 
 87 |     gradcheck_naive(lambda params:
 88 |         forward_backward_prop(data, labels, params, dimensions), params)
 89 | 
 90 | 
 91 | def your_sanity_checks():
 92 |     """
 93 |     Use this space add any additional sanity checks by running:
 94 |         python q2_neural.py
 95 |     This function will not be called by the autograder, nor will
 96 |     your additional tests be graded.
 97 |     """
 98 |     print "Running your sanity checks..."
 99 |     ### YOUR CODE HERE
100 |     #raise NotImplementedError
101 |     ### END YOUR CODE
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     sanity_check()
106 |     your_sanity_checks()
107 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/Assignment1/q2_sigmoid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | def sigmoid(x):
 7 |     """
 8 |     Compute the sigmoid function for the input here.
 9 | 
10 |     Arguments:
11 |     x -- A scalar or numpy array.
12 | 
13 |     Return:
14 |     s -- sigmoid(x)
15 |     """
16 |     
17 |     ### YOUR CODE HERE
18 |     s = 1.0 / (1 + np.exp(-x))
19 |     #raise NotImplementedError
20 |     ### END YOUR CODE
21 | 
22 |     return s
23 | 
24 | 
25 | def sigmoid_grad(s):
26 |     """
27 |     Compute the gradient for the sigmoid function here. Note that
28 |     for this implementation, the input s should be the sigmoid
29 |     function value of your original input x.
30 | 
31 |     Arguments:
32 |     s -- A scalar or numpy array.
33 | 
34 |     Return:
35 |     ds -- Your computed gradient.
36 |     """
37 |     ds = s * (1 - s)
38 |     ### YOUR CODE HERE
39 |     #raise NotImplementedError
40 |     ### END YOUR CODE
41 | 
42 |     return ds
43 | 
44 | 
45 | def test_sigmoid_basic():
46 |     """
47 |     Some simple tests to get you started.
48 |     Warning: these are not exhaustive.
49 |     """
50 |     print "Running basic tests..."
51 |     x = np.array([[1, 2], [-1, -2]])
52 |     f = sigmoid(x)
53 |     g = sigmoid_grad(f)
54 |     print f
55 |     f_ans = np.array([
56 |         [0.73105858, 0.88079708],
57 |         [0.26894142, 0.11920292]])
58 |     assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
59 |     print g
60 |     g_ans = np.array([
61 |         [0.19661193, 0.10499359],
62 |         [0.19661193, 0.10499359]])
63 |     assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
64 |     print "You should verify these results by hand!\n"
65 | 
66 | 
67 | def test_sigmoid():
68 |     """
69 |     Use this space to test your sigmoid implementation by running:
70 |         python q2_sigmoid.py
71 |     This function will not be called by the autograder, nor will
72 |     your tests be graded.
73 |     """
74 |     print "Running your tests..."
75 |     ### YOUR CODE HERE
76 |     #raise NotImplementedError
77 |     ### END YOUR CODE
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     test_sigmoid_basic();
82 |     test_sigmoid()
83 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/Assignment1/q3_run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import random
 4 | import numpy as np
 5 | from utils.treebank import StanfordSentiment
 6 | import matplotlib
 7 | matplotlib.use('agg')
 8 | import matplotlib.pyplot as plt
 9 | import time
10 | 
11 | from q3_word2vec import *
12 | from q3_sgd import *
13 | 
14 | # Reset the random seed to make sure that everyone gets the same results
15 | random.seed(314)
16 | dataset = StanfordSentiment()
17 | tokens = dataset.tokens()
18 | nWords = len(tokens)
19 | 
20 | # We are going to train 10-dimensional vectors for this assignment
21 | dimVectors = 10
22 | 
23 | # Context size
24 | C = 5
25 | 
26 | # Reset the random seed to make sure that everyone gets the same results
27 | random.seed(31415)
28 | np.random.seed(9265)
29 | 
30 | startTime=time.time()
31 | wordVectors = np.concatenate(
32 |     ((np.random.rand(nWords, dimVectors) - 0.5) /
33 |        dimVectors, np.zeros((nWords, dimVectors))),
34 |     axis=0)
35 | wordVectors = sgd(
36 |     lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
37 |         negSamplingCostAndGradient),
38 |     wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
39 | # Note that normalization is not called here. This is not a bug,
40 | # normalizing during training loses the notion of length.
41 | 
42 | print "sanity check: cost at convergence should be around or below 10"
43 | print "training took %d seconds" % (time.time() - startTime)
44 | 
45 | # concatenate the input and output word vectors
46 | wordVectors = np.concatenate(
47 |     (wordVectors[:nWords,:], wordVectors[nWords:,:]),
48 |     axis=0)
49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]
50 | 
51 | visualizeWords = [
52 |     "the", "a", "an", ",", ".", "?", "!", "``", "''", "--",
53 |     "good", "great", "cool", "brilliant", "wonderful", "well", "amazing",
54 |     "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb",
55 |     "annoying"]
56 | 
57 | visualizeIdx = [tokens[word] for word in visualizeWords]
58 | visualizeVecs = wordVectors[visualizeIdx, :]
59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0))
60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp)
61 | U,S,V = np.linalg.svd(covariance)
62 | coord = temp.dot(U[:,0:2])
63 | 
64 | for i in xrange(len(visualizeWords)):
65 |     plt.text(coord[i,0], coord[i,1], visualizeWords[i],
66 |         bbox=dict(facecolor='green', alpha=0.1))
67 | 
68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0])))
69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1])))
70 | 
71 | plt.savefig('q3_word_vectors.png')
72 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/Assignment1/q3_sgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Save parameters every a few SGD iterations as fail-safe
  4 | SAVE_PARAMS_EVERY = 5000
  5 | 
  6 | import glob
  7 | import random
  8 | import numpy as np
  9 | import os.path as op
 10 | import cPickle as pickle
 11 | 
 12 | 
 13 | def load_saved_params():
 14 |     """
 15 |     A helper function that loads previously saved parameters and resets
 16 |     iteration start.
 17 |     """
 18 |     st = 0
 19 |     for f in glob.glob("saved_params_*.npy"):
 20 |         iter = int(op.splitext(op.basename(f))[0].split("_")[2])
 21 |         if (iter > st):
 22 |             st = iter
 23 | 
 24 |     if st > 0:
 25 |         with open("saved_params_%d.npy" % st, "r") as f:
 26 |             params = pickle.load(f)
 27 |             state = pickle.load(f)
 28 |         return st, params, state
 29 |     else:
 30 |         return st, None, None
 31 | 
 32 | 
 33 | def save_params(iter, params):
 34 |     with open("saved_params_%d.npy" % iter, "w") as f:
 35 |         pickle.dump(params, f)
 36 |         pickle.dump(random.getstate(), f)
 37 | 
 38 | 
 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
 40 |         PRINT_EVERY=10):
 41 |     """ Stochastic Gradient Descent
 42 |     Implement the stochastic gradient descent method in this function.
 43 |     Arguments:
 44 |     f -- the function to optimize, it should take a single
 45 |          argument and yield two outputs, a cost and the gradient
 46 |          with respect to the arguments
 47 |     x0 -- the initial point to start SGD from
 48 |     step -- the step size for SGD
 49 |     iterations -- total iterations to run SGD for
 50 |     postprocessing -- postprocessing function for the parameters
 51 |                       if necessary. In the case of word2vec we will need to
 52 |                       normalize the word vectors to have unit length.
 53 |     PRINT_EVERY -- specifies how many iterations to output loss
 54 |     Return:
 55 |     x -- the parameter value after SGD finishes
 56 |     """
 57 |     # Anneal learning rate every several iterations
 58 |     ANNEAL_EVERY = 20000
 59 |     if useSaved:
 60 |         start_iter, oldx, state = load_saved_params()
 61 |         if start_iter > 0:
 62 |             x0 = oldx
 63 |             step *= 0.5 ** (start_iter / ANNEAL_EVERY)
 64 |         if state:
 65 |             random.setstate(state)
 66 |     else:
 67 |         start_iter = 0
 68 |     x = x0
 69 |     if not postprocessing:
 70 |         postprocessing = lambda x: x
 71 |     expcost = None
 72 |     for iter in xrange(start_iter + 1, iterations + 1):
 73 |         # Don't forget to apply the postprocessing after every iteration!
 74 |         # You might want to print the progress every few iterations.
 75 |         cost = None
 76 |         ### YOUR CODE HERE
 77 |         cost, grad = f(x)
 78 |         x -= step * grad
 79 |         postprocessing(x)
 80 |         ### END YOUR CODE
 81 |         if iter % PRINT_EVERY == 0:
 82 |             if not expcost:
 83 |                 expcost = cost
 84 |             else:
 85 |                 expcost = .95 * expcost + .05 * cost
 86 |             print "iter %d: %f" % (iter, expcost)
 87 |         if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
 88 |             save_params(iter, x)
 89 |         if iter % ANNEAL_EVERY == 0:
 90 |             step *= 0.5
 91 |     return x
 92 | 
 93 | 
 94 | def sanity_check():
 95 |     quad = lambda x: (np.sum(x ** 2), x * 2)
 96 | 
 97 |     print "Running sanity checks..."
 98 |     t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
 99 |     print "test 1 result:", t1
100 |     assert abs(t1) <= 1e-6
101 | 
102 |     t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
103 |     print "test 2 result:", t2
104 |     assert abs(t2) <= 1e-6
105 | 
106 |     t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
107 |     print "test 3 result:", t3
108 |     assert abs(t3) <= 1e-6
109 | 
110 |     print ""
111 | 
112 | 
113 | def your_sanity_checks():
114 |     """
115 |     Use this space add any additional sanity checks by running:
116 |         python q3_sgd.py
117 |     This function will not be called by the autograder, nor will
118 |     your additional tests be graded.
119 |     """
120 |     print "Running your sanity checks..."
121 |     ### YOUR CODE HERE
122 |     #raise NotImplementedError
123 |     ### END YOUR CODE
124 | 
125 | 
126 | if __name__ == "__main__":
127 |     sanity_check()
128 |     your_sanity_checks()
129 | 


--------------------------------------------------------------------------------
/Assignment1/FuZhichao/assignment1-solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/FuZhichao/assignment1-solution.pdf


--------------------------------------------------------------------------------
/Assignment1/README.md:
--------------------------------------------------------------------------------
 1 | # Assignment 1 
 2 | 
 3 | Materials: [Assignment 1](http://web.stanford.edu/class/cs224n/assignment1/index.html)
 4 | 
 5 | [stanford SentimentTreebank](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment1/stanfordSentimentTreebank.zip)
 6 | 
 7 | [Glove wordvec](http://nlp.stanford.edu/data/glove.6B.zip)
 8 | 
 9 | 
10 | 
11 | ```
12 | ./Assignment1
13 | 	/TaoJi
14 | 		solution.md		--解题报告 代码题实现+非代码题
15 | 		/assignment1
16 | 			...code...	--项目代码 (去除data)
17 | 	/ZiyinHuang
18 | 		...
19 | 	/YupeiDu
20 | 		...
21 | 	/MingZhong
22 | 		...
23 | 	...
24 | ```
25 | 
26 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q1_softmax.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | def softmax(x):
  5 |     """Compute the softmax function for each row of the input x.
  6 | 
  7 |     It is crucial that this function is optimized for speed because
  8 |     it will be used frequently in later code. You might find numpy
  9 |     functions np.exp, np.sum, np.reshape, np.max, and numpy
 10 |     broadcasting useful for this task.
 11 | 
 12 |     Numpy broadcasting documentation:
 13 |     http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
 14 | 
 15 |     You should also make sure that your code works for a single
 16 |     N-dimensional vector (treat the vector as a single row) and
 17 |     for M x N matrices. This may be useful for testing later. Also,
 18 |     make sure that the dimensions of the output match the input.
 19 | 
 20 |     You must implement the optimization in problem 1(a) of the
 21 |     written assignment!
 22 | 
 23 |     Arguments:
 24 |     x -- A N dimensional vector or M x N dimensional numpy matrix.
 25 | 
 26 |     Return:
 27 |     x -- You are allowed to modify x in-place
 28 |     """
 29 |     orig_shape = x.shape
 30 | 
 31 |     if len(x.shape) > 1:
 32 |         # Matrix
 33 |         ### YOUR CODE HERE
 34 |         c = np.max(x, axis=1).reshape(x.shape[0], 1)
 35 |         x = np.exp(x - c)
 36 |         norm = np.sum(x, axis=1).reshape(x.shape[0], 1)
 37 |         x = x / norm
 38 |         ### END YOUR CODE
 39 |     else:
 40 |         # Vector
 41 |         ### YOUR CODE HERE
 42 |         c = np.max(x)
 43 |         x = np.exp(x - c)
 44 |         x = x / x.sum()
 45 |         ### END YOUR CODE
 46 | 
 47 |     assert x.shape == orig_shape
 48 |     return x
 49 | 
 50 | 
 51 | def test_softmax_basic():
 52 |     """
 53 |     Some simple tests to get you started.
 54 |     Warning: these are not exhaustive.
 55 |     """
 56 |     print "Running basic tests..."
 57 |     test1 = softmax(np.array([1,2]))
 58 |     
 59 |     ans1 = np.array([0.26894142,  0.73105858])
 60 |     print test1
 61 |     assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)
 62 | 
 63 |     test2 = softmax(np.array([[1001,1002],[3,4]]))
 64 |     print test2
 65 |     ans2 = np.array([
 66 |         [0.26894142, 0.73105858],
 67 |         [0.26894142, 0.73105858]])
 68 |     assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)
 69 | 
 70 |     test3 = softmax(np.array([[-1001,-1002]]))
 71 |     print test3
 72 |     ans3 = np.array([0.73105858, 0.26894142])
 73 |     assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)
 74 | 
 75 |     print "You should be able to verify these results by hand!\n"
 76 | 
 77 | 
 78 | def test_softmax():
 79 |     """
 80 |     Use this space to test your softmax implementation by running:
 81 |         python q1_softmax.py
 82 |     This function will not be called by the autograder, nor will
 83 |     your tests be graded.
 84 |     """
 85 |     print "Running your tests..."
 86 |     ### YOUR CODE HERE
 87 |     mytest1 = softmax(np.array([
 88 |         [1, 2, 3, 6],  
 89 |         [2, 4, 5, 6],  
 90 |         [1, 2, 3, 6]]))
 91 |     print mytest1
 92 |     myans1 = np.array([
 93 |         [0.00626879, 0.01704033, 0.04632042, 0.93037047],
 94 |         [0.01203764, 0.08894682, 0.24178252, 0.65723302],
 95 |         [0.00626879, 0.01704033, 0.04632042, 0.93037047]])
 96 |     assert np.allclose(mytest1, myans1, rtol=1e-05, atol=1e-06)
 97 |     ### END YOUR CODE
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     test_softmax_basic()
102 |     test_softmax()
103 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q2_gradcheck.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import random
 5 | 
 6 | 
 7 | # First implement a gradient checker by filling in the following functions
 8 | def gradcheck_naive(f, x):
 9 |     """ Gradient check for a function f.
10 | 
11 |     Arguments:
12 |     f -- a function that takes a single argument and outputs the
13 |          cost and its gradients
14 |     x -- the point (numpy array) to check the gradient at
15 |     """
16 |     rndstate = random.getstate()
17 |     random.setstate(rndstate)
18 |     fx, grad = f(x) # Evaluate function value at original point
19 |     h = 1e-4        # Do not change this!
20 | 
21 |     # Iterate over all indexes in x
22 |     it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
23 |     while not it.finished:
24 |         ix = it.multi_index
25 | 
26 |         # Try modifying x[ix] with h defined above to compute
27 |         # numerical gradients. Make sure you call random.setstate(rndstate)
28 |         # before calling f(x) each time. This will make it possible
29 |         # to test cost functions with built in randomness later.
30 | 
31 |         ### YOUR CODE HERE:
32 |         
33 |         x[ix] += h
34 |         random.setstate(rndstate)
35 |         fx1, _ = f(x)
36 |         x[ix] -= 2 * h
37 |         random.setstate(rndstate)
38 |         fx2, _ = f(x)
39 |         numgrad = (fx1-fx2) / (2.0*h)
40 |         x[ix] += h
41 |         
42 |         ### END YOUR CODE
43 | 
44 |         # Compare gradients
45 |         reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
46 |         if reldiff > 1e-5:
47 |             print "Gradient check failed."
48 |             print "First gradient error found at index %s" % str(ix)
49 |             print "Your gradient: %f \t Numerical gradient: %f" % (
50 |                 grad[ix], numgrad)
51 |             return
52 | 
53 |         it.iternext() # Step to next dimension
54 | 
55 |     print "Gradient check passed!"
56 | 
57 | 
58 | def sanity_check():
59 |     """
60 |     Some basic sanity checks.
61 |     """
62 |     quad = lambda x: (np.sum(x ** 2), x * 2)
63 | 
64 |     print "Running sanity checks..."
65 |     gradcheck_naive(quad, np.array(123.456))      # scalar test
66 |     gradcheck_naive(quad, np.random.randn(3,))    # 1-D test
67 |     gradcheck_naive(quad, np.random.randn(4,5))   # 2-D test
68 |     print ""
69 | 
70 | 
71 | def your_sanity_checks():
72 |     """
73 |     Use this space add any additional sanity checks by running:
74 |         python q2_gradcheck.py
75 |     This function will not be called by the autograder, nor will
76 |     your additional tests be graded.
77 |     """
78 |     print "Running your sanity checks..."
79 |     ### YOUR CODE HERE
80 |     quad = lambda x: (np.sum(x ** 3), 3 * x ** 2)
81 |     gradcheck_naive(quad, np.random.randn(3,4,5))   # 3-D test
82 |     ### END YOUR CODE
83 | 
84 | 
85 | if __name__ == "__main__":
86 |     sanity_check()
87 |     your_sanity_checks()
88 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q2_neural.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import numpy as np
  4 | import random
  5 | 
  6 | from q1_softmax import softmax
  7 | from q2_sigmoid import sigmoid, sigmoid_grad
  8 | from q2_gradcheck import gradcheck_naive
  9 | 
 10 | 
 11 | def forward_backward_prop(data, labels, params, dimensions):
 12 |     """
 13 |     Forward and backward propagation for a two-layer sigmoidal network
 14 | 
 15 |     Compute the forward propagation and for the cross entropy cost,
 16 |     and backward propagation for the gradients for all parameters.
 17 | 
 18 |     Arguments:
 19 |     data -- M x Dx matrix, where each row is a training example.
 20 |     labels -- M x Dy matrix, where each row is a one-hot vector.
 21 |     params -- Model parameters, these are unpacked for you.
 22 |     dimensions -- A tuple of input dimension, number of hidden units
 23 |                   and output dimension
 24 |     """
 25 | 
 26 |     ### Unpack network parameters (do not modify)
 27 |     ofs = 0
 28 |     Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
 29 | 
 30 |     W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
 31 |     ofs += Dx * H
 32 |     b1 = np.reshape(params[ofs:ofs + H], (1, H))
 33 |     ofs += H
 34 |     W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
 35 |     ofs += H * Dy
 36 |     b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
 37 | 
 38 |     ### YOUR CODE HERE: forward propagation
 39 |     
 40 |     M = data.shape[0]
 41 |     # (M, H)
 42 |     a = np.dot(data, W1) + b1 
 43 |     hiddens = sigmoid(a)   
 44 |     # (M, Dy)
 45 |     z = np.dot(hiddens, W2) + b2  
 46 |     outputs = softmax(z)
 47 |    
 48 |     ### END YOUR CODE
 49 |     
 50 |     cost = -1 * labels * np.log(outputs)
 51 |     cost = cost.sum() / M
 52 | 
 53 |     ### YOUR CODE HERE: backward propagation
 54 |    
 55 |     # (M, Dy)
 56 |     gradZs = outputs - labels   
 57 |     # (M, H, Dx)
 58 |     gradW2 = np.array([np.dot(hiddens[i].reshape(1, H).T, gradZs[i].reshape(1, Dy)) for i in xrange(M)])
 59 |     # (H, Dx)
 60 |     gradW2 = gradW2.sum(axis=0) * (1.0/M)
 61 |     # (1, Dx)
 62 |     gradb2 = (gradZs.sum(axis=0) * (1.0/M)).reshape(1, Dy)
 63 |     # (M, H)
 64 |     gradAs = np.array([np.dot(gradZs[i].reshape(1, Dy), W2.T)*sigmoid_grad(hiddens[i]) for i in xrange(M)])
 65 |     # (M, Dx, H)
 66 |     gradW1 = np.array([np.dot(data[i].reshape(1, Dx).T, gradAs[i].reshape(1, H)) for i in xrange(M)])
 67 |     # (Dx, H)
 68 |     gradW1 = gradW1.sum(axis=0) * (1.0/M)
 69 |     # (1, H)
 70 |     gradb1 = gradAs.sum(axis=0) * (1.0/M)
 71 | 
 72 |     ### END YOUR CODE
 73 | 
 74 |     ### Stack gradients (do not modify)
 75 |     grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
 76 |         gradW2.flatten(), gradb2.flatten()))
 77 |     #print grad
 78 |     return cost, grad
 79 | 
 80 | 
 81 | def sanity_check():
 82 |     """
 83 |     Set up fake data and parameters for the neural network, and test using
 84 |     gradcheck.
 85 |     """
 86 |     print "Running sanity check..."
 87 | 
 88 |     N = 20
 89 |     dimensions = [10, 5, 10]
 90 |     data = np.random.randn(N, dimensions[0])   # each row will be a datum
 91 |     labels = np.zeros((N, dimensions[2]))
 92 |     for i in xrange(N):
 93 |         labels[i, random.randint(0,dimensions[2]-1)] = 1
 94 | 
 95 |     params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
 96 |         dimensions[1] + 1) * dimensions[2], )
 97 | 
 98 |     gradcheck_naive(lambda params:
 99 |         forward_backward_prop(data, labels, params, dimensions), params)
100 | 
101 | 
102 | def your_sanity_checks():
103 |     """
104 |     Use this space add any additional sanity checks by running:
105 |         python q2_neural.py
106 |     This function will not be called by the autograder, nor will
107 |     your additional tests be graded.
108 |     """
109 |     print "Running your sanity checks..."
110 |     ### YOUR CODE HERE
111 |     raise NotImplementedError
112 |     ### END YOUR CODE
113 | 
114 | 
115 | if __name__ == "__main__":
116 |     sanity_check()
117 |     your_sanity_checks()
118 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q2_sigmoid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | def sigmoid(x):
 7 |     """
 8 |     Compute the sigmoid function for the input here.
 9 | 
10 |     Arguments:
11 |     x -- A scalar or numpy array.
12 | 
13 |     Return:
14 |     s -- sigmoid(x)
15 |     """
16 | 
17 |     ### YOUR CODE HERE
18 |     s = 1. / (1. + np.exp(-x))
19 |     ### END YOUR CODE
20 | 
21 |     return s
22 | 
23 | 
24 | def sigmoid_grad(s):
25 |     """
26 |     Compute the gradient for the sigmoid function here. Note that
27 |     for this implementation, the input s should be the sigmoid
28 |     function value of your original input x.
29 | 
30 |     Arguments:
31 |     s -- A scalar or numpy array.
32 | 
33 |     Return:
34 |     ds -- Your computed gradient.
35 |     """
36 | 
37 |     ### YOUR CODE HERE    
38 |     ds = s * (1. - s)
39 |     ### END YOUR CODE
40 | 
41 |     return ds
42 | 
43 | 
44 | def test_sigmoid_basic():
45 |     """
46 |     Some simple tests to get you started.
47 |     Warning: these are not exhaustive.
48 |     """
49 |     print "Running basic tests..."
50 |     x = np.array([[1, 2], [-1, -2]])
51 |     f = sigmoid(x)
52 |     g = sigmoid_grad(f)
53 |     print f
54 |     f_ans = np.array([
55 |         [0.73105858, 0.88079708],
56 |         [0.26894142, 0.11920292]])
57 |     assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
58 |     print g
59 |     g_ans = np.array([
60 |         [0.19661193, 0.10499359],
61 |         [0.19661193, 0.10499359]])
62 |     assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
63 |     print "You should verify these results by hand!\n"
64 | 
65 | 
66 | def test_sigmoid():
67 |     """
68 |     Use this space to test your sigmoid implementation by running:
69 |         python q2_sigmoid.py
70 |     This function will not be called by the autograder, nor will
71 |     your tests be graded.
72 |     """
73 |     print "Running your tests..."
74 |     ### YOUR CODE HERE
75 |     x = 0.45
76 |     f = sigmoid(x)
77 |     g = sigmoid_grad(f)
78 |     print f
79 |     f_ans = 0.61063923
80 |     assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
81 |     print g
82 |     g_ans = 0.23775896
83 |     assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
84 |     ### END YOUR CODE
85 | 
86 | 
87 | if __name__ == "__main__":
88 |     test_sigmoid_basic();
89 |     test_sigmoid()
90 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q3_run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import random
 4 | import numpy as np
 5 | from utils.treebank import StanfordSentiment
 6 | import matplotlib
 7 | matplotlib.use('agg')
 8 | import matplotlib.pyplot as plt
 9 | import time
10 | 
11 | from q3_word2vec import *
12 | from q3_sgd import *
13 | 
14 | # Reset the random seed to make sure that everyone gets the same results
15 | random.seed(314)
16 | dataset = StanfordSentiment()
17 | tokens = dataset.tokens()
18 | nWords = len(tokens)
19 | 
20 | # We are going to train 10-dimensional vectors for this assignment
21 | dimVectors = 10
22 | 
23 | # Context size
24 | C = 5
25 | 
26 | # Reset the random seed to make sure that everyone gets the same results
27 | random.seed(31415)
28 | np.random.seed(9265)
29 | 
30 | startTime=time.time()
31 | wordVectors = np.concatenate(
32 |     ((np.random.rand(nWords, dimVectors) - 0.5) /
33 |        dimVectors, np.zeros((nWords, dimVectors))),
34 |     axis=0)
35 | wordVectors = sgd(
36 |     lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
37 |         negSamplingCostAndGradient),
38 |     wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
39 | # Note that normalization is not called here. This is not a bug,
40 | # normalizing during training loses the notion of length.
41 | 
42 | print "sanity check: cost at convergence should be around or below 10"
43 | print "training took %d seconds" % (time.time() - startTime)
44 | 
45 | # concatenate the input and output word vectors
46 | wordVectors = np.concatenate(
47 |     (wordVectors[:nWords,:], wordVectors[nWords:,:]),
48 |     axis=0)
49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]
50 | 
51 | visualizeWords = [
52 |     "the", "a", "an", ",", ".", "?", "!", "``", "''", "--",
53 |     "good", "great", "cool", "brilliant", "wonderful", "well", "amazing",
54 |     "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb",
55 |     "annoying"]
56 | 
57 | visualizeIdx = [tokens[word] for word in visualizeWords]
58 | visualizeVecs = wordVectors[visualizeIdx, :]
59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0))
60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp)
61 | U,S,V = np.linalg.svd(covariance)
62 | coord = temp.dot(U[:,0:2])
63 | 
64 | for i in xrange(len(visualizeWords)):
65 |     plt.text(coord[i,0], coord[i,1], visualizeWords[i],
66 |         bbox=dict(facecolor='green', alpha=0.1))
67 | 
68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0])))
69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1])))
70 | 
71 | plt.savefig('q3_word_vectors.png')
72 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q3_sgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Save parameters every a few SGD iterations as fail-safe
  4 | SAVE_PARAMS_EVERY = 5000
  5 | 
  6 | import glob
  7 | import random
  8 | import numpy as np
  9 | import os.path as op
 10 | import cPickle as pickle
 11 | 
 12 | 
 13 | def load_saved_params():
 14 |     """
 15 |     A helper function that loads previously saved parameters and resets
 16 |     iteration start.
 17 |     """
 18 |     st = 0
 19 |     for f in glob.glob("saved_params_*.npy"):
 20 |         iter = int(op.splitext(op.basename(f))[0].split("_")[2])
 21 |         if (iter > st):
 22 |             st = iter
 23 | 
 24 |     if st > 0:
 25 | 
 26 |         print st
 27 |         with open("saved_params_%d.npy" % st, "r") as f:
 28 |             params = pickle.load(f)
 29 |             state = pickle.load(f)
 30 |         return st, params, state
 31 |     else:
 32 |         return st, None, None
 33 | 
 34 | 
 35 | def save_params(iter, params):
 36 |     with open("saved_params_%d.npy" % iter, "w") as f:
 37 |         pickle.dump(params, f)
 38 |         pickle.dump(random.getstate(), f)
 39 | 
 40 | 
 41 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
 42 |         PRINT_EVERY=10):
 43 |     """ Stochastic Gradient Descent
 44 | 
 45 |     Implement the stochastic gradient descent method in this function.
 46 | 
 47 |     Arguments:
 48 |     f -- the function to optimize, it should take a single
 49 |          argument and yield two outputs, a cost and the gradient
 50 |          with respect to the arguments
 51 |     x0 -- the initial point to start SGD from
 52 |     step -- the step size for SGD
 53 |     iterations -- total iterations to run SGD for
 54 |     postprocessing -- postprocessing function for the parameters
 55 |                       if necessary. In the case of word2vec we will need to
 56 |                       normalize the word vectors to have unit length.
 57 |     PRINT_EVERY -- specifies how many iterations to output loss
 58 | 
 59 |     Return:
 60 |     x -- the parameter value after SGD finishes
 61 |     """
 62 | 
 63 |     # Anneal learning rate every several iterations
 64 |     ANNEAL_EVERY = 20000
 65 | 
 66 |     if useSaved:
 67 |         start_iter, oldx, state = load_saved_params()
 68 |         if start_iter > 0:
 69 |             x0 = oldx
 70 |             step *= 0.5 ** (start_iter / ANNEAL_EVERY)
 71 | 
 72 |         if state:
 73 |             random.setstate(state)
 74 |     else:
 75 |         start_iter = 0
 76 | 
 77 |     x = x0
 78 | 
 79 |     if not postprocessing:
 80 |         postprocessing = lambda x: x
 81 | 
 82 |     expcost = None
 83 | 
 84 |     for iter in xrange(start_iter + 1, iterations + 1):
 85 |         # Don't forget to apply the postprocessing after every iteration!
 86 |         # You might want to print the progress every few iterations.
 87 | 
 88 |         cost = None
 89 |         ### YOUR CODE HERE
 90 |         cost, grad = f(x)
 91 |         x -= step * grad
 92 |         x = postprocessing(x)
 93 |         ### END YOUR CODE
 94 | 
 95 |         if iter % PRINT_EVERY == 0:
 96 |             if not expcost:
 97 |                 expcost = cost
 98 |             else:
 99 |                 expcost = .95 * expcost + .05 * cost
100 |             print "iter %d: %f" % (iter, expcost)
101 | 
102 |         if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
103 |             save_params(iter, x)
104 | 
105 |         if iter % ANNEAL_EVERY == 0:
106 |             step *= 0.5
107 | 
108 |     return x
109 | 
110 | 
111 | def sanity_check():
112 |     quad = lambda x: (np.sum(x ** 2), x * 2)
113 | 
114 |     print "Running sanity checks..."
115 |     t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
116 |     print "test 1 result:", t1
117 |     assert abs(t1) <= 1e-6
118 | 
119 |     t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
120 |     print "test 2 result:", t2
121 |     assert abs(t2) <= 1e-6
122 | 
123 |     t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
124 |     print "test 3 result:", t3
125 |     assert abs(t3) <= 1e-6
126 | 
127 |     print ""
128 | 
129 | 
130 | def your_sanity_checks():
131 |     """
132 |     Use this space add any additional sanity checks by running:
133 |         python q3_sgd.py
134 |     This function will not be called by the autograder, nor will
135 |     your additional tests be graded.
136 |     """
137 |     print "Running your sanity checks..."
138 |     ### YOUR CODE HERE
139 |     quad = lambda x: (np.sum((x-1) ** 2), x * 2 - 2)
140 | 
141 |     print "Running sanity checks..."
142 |     t1 = sgd(quad, 1.5, 0.01, 1000, PRINT_EVERY=100)
143 |     print "test 1 result:", t1
144 |     assert abs(t1-1) <= 1e-6
145 | 
146 |     t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
147 |     print "test 2 result:", t2
148 |     assert abs(t2-1) <= 1e-6
149 | 
150 |     t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
151 |     print "test 3 result:", t3
152 |     assert abs(t3-1) <= 1e-6
153 | 
154 |     print ""
155 |     ### END YOUR CODE
156 | 
157 | 
158 | if __name__ == "__main__":
159 |     sanity_check()
160 |     your_sanity_checks()
161 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q3_word_vectors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q3_word_vectors.png


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q4_dev_conf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q4_dev_conf.png


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q4_reg_v_acc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q4_reg_v_acc.png


--------------------------------------------------------------------------------
/Assignment1/TaoJi/assignment1/q4_sentiment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse
  4 | import numpy as np
  5 | import matplotlib
  6 | matplotlib.use('agg')
  7 | import matplotlib.pyplot as plt
  8 | import itertools
  9 | 
 10 | from utils.treebank import StanfordSentiment
 11 | import utils.glove as glove
 12 | 
 13 | from q3_sgd import load_saved_params, sgd
 14 | 
 15 | # We will use sklearn here because it will run faster than implementing
 16 | # ourselves. However, for other parts of this assignment you must implement
 17 | # the functions yourself!
 18 | from sklearn.linear_model import LogisticRegression
 19 | from sklearn.metrics import confusion_matrix
 20 | 
 21 | 
 22 | def getArguments():
 23 |     parser = argparse.ArgumentParser()
 24 |     group = parser.add_mutually_exclusive_group(required=True)
 25 |     group.add_argument("--pretrained", dest="pretrained", action="store_true",
 26 |                        help="Use pretrained GloVe vectors.")
 27 |     group.add_argument("--yourvectors", dest="yourvectors", action="store_true",
 28 |                        help="Use your vectors from q3.")
 29 |     return parser.parse_args()
 30 | 
 31 | 
 32 | def getSentenceFeatures(tokens, wordVectors, sentence):
 33 |     """
 34 |     Obtain the sentence feature for sentiment analysis by averaging its
 35 |     word vectors
 36 |     """
 37 | 
 38 |     # Implement computation for the sentence features given a sentence.
 39 | 
 40 |     # Inputs:
 41 |     # tokens -- a dictionary that maps words to their indices in
 42 |     #           the word vector list
 43 |     # wordVectors -- word vectors (each row) for all tokens
 44 |     # sentence -- a list of words in the sentence of interest
 45 | 
 46 |     # Output:
 47 |     # - sentVector: feature vector for the sentence
 48 | 
 49 |     sentVector = np.zeros((wordVectors.shape[1],))
 50 | 
 51 |     ### YOUR CODE HERE
 52 |     for word in sentence:
 53 |         sentVector += wordVectors[tokens[word]]
 54 |     sentVector /= len(sentence)
 55 |     ### END YOUR CODE
 56 | 
 57 |     assert sentVector.shape == (wordVectors.shape[1],)
 58 |     return sentVector
 59 | 
 60 | 
 61 | def getRegularizationValues():
 62 |     """Try different regularizations
 63 | 
 64 |     Return a sorted list of values to try.
 65 |     """
 66 |     values = None   # Assign a list of floats in the block below
 67 |     ### YOUR CODE HERE
 68 |     values = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 1.5, 2, 3, 4, 5, 10, 50, 100, 1000]
 69 |     ### END YOUR CODE
 70 |     return sorted(values)
 71 | 
 72 | 
 73 | def chooseBestModel(results):
 74 |     """Choose the best model based on parameter tuning on the dev set
 75 | 
 76 |     Arguments:
 77 |     results -- A list of python dictionaries of the following format:
 78 |         {
 79 |             "reg": regularization,
 80 |             "clf": classifier,
 81 |             "train": trainAccuracy,
 82 |             "dev": devAccuracy,
 83 |             "test": testAccuracy
 84 |         }
 85 | 
 86 |     Returns:
 87 |     Your chosen result dictionary.
 88 |     """
 89 |     bestResult = None
 90 | 
 91 |     ### YOUR CODE HERE
 92 |     bestResult = max(results, key=lambda x:  x["dev"])
 93 |     ### END YOUR CODE
 94 | 
 95 |     return bestResult
 96 | 
 97 | 
 98 | def accuracy(y, yhat):
 99 |     """ Precision for classifier """
100 |     assert(y.shape == yhat.shape)
101 |     return np.sum(y == yhat) * 100.0 / y.size
102 | 
103 | 
104 | def plotRegVsAccuracy(regValues, results, filename):
105 |     """ Make a plot of regularization vs accuracy """
106 |     plt.plot(regValues, [x["train"] for x in results])
107 |     plt.plot(regValues, [x["dev"] for x in results])
108 |     plt.xscale('log')
109 |     plt.xlabel("regularization")
110 |     plt.ylabel("accuracy")
111 |     plt.legend(['train', 'dev'], loc='upper left')
112 |     plt.savefig(filename)
113 | 
114 | 
115 | def outputConfusionMatrix(features, labels, clf, filename):
116 |     """ Generate a confusion matrix """
117 |     pred = clf.predict(features)
118 |     cm = confusion_matrix(labels, pred, labels=range(5))
119 |     plt.figure()
120 |     plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
121 |     plt.colorbar()
122 |     classes = ["- -", "-", "neut", "+", "+ +"]
123 |     tick_marks = np.arange(len(classes))
124 |     plt.xticks(tick_marks, classes)
125 |     plt.yticks(tick_marks, classes)
126 |     thresh = cm.max() / 2.
127 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
128 |         plt.text(j, i, cm[i, j],
129 |                  horizontalalignment="center",
130 |                  color="white" if cm[i, j] > thresh else "black")
131 |     plt.tight_layout()
132 |     plt.ylabel('True label')
133 |     plt.xlabel('Predicted label')
134 |     plt.savefig(filename)
135 | 
136 | 
137 | def outputPredictions(dataset, features, labels, clf, filename):
138 |     """ Write the predictions to file """
139 |     pred = clf.predict(features)
140 |     with open(filename, "w") as f:
141 |         print >> f, "True\tPredicted\tText"
142 |         for i in xrange(len(dataset)):
143 |             print >> f, "%d\t%d\t%s" % (
144 |                 labels[i], pred[i], " ".join(dataset[i][0]))
145 | 
146 | 
147 | def main(args):
148 |     """ Train a model to do sentiment analyis"""
149 | 
150 |     # Load the dataset
151 |     dataset = StanfordSentiment()
152 |     tokens = dataset.tokens()
153 |     nWords = len(tokens)
154 | 
155 |     if args.yourvectors:
156 |         _, wordVectors, _ = load_saved_params()
157 |         wordVectors = np.concatenate(
158 |             (wordVectors[:nWords,:], wordVectors[nWords:,:]),
159 |             axis=1)
160 |     elif args.pretrained:
161 |         wordVectors = glove.loadWordVectors(tokens)
162 |     dimVectors = wordVectors.shape[1]
163 | 
164 |     # Load the train set
165 |     trainset = dataset.getTrainSentences()
166 |     nTrain = len(trainset)
167 |     trainFeatures = np.zeros((nTrain, dimVectors))
168 |     trainLabels = np.zeros((nTrain,), dtype=np.int32)
169 |     for i in xrange(nTrain):
170 |         words, trainLabels[i] = trainset[i]
171 |         trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
172 | 
173 |     # Prepare dev set features
174 |     devset = dataset.getDevSentences()
175 |     nDev = len(devset)
176 |     devFeatures = np.zeros((nDev, dimVectors))
177 |     devLabels = np.zeros((nDev,), dtype=np.int32)
178 |     for i in xrange(nDev):
179 |         words, devLabels[i] = devset[i]
180 |         devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
181 | 
182 |     # Prepare test set features
183 |     testset = dataset.getTestSentences()
184 |     nTest = len(testset)
185 |     testFeatures = np.zeros((nTest, dimVectors))
186 |     testLabels = np.zeros((nTest,), dtype=np.int32)
187 |     for i in xrange(nTest):
188 |         words, testLabels[i] = testset[i]
189 |         testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
190 | 
191 |     # We will save our results from each run
192 |     results = []
193 |     regValues = getRegularizationValues()
194 |     for reg in regValues:
195 |         print "Training for reg=%f" % reg
196 |         # Note: add a very small number to regularization to please the library
197 |         clf = LogisticRegression(C=1.0/(reg + 1e-12))
198 |         clf.fit(trainFeatures, trainLabels)
199 | 
200 |         # Test on train set
201 |         pred = clf.predict(trainFeatures)
202 |         trainAccuracy = accuracy(trainLabels, pred)
203 |         print "Train accuracy (%%): %f" % trainAccuracy
204 | 
205 |         # Test on dev set
206 |         pred = clf.predict(devFeatures)
207 |         devAccuracy = accuracy(devLabels, pred)
208 |         print "Dev accuracy (%%): %f" % devAccuracy
209 | 
210 |         # Test on test set
211 |         # Note: always running on test is poor style. Typically, you should
212 |         # do this only after validation.
213 |         pred = clf.predict(testFeatures)
214 |         testAccuracy = accuracy(testLabels, pred)
215 |         print "Test accuracy (%%): %f" % testAccuracy
216 | 
217 |         results.append({
218 |             "reg": reg,
219 |             "clf": clf,
220 |             "train": trainAccuracy,
221 |             "dev": devAccuracy,
222 |             "test": testAccuracy})
223 | 
224 |     # Print the accuracies
225 |     print ""
226 |     print "=== Recap ==="
227 |     print "Reg\t\tTrain\tDev\tTest"
228 |     for result in results:
229 |         print "%.2E\t%.3f\t%.3f\t%.3f" % (
230 |             result["reg"],
231 |             result["train"],
232 |             result["dev"],
233 |             result["test"])
234 |     print ""
235 | 
236 |     bestResult = chooseBestModel(results)
237 |     print "Best regularization value: %0.2E" % bestResult["reg"]
238 |     print "Test accuracy (%%): %f" % bestResult["test"]
239 | 
240 |     # do some error analysis
241 |     if args.pretrained:
242 |         plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png")
243 |         outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"],
244 |                               "q4_dev_conf.png")
245 |         outputPredictions(devset, devFeatures, devLabels, bestResult["clf"],
246 |                           "q4_dev_pred.txt")
247 | 
248 | 
249 | if __name__ == "__main__":
250 |     main(getArguments())
251 | 


--------------------------------------------------------------------------------
/Assignment1/TaoJi/solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/solution.pdf


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q1_softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def softmax(x):
 5 |     """Compute the softmax function for each row of the input x.
 6 | 
 7 |     It is crucial that this function is optimized for speed because
 8 |     it will be used frequently in later code. You might find numpy
 9 |     functions np.exp, np.sum, np.reshape, np.max, and numpy
10 |     broadcasting useful for this task.
11 | 
12 |     Numpy broadcasting documentation:
13 |     http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
14 | 
15 |     You should also make sure that your code works for a single
16 |     N-dimensional vector (treat the vector as a single row) and
17 |     for M x N matrices. This may be useful for testing later. Also,
18 |     make sure that the dimensions of the output match the input.
19 | 
20 |     You must implement the optimization in problem 1(a) of the
21 |     written assignment!
22 | 
23 |     Arguments:
24 |     x -- A N dimensional vector or M x N dimensional numpy matrix.
25 | 
26 |     Return:
27 |     x -- You are allowed to modify x in-place
28 |     """
29 |     orig_shape = x.shape
30 | 
31 |     if len(x.shape) > 1:
32 |         # Matrix
33 |         ### YOUR CODE HERE
34 |         x -= np.max(x, axis=1, keepdims=True)
35 |     	x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
36 |         #raise NotImplementedError
37 |         ### END YOUR CODE
38 |     else:
39 |         # Vector
40 |         ### YOUR CODE HERE
41 |         x -= np.max(x)
42 |         x = np.exp(x) / np.sum(np.exp(x))
43 |         #raise NotImplementedError
44 |         ### END YOUR CODE
45 | 
46 |     assert x.shape == orig_shape
47 |     return x
48 | 
49 | 
50 | def test_softmax_basic():
51 |     """
52 |     Some simple tests to get you started.
53 |     Warning: these are not exhaustive.
54 |     """
55 |     print "Running basic tests..."
56 |     test1 = softmax(np.array([1,2]))
57 |     print test1
58 |     ans1 = np.array([0.26894142,  0.73105858])
59 |     assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)
60 | 
61 |     test2 = softmax(np.array([[1001,1002],[3,4]]))
62 |     print test2
63 |     ans2 = np.array([
64 |         [0.26894142, 0.73105858],
65 |         [0.26894142, 0.73105858]])
66 |     assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)
67 | 
68 |     test3 = softmax(np.array([[-1001,-1002]]))
69 |     print test3
70 |     ans3 = np.array([0.73105858, 0.26894142])
71 |     assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)
72 | 
73 |     print "You should be able to verify these results by hand!\n"
74 | 
75 | 
76 | def test_softmax():
77 |     """
78 |     Use this space to test your softmax implementation by running:
79 |         python q1_softmax.py
80 |     This function will not be called by the autograder, nor will
81 |     your tests be graded.
82 |     """
83 |     print "Running your tests..."
84 |     ### YOUR CODE HERE
85 |     #raise NotImplementedError
86 |     ### END YOUR CODE
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     test_softmax_basic()
91 |     test_softmax()
92 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q2_gradcheck.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import random
 5 | 
 6 | 
 7 | # First implement a gradient checker by filling in the following functions
 8 | def gradcheck_naive(f, x):
 9 |     """ Gradient check for a function f.
10 | 
11 |     Arguments:
12 |     f -- a function that takes a single argument and outputs the
13 |          cost and its gradients
14 |     x -- the point (numpy array) to check the gradient at
15 |     """
16 | 
17 |     rndstate = random.getstate()
18 |     random.setstate(rndstate)
19 |     fx, grad = f(x) # Evaluate function value at original point
20 |     h = 1e-4        # Do not change this!
21 | 
22 |     # Iterate over all indexes in x
23 |     it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
24 |     while not it.finished:
25 |         ix = it.multi_index
26 | 
27 |         # Try modifying x[ix] with h defined above to compute
28 |         # numerical gradients. Make sure you call random.setstate(rndstate)
29 |         # before calling f(x) each time. This will make it possible
30 |         # to test cost functions with built in randomness later.
31 | 
32 |         ### YOUR CODE HERE:
33 |         old_val = x[ix]
34 |         x[ix] = old_val - h
35 |         random.setstate(rndstate)
36 |         fxh1, _ = f(x)
37 | 
38 |         x[ix] = old_val + h
39 |         random.setstate(rndstate)
40 |         fxh2, _ = f(x)
41 | 
42 |         numgrad = (fxh2 - fxh1) / (2 * h)
43 |         x[ix] = old_val
44 |         ### END YOUR CODE
45 | 
46 |         # Compare gradients
47 |         reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
48 |         if reldiff > 1e-5:
49 |             print "Gradient check failed."
50 |             print "First gradient error found at index %s" % str(ix)
51 |             print "Your gradient: %f \t Numerical gradient: %f" % (
52 |                 grad[ix], numgrad)
53 |             return
54 | 
55 |         it.iternext() # Step to next dimension
56 | 
57 |     print "Gradient check passed!"
58 | 
59 | 
60 | def sanity_check():
61 |     """
62 |     Some basic sanity checks.
63 |     """
64 |     quad = lambda x: (np.sum(x ** 2), x * 2)
65 | 
66 |     print "Running sanity checks..."
67 |     gradcheck_naive(quad, np.array(123.456))      # scalar test
68 |     gradcheck_naive(quad, np.random.randn(3,))    # 1-D test
69 |     gradcheck_naive(quad, np.random.randn(4,5))   # 2-D test
70 |     print ""
71 | 
72 | 
73 | def your_sanity_checks():
74 |     """
75 |     Use this space add any additional sanity checks by running:
76 |         python q2_gradcheck.py
77 |     This function will not be called by the autograder, nor will
78 |     your additional tests be graded.
79 |     """
80 |     print "Running your sanity checks..."
81 |     ### YOUR CODE HERE
82 |     
83 |     ### END YOUR CODE
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     sanity_check()
88 |     your_sanity_checks()
89 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q2_neural.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import random
 5 | 
 6 | from q1_softmax import softmax
 7 | from q2_sigmoid import sigmoid, sigmoid_grad
 8 | from q2_gradcheck import gradcheck_naive
 9 | 
10 | 
11 | def forward_backward_prop(data, labels, params, dimensions):
12 |     """
13 |     Forward and backward propagation for a two-layer sigmoidal network
14 | 
15 |     Compute the forward propagation and for the cross entropy cost,
16 |     and backward propagation for the gradients for all parameters.
17 | 
18 |     Arguments:
19 |     data -- M x Dx matrix, where each row is a training example.
20 |     labels -- M x Dy matrix, where each row is a one-hot vector.
21 |     params -- Model parameters, these are unpacked for you.
22 |     dimensions -- A tuple of input dimension, number of hidden units
23 |                   and output dimension
24 |     """
25 | 
26 |     ### Unpack network parameters (do not modify)
27 |     ofs = 0
28 |     Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
29 | 
30 |     W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
31 |     ofs += Dx * H
32 |     b1 = np.reshape(params[ofs:ofs + H], (1, H))
33 |     ofs += H
34 |     W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
35 |     ofs += H * Dy
36 |     b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
37 | 
38 |     ### YOUR CODE HERE: forward propagation
39 |     N = data.shape[0]
40 |     h = sigmoid(data.dot(W1) + b1)
41 |     output = softmax(h.dot(W2) + b2)
42 |     cost = - np.sum(np.log(output[labels == 1])) / N
43 |     ### END YOUR CODE
44 | 
45 |     ### YOUR CODE HERE: backward propagation
46 |     grad_output = output - labels
47 |     gradW2 = np.dot(h.T, grad_output) / N
48 |     gradb2 = np.sum(grad_output, axis=0, keepdims=True) / N
49 |     grad_h = np.dot(grad_output, W2.T) * sigmoid_grad(h)
50 |     gradW1 = np.dot(data.T, grad_h) / N
51 |     gradb1 = np.sum(grad_h, axis=0, keepdims=True) / N
52 |     ### END YOUR CODE
53 | 
54 |     ### Stack gradients (do not modify)
55 |     grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
56 |         gradW2.flatten(), gradb2.flatten()))
57 | 
58 |     return cost, grad
59 | 
60 | 
61 | def sanity_check():
62 |     """
63 |     Set up fake data and parameters for the neural network, and test using
64 |     gradcheck.
65 |     """
66 |     print "Running sanity check..."
67 | 
68 |     N = 20
69 |     dimensions = [10, 5, 10]
70 |     data = np.random.randn(N, dimensions[0])   # each row will be a datum
71 |     labels = np.zeros((N, dimensions[2]))
72 |     for i in xrange(N):
73 |         labels[i, random.randint(0,dimensions[2]-1)] = 1
74 |     params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
75 |         dimensions[1] + 1) * dimensions[2], )
76 | 
77 |     gradcheck_naive(lambda params:
78 |         forward_backward_prop(data, labels, params, dimensions), params)
79 | 
80 | 
81 | def your_sanity_checks():
82 |     """
83 |     Use this space add any additional sanity checks by running:
84 |         python q2_neural.py
85 |     This function will not be called by the autograder, nor will
86 |     your additional tests be graded.
87 |     """
88 |     print "Running your sanity checks..."
89 |     ### YOUR CODE HERE
90 |     
91 |     ### END YOUR CODE
92 | 
93 | 
94 | if __name__ == "__main__":
95 |     sanity_check()
96 |     your_sanity_checks()
97 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q2_sigmoid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | def sigmoid(x):
 7 |     """
 8 |     Compute the sigmoid function for the input here.
 9 | 
10 |     Arguments:
11 |     x -- A scalar or numpy array.
12 | 
13 |     Return:
14 |     s -- sigmoid(x)
15 |     """
16 | 
17 |     ### YOUR CODE HERE
18 |     s = 1 / (1 + np.exp(-x))
19 |     ### END YOUR CODE
20 | 
21 |     return s
22 | 
23 | 
24 | def sigmoid_grad(s):
25 |     """
26 |     Compute the gradient for the sigmoid function here. Note that
27 |     for this implementation, the input s should be the sigmoid
28 |     function value of your original input x.
29 | 
30 |     Arguments:
31 |     s -- A scalar or numpy array.
32 | 
33 |     Return:
34 |     ds -- Your computed gradient.
35 |     """
36 | 
37 |     ### YOUR CODE HERE
38 |     ds = s * (1 - s)
39 |     ### END YOUR CODE
40 | 
41 |     return ds
42 | 
43 | 
44 | def test_sigmoid_basic():
45 |     """
46 |     Some simple tests to get you started.
47 |     Warning: these are not exhaustive.
48 |     """
49 |     print "Running basic tests..."
50 |     x = np.array([[1, 2], [-1, -2]])
51 |     f = sigmoid(x)
52 |     print f
53 |     g = sigmoid_grad(f)
54 |     f_ans = np.array([
55 |         [0.73105858, 0.88079708],
56 |         [0.26894142, 0.11920292]])
57 |     assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
58 |     print g
59 |     g_ans = np.array([
60 |         [0.19661193, 0.10499359],
61 |         [0.19661193, 0.10499359]])
62 |     assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
63 |     print "You should verify these results by hand!\n"
64 | 
65 | 
66 | def test_sigmoid():
67 |     """
68 |     Use this space to test your sigmoid implementation by running:
69 |         python q2_sigmoid.py
70 |     This function will not be called by the autograder, nor will
71 |     your tests be graded.
72 |     """
73 |     print "Running your tests..."
74 |     ### YOUR CODE HERE
75 |     
76 |     ### END YOUR CODE
77 | 
78 | 
79 | if __name__ == "__main__":
80 |     test_sigmoid_basic();
81 |     test_sigmoid()
82 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q3_run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import random
 4 | import numpy as np
 5 | from utils.treebank import StanfordSentiment
 6 | import matplotlib
 7 | matplotlib.use('agg')
 8 | import matplotlib.pyplot as plt
 9 | import time
10 | 
11 | from q3_word2vec import *
12 | from q3_sgd import *
13 | 
14 | # Reset the random seed to make sure that everyone gets the same results
15 | random.seed(314)
16 | dataset = StanfordSentiment()
17 | tokens = dataset.tokens()
18 | nWords = len(tokens)
19 | 
20 | # We are going to train 10-dimensional vecto rs for this assignment
21 | dimVectors = 10
22 | 
23 | # Context size
24 | C = 5
25 | 
26 | # Reset the random seed to make sure that everyone gets the same results
27 | random.seed(31415)
28 | np.random.seed(9265)
29 | 
30 | startTime=time.time()
31 | wordVectors = np.concatenate(
32 |     ((np.random.rand(nWords, dimVectors) - 0.5) /
33 |        dimVectors, np.zeros((nWords, dimVectors))),
34 |     axis=0)
35 | wordVectors = sgd(
36 |     lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
37 |         negSamplingCostAndGradient),
38 |     wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
39 | # Note that normalization is not called here. This is not a bug,
40 | # normalizing during training loses the notion of length.
41 | 
42 | print "sanity check: cost at convergence should be around or below 10"
43 | print "training took %d seconds" % (time.time() - startTime)
44 | 
45 | # concatenate the input and output word vectors
46 | wordVectors = np.concatenate(
47 |     (wordVectors[:nWords,:], wordVectors[nWords:,:]),
48 |     axis=0)
49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]
50 | 
51 | visualizeWords = [
52 |     "the", "a", "an", ",", ".", "?", "!", "``", "''", "--",
53 |     "good", "great", "cool", "brilliant", "wonderful", "well", "amazing",
54 |     "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb",
55 |     "annoying"]
56 | 
57 | visualizeIdx = [tokens[word] for word in visualizeWords]
58 | visualizeVecs = wordVectors[visualizeIdx, :]
59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0))
60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp)
61 | U,S,V = np.linalg.svd(covariance)
62 | coord = temp.dot(U[:,0:2])
63 | 
64 | for i in xrange(len(visualizeWords)):
65 |     plt.text(coord[i,0], coord[i,1], visualizeWords[i],
66 |         bbox=dict(facecolor='green', alpha=0.1))
67 | 
68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0])))
69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1])))
70 | 
71 | plt.savefig('q3_word_vectors.png')
72 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q3_sgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Save parameters every a few SGD iterations as fail-safe
  4 | SAVE_PARAMS_EVERY = 5000
  5 | 
  6 | import glob
  7 | import random
  8 | import numpy as np
  9 | import os.path as op
 10 | import cPickle as pickle
 11 | 
 12 | 
 13 | def load_saved_params():
 14 |     """
 15 |     A helper function that loads previously saved parameters and resets
 16 |     iteration start.
 17 |     """
 18 |     st = 0
 19 |     for f in glob.glob("saved_params_*.npy"):
 20 |         iter = int(op.splitext(op.basename(f))[0].split("_")[2])
 21 |         if (iter > st):
 22 |             st = iter
 23 | 
 24 |     if st > 0:
 25 |         with open("saved_params_%d.npy" % st, "r") as f:
 26 |             params = pickle.load(f)
 27 |             state = pickle.load(f)
 28 |         return st, params, state
 29 |     else:
 30 |         return st, None, None
 31 | 
 32 | 
 33 | def save_params(iter, params):
 34 |     with open("saved_params_%d.npy" % iter, "w") as f:
 35 |         pickle.dump(params, f)
 36 |         pickle.dump(random.getstate(), f)
 37 | 
 38 | 
 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
 40 |         PRINT_EVERY=10):
 41 |     """ Stochastic Gradient Descent
 42 | 
 43 |     Implement the stochastic gradient descent method in this function.
 44 | 
 45 |     Arguments:
 46 |     f -- the function to optimize, it should take a single
 47 |          argument and yield two outputs, a cost and the gradient
 48 |          with respect to the arguments
 49 |     x0 -- the initial point to start SGD from
 50 |     step -- the step size for SGD
 51 |     iterations -- total iterations to run SGD for
 52 |     postprocessing -- postprocessing function for the parameters
 53 |                       if necessary. In the case of word2vec we will need to
 54 |                       normalize the word vectors to have unit length.
 55 |     PRINT_EVERY -- specifies how many iterations to output loss
 56 | 
 57 |     Return:
 58 |     x -- the parameter value after SGD finishes
 59 |     """
 60 | 
 61 |     # Anneal learning rate every several iterations
 62 |     ANNEAL_EVERY = 20000
 63 | 
 64 |     if useSaved:
 65 |         start_iter, oldx, state = load_saved_params()
 66 |         if start_iter > 0:
 67 |             x0 = oldx
 68 |             step *= 0.5 ** (start_iter / ANNEAL_EVERY)
 69 | 
 70 |         if state:
 71 |             random.setstate(state)
 72 |     else:
 73 |         start_iter = 0
 74 | 
 75 |     x = x0
 76 | 
 77 |     if not postprocessing:
 78 |         postprocessing = lambda x: x
 79 | 
 80 |     expcost = None
 81 | 
 82 |     for iter in xrange(start_iter + 1, iterations + 1):
 83 |         # Don't forget to apply the postprocessing after every iteration!
 84 |         # You might want to print the progress every few iterations.
 85 | 
 86 |         cost = None
 87 |         ### YOUR CODE HERE
 88 |         cost, grad = f(x)
 89 |         x = x - step * grad
 90 |         x = postprocessing(x)
 91 |         ### END YOUR CODE
 92 | 
 93 |         if iter % PRINT_EVERY == 0:
 94 |             if not expcost:
 95 |                 expcost = cost
 96 |             else:
 97 |                 expcost = .95 * expcost + .05 * cost
 98 |             print "iter %d: %f" % (iter, expcost)
 99 | 
100 |         if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
101 |             save_params(iter, x)
102 | 
103 |         if iter % ANNEAL_EVERY == 0:
104 |             step *= 0.5
105 | 
106 |     return x
107 | 
108 | 
109 | def sanity_check():
110 |     quad = lambda x: (np.sum(x ** 2), x * 2)
111 | 
112 |     print "Running sanity checks..."
113 |     t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
114 |     print "test 1 result:", t1
115 |     assert abs(t1) <= 1e-6
116 | 
117 |     t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
118 |     print "test 2 result:", t2
119 |     assert abs(t2) <= 1e-6
120 | 
121 |     t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
122 |     print "test 3 result:", t3
123 |     assert abs(t3) <= 1e-6
124 | 
125 |     print ""
126 | 
127 | 
128 | def your_sanity_checks():
129 |     """
130 |     Use this space add any additional sanity checks by running:
131 |         python q3_sgd.py
132 |     This function will not be called by the autograder, nor will
133 |     your additional tests be graded.
134 |     """
135 |     print "Running your sanity checks..."
136 |     ### YOUR CODE HERE
137 |     
138 |     ### END YOUR CODE
139 | 
140 | 
141 | if __name__ == "__main__":
142 |     sanity_check()
143 |     your_sanity_checks()
144 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/assignment1/q4_sentiment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse
  4 | import numpy as np
  5 | import matplotlib
  6 | matplotlib.use('agg')
  7 | import matplotlib.pyplot as plt
  8 | import itertools
  9 | 
 10 | from utils.treebank import StanfordSentiment
 11 | import utils.glove as glove
 12 | 
 13 | from q3_sgd import load_saved_params, sgd
 14 | 
 15 | # We will use sklearn here because it will run faster than implementing
 16 | # ourselves. However, for other parts of this assignment you must implement
 17 | # the functions yourself!
 18 | from sklearn.linear_model import LogisticRegression
 19 | from sklearn.metrics import confusion_matrix
 20 | 
 21 | 
 22 | def getArguments():
 23 |     parser = argparse.ArgumentParser()
 24 |     group = parser.add_mutually_exclusive_group(required=True)
 25 |     group.add_argument("--pretrained", dest="pretrained", action="store_true",
 26 |                        help="Use pretrained GloVe vectors.")
 27 |     group.add_argument("--yourvectors", dest="yourvectors", action="store_true",
 28 |                        help="Use your vectors from q3.")
 29 |     return parser.parse_args()
 30 | 
 31 | 
 32 | def getSentenceFeatures(tokens, wordVectors, sentence):
 33 |     """
 34 |     Obtain the sentence feature for sentiment analysis by averaging its
 35 |     word vectors
 36 |     """
 37 | 
 38 |     # Implement computation for the sentence features given a sentence.
 39 | 
 40 |     # Inputs:
 41 |     # tokens -- a dictionary that maps words to their indices in
 42 |     #           the word vector list
 43 |     # wordVectors -- word vectors (each row) for all tokens
 44 |     # sentence -- a list of words in the sentence of interest
 45 | 
 46 |     # Output:
 47 |     # - sentVector: feature vector for the sentence
 48 | 
 49 |     sentVector = np.zeros((wordVectors.shape[1],))
 50 | 
 51 |     ### YOUR CODE HERE
 52 |     indices = [tokens[word] for word in sentence]
 53 |     sentVector = np.mean(wordVectors[indices], axis=0)
 54 |     ### END YOUR CODE
 55 | 
 56 |     assert sentVector.shape == (wordVectors.shape[1],)
 57 |     return sentVector
 58 | 
 59 | 
 60 | def getRegularizationValues():
 61 |     """Try different regularizations
 62 | 
 63 |     Return a sorted list of values to try.
 64 |     """
 65 |     values = None   # Assign a list of floats in the block below
 66 |     ### YOUR CODE HERE
 67 |     values = np.logspace(-4, 2, num=20, base=10)
 68 |     ### END YOUR CODE
 69 |     return sorted(values)
 70 | 
 71 | 
 72 | def chooseBestModel(results):
 73 |     """Choose the best model based on parameter tuning on the dev set
 74 | 
 75 |     Arguments:
 76 |     results -- A list of python dictionaries of the following format:
 77 |         {
 78 |             "reg": regularization,
 79 |             "clf": classifier,
 80 |             "train": trainAccuracy,
 81 |             "dev": devAccuracy,
 82 |             "test": testAccuracy
 83 |         }
 84 | 
 85 |     Returns:
 86 |     Your chosen result dictionary.
 87 |     """
 88 |     bestResult = None
 89 | 
 90 |     ### YOUR CODE HERE
 91 |     bestResult = max(results, key=lambda x: x["dev"])
 92 |     ### END YOUR CODE
 93 | 
 94 |     return bestResult
 95 | 
 96 | 
 97 | def accuracy(y, yhat):
 98 |     """ Precision for classifier """
 99 |     assert(y.shape == yhat.shape)
100 |     return np.sum(y == yhat) * 100.0 / y.size
101 | 
102 | 
103 | def plotRegVsAccuracy(regValues, results, filename):
104 |     """ Make a plot of regularization vs accuracy """
105 |     plt.plot(regValues, [x["train"] for x in results])
106 |     plt.plot(regValues, [x["dev"] for x in results])
107 |     plt.xscale('log')
108 |     plt.xlabel("regularization")
109 |     plt.ylabel("accuracy")
110 |     plt.legend(['train', 'dev'], loc='upper left')
111 |     plt.savefig(filename)
112 | 
113 | 
114 | def outputConfusionMatrix(features, labels, clf, filename):
115 |     """ Generate a confusion matrix """
116 |     pred = clf.predict(features)
117 |     cm = confusion_matrix(labels, pred, labels=range(5))
118 |     plt.figure()
119 |     plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
120 |     plt.colorbar()
121 |     classes = ["- -", "-", "neut", "+", "+ +"]
122 |     tick_marks = np.arange(len(classes))
123 |     plt.xticks(tick_marks, classes)
124 |     plt.yticks(tick_marks, classes)
125 |     thresh = cm.max() / 2.
126 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
127 |         plt.text(j, i, cm[i, j],
128 |                  horizontalalignment="center",
129 |                  color="white" if cm[i, j] > thresh else "black")
130 |     plt.tight_layout()
131 |     plt.ylabel('True label')
132 |     plt.xlabel('Predicted label')
133 |     plt.savefig(filename)
134 | 
135 | 
136 | def outputPredictions(dataset, features, labels, clf, filename):
137 |     """ Write the predictions to file """
138 |     pred = clf.predict(features)
139 |     with open(filename, "w") as f:
140 |         print >> f, "True\tPredicted\tText"
141 |         for i in xrange(len(dataset)):
142 |             print >> f, "%d\t%d\t%s" % (
143 |                 labels[i], pred[i], " ".join(dataset[i][0]))
144 | 
145 | 
146 | def main(args):
147 |     """ Train a model to do sentiment analyis"""
148 | 
149 |     # Load the dataset
150 |     dataset = StanfordSentiment()
151 |     tokens = dataset.tokens()
152 |     nWords = len(tokens)
153 | 
154 |     if args.yourvectors:
155 |         _, wordVectors, _ = load_saved_params()
156 |         wordVectors = np.concatenate(
157 |             (wordVectors[:nWords,:], wordVectors[nWords:,:]),
158 |             axis=1)
159 |     elif args.pretrained:
160 |         wordVectors = glove.loadWordVectors(tokens)
161 |     dimVectors = wordVectors.shape[1]
162 | 
163 |     # Load the train set
164 |     trainset = dataset.getTrainSentences()
165 |     nTrain = len(trainset)
166 |     trainFeatures = np.zeros((nTrain, dimVectors))
167 |     trainLabels = np.zeros((nTrain,), dtype=np.int32)
168 |     for i in xrange(nTrain):
169 |         words, trainLabels[i] = trainset[i]
170 |         trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
171 | 
172 |     # Prepare dev set features
173 |     devset = dataset.getDevSentences()
174 |     nDev = len(devset)
175 |     devFeatures = np.zeros((nDev, dimVectors))
176 |     devLabels = np.zeros((nDev,), dtype=np.int32)
177 |     for i in xrange(nDev):
178 |         words, devLabels[i] = devset[i]
179 |         devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
180 | 
181 |     # Prepare test set features
182 |     testset = dataset.getTestSentences()
183 |     nTest = len(testset)
184 |     testFeatures = np.zeros((nTest, dimVectors))
185 |     testLabels = np.zeros((nTest,), dtype=np.int32)
186 |     for i in xrange(nTest):
187 |         words, testLabels[i] = testset[i]
188 |         testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
189 | 
190 |     # We will save our results from each run
191 |     results = []
192 |     regValues = getRegularizationValues()
193 |     for reg in regValues:
194 |         print "Training for reg=%f" % reg
195 |         # Note: add a very small number to regularization to please the library
196 |         clf = LogisticRegression(C=1.0/(reg + 1e-12))
197 |         clf.fit(trainFeatures, trainLabels)
198 | 
199 |         # Test on train set
200 |         pred = clf.predict(trainFeatures)
201 |         trainAccuracy = accuracy(trainLabels, pred)
202 |         print "Train accuracy (%%): %f" % trainAccuracy
203 | 
204 |         # Test on dev set
205 |         pred = clf.predict(devFeatures)
206 |         devAccuracy = accuracy(devLabels, pred)
207 |         print "Dev accuracy (%%): %f" % devAccuracy
208 | 
209 |         # Test on test set
210 |         # Note: always running on test is poor style. Typically, you should
211 |         # do this only after validation.
212 |         pred = clf.predict(testFeatures)
213 |         testAccuracy = accuracy(testLabels, pred)
214 |         print "Test accuracy (%%): %f" % testAccuracy
215 | 
216 |         results.append({
217 |             "reg": reg,
218 |             "clf": clf,
219 |             "train": trainAccuracy,
220 |             "dev": devAccuracy,
221 |             "test": testAccuracy})
222 | 
223 |     # Print the accuracies
224 |     print ""
225 |     print "=== Recap ==="
226 |     print "Reg\t\tTrain\tDev\tTest"
227 |     for result in results:
228 |         print "%.2E\t%.3f\t%.3f\t%.3f" % (
229 |             result["reg"],
230 |             result["train"],
231 |             result["dev"],
232 |             result["test"])
233 |     print ""
234 | 
235 |     bestResult = chooseBestModel(results)
236 |     print "Best regularization value: %0.2E" % bestResult["reg"]
237 |     print "Test accuracy (%%): %f" % bestResult["test"]
238 | 
239 |     # do some error analysis
240 |     if args.pretrained:
241 |         plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png")
242 |         outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"],
243 |                               "q4_dev_conf.png")
244 |         outputPredictions(devset, devFeatures, devLabels, bestResult["clf"],
245 |                           "q4_dev_pred.txt")
246 | 
247 | 
248 | if __name__ == "__main__":
249 |     main(getArguments())
250 | 


--------------------------------------------------------------------------------
/Assignment1/WeiYang/solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/WeiYang/solution.pdf


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/Makefile:
--------------------------------------------------------------------------------
 1 | DATASETS_DIR=utils/datasets
 2 | 
 3 | init:
 4 | 	sh get_datasets.sh
 5 | 
 6 | submit:
 7 | 	sh collect_submission.sh
 8 | 
 9 | clean:
10 | 	rm -f assignment1.zip
11 | 	rm -rf ${DATASETS_DIR}
12 | 	rm -f *.pyc *.png *.npy utils/*.pyc
13 | 
14 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/collect_submission.sh:
--------------------------------------------------------------------------------
1 | rm -f assignment1.zip
2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy
3 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/get_datasets.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DATASETS_DIR="utils/datasets"
 4 | mkdir -p $DATASETS_DIR
 5 | 
 6 | cd $DATASETS_DIR
 7 | 
 8 | # Get Stanford Sentiment Treebank
 9 | if hash wget 2>/dev/null; then
10 |   wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip
11 | else
12 |   curl -O http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip
13 | fi
14 | unzip stanfordSentimentTreebank.zip
15 | rm stanfordSentimentTreebank.zip
16 | 
17 | # Get 50D GloVe vectors
18 | if hash wget 2>/dev/null; then
19 |   wget http://web.stanford.edu/~jamesh93/tmp/glove.6B.50d.txt.zip
20 | else
21 |   curl -O http://web.stanford.edu/~jamesh93/tmp/glove.6B.50d.txt.zip
22 | fi
23 | unzip glove.6B.50d.txt.zip
24 | rm glove.6B.50d.txt.zip
25 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q1_softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def softmax(x):
 5 |     """Compute the softmax function for each row of the input x.
 6 | 
 7 |     It is crucial that this function is optimized for speed because
 8 |     it will be used frequently in later code. You might find numpy
 9 |     functions np.exp, np.sum, np.reshape, np.max, and numpy
10 |     broadcasting useful for this task.
11 | 
12 |     Numpy broadcasting documentation:
13 |     http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
14 | 
15 |     You should also make sure that your code works for a single
16 |     N-dimensional vector (treat the vector as a single row) and
17 |     for M x N matrices. This may be useful for testing later. Also,
18 |     make sure that the dimensions of the output match the input.
19 | 
20 |     You must implement the optimization in problem 1(a) of the
21 |     written assignment!
22 | 
23 |     Arguments:
24 |     x -- A N dimensional vector or M x N dimensional numpy matrix.
25 | 
26 |     Return:
27 |     x -- You are allowed to modify x in-place
28 |     """
29 |     orig_shape = x.shape
30 | 
31 |     if len(x.shape) > 1:
32 |         # Matrix
33 |         ### YOUR CODE HERE
34 |         x = x.T
35 |         e_x = np.exp(x - np.max(x,axis=0))
36 |     	x = (e_x / e_x.sum(axis=0)).T
37 |         ### END YOUR CODE
38 |     else:
39 |         # Vector
40 |         ### YOUR CODE HERE
41 |         x = x.T
42 |         e_x = np.exp(x - np.max(x,axis=0))
43 |     	x = (e_x / e_x.sum(axis=0)).T
44 |         ### END YOUR CODE
45 |     assert x.shape == orig_shape
46 |     return x
47 | 
48 | 
49 | def test_softmax_basic():
50 |     """
51 |     Some simple tests to get you started.
52 |     Warning: these are not exhaustive.
53 |     """
54 |     print "Running basic tests..."
55 |     test1 = softmax(np.array([1,2]))
56 |     print test1
57 |     ans1 = np.array([0.26894142,  0.73105858])
58 |     assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)
59 | 
60 |     test2 = softmax(np.array([[1001,1002],[3,4]]))
61 |     print test2
62 |     ans2 = np.array([
63 |         [0.26894142, 0.73105858],
64 |         [0.26894142, 0.73105858]])
65 |     assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)
66 | 
67 |     test3 = softmax(np.array([[-1001,-1002]]))
68 |     print test3
69 |     ans3 = np.array([0.73105858, 0.26894142])
70 |     assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)
71 | 
72 |     print "You should be able to verify these results by hand!\n"
73 | 
74 | 
75 | def test_softmax():
76 |     """
77 |     Use this space to test your softmax implementation by running:
78 |         python q1_softmax.py
79 |     This function will not be called by the autograder, nor will
80 |     your tests be graded.
81 |     """
82 |     print "Running your tests..."
83 |     ### YOUR CODE HERE
84 |     raise NotImplementedError
85 |     ### END YOUR CODE
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     test_softmax_basic()
90 |     test_softmax()
91 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q2_gradcheck.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | import random
 5 | 
 6 | 
 7 | # First implement a gradient checker by filling in the following functions
 8 | def gradcheck_naive(f, x):
 9 |     """ Gradient check for a function f.
10 | 
11 |     Arguments:
12 |     f -- a function that takes a single argument and outputs the
13 |          cost and its gradients
14 |     x -- the point (numpy array) to check the gradient at
15 |     """
16 | 
17 |     rndstate = random.getstate()
18 |     random.setstate(rndstate)
19 |     fx, grad = f(x) # Evaluate function value at original point
20 |     h = 1e-4        # Do not change this!
21 | 
22 |     # Iterate over all indexes in x
23 |     it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
24 |     while not it.finished:
25 |         ix = it.multi_index
26 | 
27 |         # Try modifying x[ix] with h defined above to compute
28 |         # numerical gradients. Make sure you call random.setstate(rndstate)
29 |         # before calling f(x) each time. This will make it possible
30 |         # to test cost functions with built in randomness later.
31 | 
32 |         old_val = x[ix]
33 |         x[ix] = old_val - h
34 |         random.setstate(rndstate)
35 |         (fxh1, _) = f(x)
36 |         x[ix] = old_val + h
37 |         random.setstate(rndstate)
38 |         (fxh2, _) = f(x)
39 | 
40 |         numgrad = (fxh2 - fxh1)/(2*h)
41 |         x[ix] = old_val
42 | 
43 |         # Compare gradients
44 |         reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
45 |         if reldiff > 1e-5:
46 |             print "Gradient check failed."
47 |             print "First gradient error found at index %s" % str(ix)
48 |             print "Your gradient: %f \t Numerical gradient: %f" % (
49 |                 grad[ix], numgrad)
50 |             return
51 | 
52 |         it.iternext() # Step to next dimension
53 | 
54 |     print "Gradient check passed!"
55 | 
56 | 
57 | def sanity_check():
58 |     """
59 |     Some basic sanity checks.
60 |     """
61 |     quad = lambda x: (np.sum(x ** 2), x * 2)
62 | 
63 |     print "Running sanity checks..."
64 |     gradcheck_naive(quad, np.array(123.456))      # scalar test
65 |     gradcheck_naive(quad, np.random.randn(3,))    # 1-D test
66 |     gradcheck_naive(quad, np.random.randn(4,5))   # 2-D test
67 |     print ""
68 | 
69 | 
70 | def your_sanity_checks():
71 |     """
72 |     Use this space add any additional sanity checks by running:
73 |         python q2_gradcheck.py
74 |     This function will not be called by the autograder, nor will
75 |     your additional tests be graded.
76 |     """
77 |     print "Running your sanity checks..."
78 |     ### YOUR CODE HERE
79 |     raise NotImplementedError
80 |     ### END YOUR CODE
81 | 
82 | 
83 | if __name__ == "__main__":
84 |     sanity_check()
85 |     your_sanity_checks()
86 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q2_neural.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import numpy as np
  4 | import random
  5 | 
  6 | from q1_softmax import softmax
  7 | from q2_sigmoid import sigmoid, sigmoid_grad
  8 | from q2_gradcheck import gradcheck_naive
  9 | 
 10 | 
 11 | def forward_backward_prop(data, labels, params, dimensions):
 12 |     """
 13 |     Forward and backward propagation for a two-layer sigmoidal network
 14 | 
 15 |     Compute the forward propagation and for the cross entropy cost,
 16 |     and backward propagation for the gradients for all parameters.
 17 | 
 18 |     Arguments:
 19 |     data -- M x Dx matrix, where each row is a training example.
 20 |     labels -- M x Dy matrix, where each row is a one-hot vector.
 21 |     params -- Model parameters, these are unpacked for you.
 22 |     dimensions -- A tuple of input dimension, number of hidden units
 23 |                   and output dimension
 24 |     """
 25 | 
 26 |     ### Unpack network parameters (do not modify)
 27 |     ofs = 0
 28 |     Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
 29 | 
 30 |     W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
 31 |     ofs += Dx * H
 32 |     b1 = np.reshape(params[ofs:ofs + H], (1, H))
 33 |     ofs += H
 34 |     W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
 35 |     ofs += H * Dy
 36 |     b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
 37 | 
 38 |     ### YOUR CODE HERE: forward propagation
 39 |     
 40 |     z2 = np.dot(data, W1) + b1
 41 |     h2 = sigmoid(z2)
 42 |     z3 = np.dot(h2, W2) + b2
 43 |     y = softmax(z3)
 44 |     ### END YOUR CODE
 45 | 
 46 |     M = dimensions[2]
 47 |     cost = np.sum((-1*labels*np.log(y))) / M
 48 |     #print cost.shape
 49 |     ### YOUR CODE HERE: backward propagation
 50 |     delta3 = (y - labels) / M
 51 |     #print delta3.shape
 52 | 
 53 |     gradW2 = np.dot(h2.T, delta3)
 54 |     #print gradW2.shape
 55 |     gradb2 = np.sum(delta3,axis = 0)
 56 |     #print gradb2.shape
 57 |     delta2 = sigmoid_grad(h2) * np.dot(delta3, W2.T)  #hardmard product
 58 |     #print delta2.shape
 59 | 
 60 |     gradW1 = np.dot(data.T, delta2)
 61 |     #print gradW1.shape
 62 |     gradb1 = np.sum(delta2, axis = 0)
 63 |     #print gradb1.shape
 64 |     ### END YOUR CODE
 65 |     
 66 | 
 67 | 
 68 |     ### Stack gradients (do not modify)
 69 |     grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
 70 |         gradW2.flatten(), gradb2.flatten()))
 71 | 
 72 |     return cost, grad
 73 | 
 74 | 
 75 | def sanity_check():
 76 |     """
 77 |     Set up fake data and parameters for the neural network, and test using
 78 |     gradcheck.
 79 |     """
 80 |     print "Running sanity check..."
 81 | 
 82 |     N = 20
 83 |     dimensions = [10, 5, 10]
 84 |     data = np.random.randn(N, dimensions[0])   # each row will be a datum
 85 |     labels = np.zeros((N, dimensions[2]))
 86 |     for i in xrange(N):
 87 |         labels[i, random.randint(0,dimensions[2]-1)] = 1
 88 | 
 89 |     params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
 90 |         dimensions[1] + 1) * dimensions[2], )
 91 | 
 92 |     gradcheck_naive(lambda params:
 93 |         forward_backward_prop(data, labels, params, dimensions), params)
 94 | 
 95 | 
 96 | def your_sanity_checks():
 97 |     """
 98 |     Use this space add any additional sanity checks by running:
 99 |         python q2_neural.py
100 |     This function will not be called by the autograder, nor will
101 |     your additional tests be graded.
102 |     """
103 |     print "Running your sanity checks..."
104 |     ### YOUR CODE HERE
105 |     raise NotImplementedError
106 |     ### END YOUR CODE
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     sanity_check()
111 |     your_sanity_checks()
112 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q2_sigmoid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import numpy as np
 4 | 
 5 | 
 6 | def sigmoid(x):
 7 |     """
 8 |     Compute the sigmoid function for the input here.
 9 | 
10 |     Arguments:
11 |     x -- A scalar or numpy array.
12 | 
13 |     Return:
14 |     s -- sigmoid(x)
15 |     """
16 | 
17 |     ### YOUR CODE HERE
18 |     s =  1 / (1+np.exp(-x))
19 |     ### END YOUR CODE
20 | 
21 |     return s
22 | 
23 | 
24 | def sigmoid_grad(s):
25 |     """
26 |     Compute the gradient for the sigmoid function here. Note that
27 |     for this implementation, the input s should be the sigmoid
28 |     function value of your original input x.
29 | 
30 |     Arguments:
31 |     s -- A scalar or numpy array.
32 | 
33 |     Return:
34 |     ds -- Your computed gradient.
35 |     """
36 | 
37 |     ### YOUR CODE HERE
38 | 
39 |     ds =  s * (1-s)
40 |     ### END YOUR CODE
41 | 
42 |     return ds
43 | 
44 | 
45 | def test_sigmoid_basic():
46 |     """
47 |     Some simple tests to get you started.
48 |     Warning: these are not exhaustive.
49 |     """
50 |     print "Running basic tests..."
51 |     x = np.array([[1, 2], [-1, -2]])
52 |     f = sigmoid(x)
53 |     g = sigmoid_grad(f)
54 |     print f
55 |     f_ans = np.array([
56 |         [0.73105858, 0.88079708],
57 |         [0.26894142, 0.11920292]])
58 |     assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)
59 |     print g
60 |     g_ans = np.array([
61 |         [0.19661193, 0.10499359],
62 |         [0.19661193, 0.10499359]])
63 |     assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)
64 |     print "You should verify these results by hand!\n"
65 | 
66 | 
67 | def test_sigmoid():
68 |     """
69 |     Use this space to test your sigmoid implementation by running:
70 |         python q2_sigmoid.py
71 |     This function will not be called by the autograder, nor will
72 |     your tests be graded.
73 |     """
74 |     print "Running your tests..."
75 |     ### YOUR CODE HERE
76 |     raise NotImplementedError
77 |     ### END YOUR CODE
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     test_sigmoid_basic();
82 |     test_sigmoid()
83 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q3_run.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import random
 4 | import numpy as np
 5 | from utils.treebank import StanfordSentiment
 6 | import matplotlib
 7 | matplotlib.use('agg')
 8 | import matplotlib.pyplot as plt
 9 | import time
10 | 
11 | from q3_word2vec import *
12 | from q3_sgd import *
13 | 
14 | # Reset the random seed to make sure that everyone gets the same results
15 | random.seed(314)
16 | dataset = StanfordSentiment()
17 | tokens = dataset.tokens()
18 | nWords = len(tokens)
19 | 
20 | # We are going to train 10-dimensional vectors for this assignment
21 | dimVectors = 10
22 | 
23 | # Context size
24 | C = 5
25 | 
26 | # Reset the random seed to make sure that everyone gets the same results
27 | random.seed(31415)
28 | np.random.seed(9265)
29 | 
30 | startTime=time.time()
31 | wordVectors = np.concatenate(
32 |     ((np.random.rand(nWords, dimVectors) - 0.5) /
33 |        dimVectors, np.zeros((nWords, dimVectors))),
34 |     axis=0)
35 | wordVectors = sgd(
36 |     lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
37 |         negSamplingCostAndGradient),
38 |     wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10)
39 | # Note that normalization is not called here. This is not a bug,
40 | # normalizing during training loses the notion of length.
41 | 
42 | print "sanity check: cost at convergence should be around or below 10"
43 | print "training took %d seconds" % (time.time() - startTime)
44 | 
45 | # concatenate the input and output word vectors
46 | wordVectors = np.concatenate(
47 |     (wordVectors[:nWords,:], wordVectors[nWords:,:]),
48 |     axis=0)
49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]
50 | 
51 | visualizeWords = [
52 |     "the", "a", "an", ",", ".", "?", "!", "``", "''", "--",
53 |     "good", "great", "cool", "brilliant", "wonderful", "well", "amazing",
54 |     "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb",
55 |     "annoying"]
56 | 
57 | visualizeIdx = [tokens[word] for word in visualizeWords]
58 | visualizeVecs = wordVectors[visualizeIdx, :]
59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0))
60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp)
61 | U,S,V = np.linalg.svd(covariance)
62 | coord = temp.dot(U[:,0:2])
63 | 
64 | for i in xrange(len(visualizeWords)):
65 |     plt.text(coord[i,0], coord[i,1], visualizeWords[i],
66 |         bbox=dict(facecolor='green', alpha=0.1))
67 | 
68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0])))
69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1])))
70 | 
71 | plt.savefig('q3_word_vectors.png')
72 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q3_sgd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Save parameters every a few SGD iterations as fail-safe
  4 | SAVE_PARAMS_EVERY = 5000
  5 | 
  6 | import glob
  7 | import random
  8 | import numpy as np
  9 | import os.path as op
 10 | import cPickle as pickle
 11 | 
 12 | 
 13 | def load_saved_params():
 14 |     """
 15 |     A helper function that loads previously saved parameters and resets
 16 |     iteration start.
 17 |     """
 18 |     st = 0
 19 |     for f in glob.glob("saved_params_*.npy"):
 20 |         iter = int(op.splitext(op.basename(f))[0].split("_")[2])
 21 |         if (iter > st):
 22 |             st = iter
 23 | 
 24 |     if st > 0:
 25 |         with open("saved_params_%d.npy" % st, "r") as f:
 26 |             params = pickle.load(f)
 27 |             state = pickle.load(f)
 28 |         return st, params, state
 29 |     else:
 30 |         return st, None, None
 31 | 
 32 | 
 33 | def save_params(iter, params):
 34 |     with open("saved_params_%d.npy" % iter, "w") as f:
 35 |         pickle.dump(params, f)
 36 |         pickle.dump(random.getstate(), f)
 37 | 
 38 | 
 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False,
 40 |         PRINT_EVERY=10):
 41 |     """ Stochastic Gradient Descent
 42 | 
 43 |     Implement the stochastic gradient descent method in this function.
 44 | 
 45 |     Arguments:
 46 |     f -- the function to optimize, it should take a single
 47 |          argument and yield two outputs, a cost and the gradient
 48 |          with respect to the arguments
 49 |     x0 -- the initial point to start SGD from
 50 |     step -- the step size for SGD
 51 |     iterations -- total iterations to run SGD for
 52 |     postprocessing -- postprocessing function for the parameters
 53 |                       if necessary. In the case of word2vec we will need to
 54 |                       normalize the word vectors to have unit length.
 55 |     PRINT_EVERY -- specifies how many iterations to output loss
 56 | 
 57 |     Return:
 58 |     x -- the parameter value after SGD finishes
 59 |     """
 60 | 
 61 |     # Anneal learning rate every several iterations
 62 |     ANNEAL_EVERY = 20000
 63 | 
 64 |     if useSaved:
 65 |         start_iter, oldx, state = load_saved_params()
 66 |         if start_iter > 0:
 67 |             x0 = oldx
 68 |             step *= 0.5 ** (start_iter / ANNEAL_EVERY)
 69 | 
 70 |         if state:
 71 |             random.setstate(state)
 72 |     else:
 73 |         start_iter = 0
 74 | 
 75 |     x = x0
 76 | 
 77 |     if not postprocessing:
 78 |         postprocessing = lambda x: x
 79 | 
 80 |     expcost = None
 81 | 
 82 |     for iter in xrange(start_iter + 1, iterations + 1):
 83 |         # Don't forget to apply the postprocessing after every iteration!
 84 |         # You might want to print the progress every few iterations.
 85 | 
 86 |         cost = None
 87 |         ### YOUR CODE HERE
 88 |         cost, grad = f(x)
 89 |         x -= step * grad
 90 |         x = postprocessing(x)
 91 |         ### END YOUR CODE
 92 | 
 93 |         if iter % PRINT_EVERY == 0:
 94 |             if not expcost:
 95 |                 expcost = cost
 96 |             else:
 97 |                 expcost = .95 * expcost + .05 * cost
 98 |             print "iter %d: %f" % (iter, expcost)
 99 | 
100 |         if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
101 |             save_params(iter, x)
102 | 
103 |         if iter % ANNEAL_EVERY == 0:
104 |             step *= 0.5
105 | 
106 |     return x
107 | 
108 | 
109 | def sanity_check():
110 |     quad = lambda x: (np.sum(x ** 2), x * 2)
111 | 
112 |     print "Running sanity checks..."
113 |     t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
114 |     print "test 1 result:", t1
115 |     assert abs(t1) <= 1e-6
116 | 
117 |     t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
118 |     print "test 2 result:", t2
119 |     assert abs(t2) <= 1e-6
120 | 
121 |     t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
122 |     print "test 3 result:", t3
123 |     assert abs(t3) <= 1e-6
124 | 
125 |     print ""
126 | 
127 | 
128 | def your_sanity_checks():
129 |     """
130 |     Use this space add any additional sanity checks by running:
131 |         python q3_sgd.py
132 |     This function will not be called by the autograder, nor will
133 |     your additional tests be graded.
134 |     """
135 |     print "Running your sanity checks..."
136 |     ### YOUR CODE HERE
137 |     raise NotImplementedError
138 |     ### END YOUR CODE
139 | 
140 | 
141 | if __name__ == "__main__":
142 |     sanity_check()
143 |     your_sanity_checks()
144 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q3_word_vectors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q3_word_vectors.png


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q4_dev_conf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q4_dev_conf.png


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q4_reg_v_acc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q4_reg_v_acc.png


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/Assignment1/q4_sentiment.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse
  4 | import numpy as np
  5 | import matplotlib
  6 | matplotlib.use('agg')
  7 | import matplotlib.pyplot as plt
  8 | import itertools
  9 | 
 10 | from utils.treebank import StanfordSentiment
 11 | import utils.glove as glove
 12 | 
 13 | from q3_sgd import load_saved_params, sgd
 14 | 
 15 | # We will use sklearn here because it will run faster than implementing
 16 | # ourselves. However, for other parts of this assignment you must implement
 17 | # the functions yourself!
 18 | from sklearn.linear_model import LogisticRegression
 19 | from sklearn.metrics import confusion_matrix
 20 | 
 21 | 
 22 | def getArguments():
 23 |     parser = argparse.ArgumentParser()
 24 |     group = parser.add_mutually_exclusive_group(required=True)
 25 |     group.add_argument("--pretrained", dest="pretrained", action="store_true",
 26 |                        help="Use pretrained GloVe vectors.")
 27 |     group.add_argument("--yourvectors", dest="yourvectors", action="store_true",
 28 |                        help="Use your vectors from q3.")
 29 |     return parser.parse_args()
 30 | 
 31 | 
 32 | def getSentenceFeatures(tokens, wordVectors, sentence):
 33 |     """
 34 |     Obtain the sentence feature for sentiment analysis by averaging its
 35 |     word vectors
 36 |     """
 37 | 
 38 |     # Implement computation for the sentence features given a sentence.
 39 | 
 40 |     # Inputs:
 41 |     # tokens -- a dictionary that maps words to their indices in
 42 |     #           the word vector list
 43 |     # wordVectors -- word vectors (each row) for all tokens
 44 |     # sentence -- a list of words in the sentence of interest
 45 | 
 46 |     # Output:
 47 |     # - sentVector: feature vector for the sentence
 48 | 
 49 |     sentVector = np.zeros((wordVectors.shape[1],))
 50 | 
 51 |     ### YOUR CODE HERE
 52 |     for s in sentence:
 53 |         sentVector += wordVectors[tokens[s], :]
 54 | 
 55 |     sentVector *= 1.0 / len(sentence)
 56 |     ### END YOUR CODE
 57 | 
 58 |     assert sentVector.shape == (wordVectors.shape[1],)
 59 |     return sentVector
 60 | 
 61 | 
 62 | def getRegularizationValues():
 63 |     """Try different regularizations
 64 | 
 65 |     Return a sorted list of values to try.
 66 |     """
 67 |     values = None   # Assign a list of floats in the block below
 68 |     ### YOUR CODE HERE
 69 |     values = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 1.5, 2, 3, 4, 5, 10, 50, 100, 1000]
 70 |     ### END YOUR CODE
 71 |     return sorted(values)
 72 | 
 73 | 
 74 | def chooseBestModel(results):
 75 |     """Choose the best model based on parameter tuning on the dev set
 76 | 
 77 |     Arguments:
 78 |     results -- A list of python dictionaries of the following format:
 79 |         {
 80 |             "reg": regularization,
 81 |             "clf": classifier,
 82 |             "train": trainAccuracy,
 83 |             "dev": devAccuracy,
 84 |             "test": testAccuracy
 85 |         }
 86 | 
 87 |     Returns:
 88 |     Your chosen result dictionary.
 89 |     """
 90 |     bestResult = None
 91 | 
 92 |     ### YOUR CODE HERE
 93 |     bestResult = max(results, key=lambda x: x["dev"])
 94 |     ### END YOUR CODE
 95 | 
 96 |     return bestResult
 97 | 
 98 | 
 99 | def accuracy(y, yhat):
100 |     """ Precision for classifier """
101 |     assert(y.shape == yhat.shape)
102 |     return np.sum(y == yhat) * 100.0 / y.size
103 | 
104 | 
105 | def plotRegVsAccuracy(regValues, results, filename):
106 |     """ Make a plot of regularization vs accuracy """
107 |     plt.plot(regValues, [x["train"] for x in results])
108 |     plt.plot(regValues, [x["dev"] for x in results])
109 |     plt.xscale('log')
110 |     plt.xlabel("regularization")
111 |     plt.ylabel("accuracy")
112 |     plt.legend(['train', 'dev'], loc='upper left')
113 |     plt.savefig(filename)
114 | 
115 | 
116 | def outputConfusionMatrix(features, labels, clf, filename):
117 |     """ Generate a confusion matrix """
118 |     pred = clf.predict(features)
119 |     cm = confusion_matrix(labels, pred, labels=range(5))
120 |     plt.figure()
121 |     plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds)
122 |     plt.colorbar()
123 |     classes = ["- -", "-", "neut", "+", "+ +"]
124 |     tick_marks = np.arange(len(classes))
125 |     plt.xticks(tick_marks, classes)
126 |     plt.yticks(tick_marks, classes)
127 |     thresh = cm.max() / 2.
128 |     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
129 |         plt.text(j, i, cm[i, j],
130 |                  horizontalalignment="center",
131 |                  color="white" if cm[i, j] > thresh else "black")
132 |     plt.tight_layout()
133 |     plt.ylabel('True label')
134 |     plt.xlabel('Predicted label')
135 |     plt.savefig(filename)
136 | 
137 | 
138 | def outputPredictions(dataset, features, labels, clf, filename):
139 |     """ Write the predictions to file """
140 |     pred = clf.predict(features)
141 |     with open(filename, "w") as f:
142 |         print >> f, "True\tPredicted\tText"
143 |         for i in xrange(len(dataset)):
144 |             print >> f, "%d\t%d\t%s" % (
145 |                 labels[i], pred[i], " ".join(dataset[i][0]))
146 | 
147 | 
148 | def main(args):
149 |     """ Train a model to do sentiment analyis"""
150 | 
151 |     # Load the dataset
152 |     dataset = StanfordSentiment()
153 |     tokens = dataset.tokens()
154 |     nWords = len(tokens)
155 | 
156 |     if args.yourvectors:
157 |         _, wordVectors, _ = load_saved_params()
158 |         wordVectors = np.concatenate(
159 |             (wordVectors[:nWords,:], wordVectors[nWords:,:]),
160 |             axis=1)
161 |     elif args.pretrained:
162 |         wordVectors = glove.loadWordVectors(tokens)
163 |     dimVectors = wordVectors.shape[1]
164 | 
165 |     # Load the train set
166 |     trainset = dataset.getTrainSentences()
167 |     nTrain = len(trainset)
168 |     trainFeatures = np.zeros((nTrain, dimVectors))
169 |     trainLabels = np.zeros((nTrain,), dtype=np.int32)
170 |     for i in xrange(nTrain):
171 |         words, trainLabels[i] = trainset[i]
172 |         trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
173 | 
174 |     # Prepare dev set features
175 |     devset = dataset.getDevSentences()
176 |     nDev = len(devset)
177 |     devFeatures = np.zeros((nDev, dimVectors))
178 |     devLabels = np.zeros((nDev,), dtype=np.int32)
179 |     for i in xrange(nDev):
180 |         words, devLabels[i] = devset[i]
181 |         devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
182 | 
183 |     # Prepare test set features
184 |     testset = dataset.getTestSentences()
185 |     nTest = len(testset)
186 |     testFeatures = np.zeros((nTest, dimVectors))
187 |     testLabels = np.zeros((nTest,), dtype=np.int32)
188 |     for i in xrange(nTest):
189 |         words, testLabels[i] = testset[i]
190 |         testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words)
191 | 
192 |     # We will save our results from each run
193 |     results = []
194 |     regValues = getRegularizationValues()
195 |     for reg in regValues:
196 |         print "Training for reg=%f" % reg
197 |         # Note: add a very small number to regularization to please the library
198 |         clf = LogisticRegression(C=1.0/(reg + 1e-12))
199 |         clf.fit(trainFeatures, trainLabels)
200 | 
201 |         # Test on train set
202 |         pred = clf.predict(trainFeatures)
203 |         trainAccuracy = accuracy(trainLabels, pred)
204 |         print "Train accuracy (%%): %f" % trainAccuracy
205 | 
206 |         # Test on dev set
207 |         pred = clf.predict(devFeatures)
208 |         devAccuracy = accuracy(devLabels, pred)
209 |         print "Dev accuracy (%%): %f" % devAccuracy
210 | 
211 |         # Test on test set
212 |         # Note: always running on test is poor style. Typically, you should
213 |         # do this only after validation.
214 |         pred = clf.predict(testFeatures)
215 |         testAccuracy = accuracy(testLabels, pred)
216 |         print "Test accuracy (%%): %f" % testAccuracy
217 | 
218 |         results.append({
219 |             "reg": reg,
220 |             "clf": clf,
221 |             "train": trainAccuracy,
222 |             "dev": devAccuracy,
223 |             "test": testAccuracy})
224 | 
225 |     # Print the accuracies
226 |     print ""
227 |     print "=== Recap ==="
228 |     print "Reg\t\tTrain\tDev\tTest"
229 |     for result in results:
230 |         print "%.2E\t%.3f\t%.3f\t%.3f" % (
231 |             result["reg"],
232 |             result["train"],
233 |             result["dev"],
234 |             result["test"])
235 |     print ""
236 | 
237 |     bestResult = chooseBestModel(results)
238 |     print "Best regularization value: %0.2E" % bestResult["reg"]
239 |     print "Test accuracy (%%): %f" % bestResult["test"]
240 | 
241 |     # do some error analysis
242 |     if args.pretrained:
243 |         plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png")
244 |         outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"],
245 |                               "q4_dev_conf.png")
246 |         outputPredictions(devset, devFeatures, devLabels, bestResult["clf"],
247 |                           "q4_dev_pred.txt")
248 | 
249 | 
250 | if __name__ == "__main__":
251 |     main(getArguments())
252 | 


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/assignment1_writen.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/assignment1_writen.pdf


--------------------------------------------------------------------------------
/Assignment1/ZiyinHuang/readme:
--------------------------------------------------------------------------------
1 | Assignment 1 By ZY.Huang
2 | 


--------------------------------------------------------------------------------
/Assignment2/README.md:
--------------------------------------------------------------------------------
 1 | # Assignment 2 
 2 | 
 3 | Materials: [Assignment 2](http://web.stanford.edu/class/cs224n/assignment2/index.html)
 4 | 
 5 | 
 6 | 
 7 | ```
 8 | ./Assignment2
 9 | 	/TaoJi
10 | 		solution.md		--解题报告 代码题实现+非代码题
11 | 		/assignment2
12 | 			...code...	--项目代码 (去除data)
13 | 	/ZiyinHuang
14 | 		...
15 | 	/YupeiDu
16 | 		...
17 | 	/MingZhong
18 | 		...
19 | 	...
20 | ```
21 | 
22 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/model.py:
--------------------------------------------------------------------------------
  1 | class Model(object):
  2 |     """Abstracts a Tensorflow graph for a learning task.
  3 | 
  4 |     We use various Model classes as usual abstractions to encapsulate tensorflow
  5 |     computational graphs. Each algorithm you will construct in this homework will
  6 |     inherit from a Model object.
  7 |     """
  8 |     def add_placeholders(self):
  9 |         """Adds placeholder variables to tensorflow computational graph.
 10 | 
 11 |         Tensorflow uses placeholder variables to represent locations in a
 12 |         computational graph where data is inserted.  These placeholders are used as
 13 |         inputs by the rest of the model building and will be fed data during
 14 |         training.
 15 | 
 16 |         See for more information:
 17 |         https://www.tensorflow.org/versions/r0.7/api_docs/python/io_ops.html#placeholders
 18 |         """
 19 |         raise NotImplementedError("Each Model must re-implement this method.")
 20 | 
 21 |     def create_feed_dict(self, inputs_batch, labels_batch=None):
 22 |         """Creates the feed_dict for one step of training.
 23 | 
 24 |         A feed_dict takes the form of:
 25 |         feed_dict = {
 26 |                 <placeholder>: <tensor of values to be passed for placeholder>,
 27 |                 ....
 28 |         }
 29 | 
 30 |         If labels_batch is None, then no labels are added to feed_dict.
 31 | 
 32 |         Hint: The keys for the feed_dict should be a subset of the placeholder
 33 |                     tensors created in add_placeholders.
 34 | 
 35 |         Args:
 36 |             inputs_batch: A batch of input data.
 37 |             labels_batch: A batch of label data.
 38 |         Returns:
 39 |             feed_dict: The feed dictionary mapping from placeholders to values.
 40 |         """
 41 |         raise NotImplementedError("Each Model must re-implement this method.")
 42 | 
 43 |     def add_prediction_op(self):
 44 |         """Implements the core of the model that transforms a batch of input data into predictions.
 45 | 
 46 |         Returns:
 47 |             pred: A tensor of shape (batch_size, n_classes)
 48 |         """
 49 |         raise NotImplementedError("Each Model must re-implement this method.")
 50 | 
 51 |     def add_loss_op(self, pred):
 52 |         """Adds Ops for the loss function to the computational graph.
 53 | 
 54 |         Args:
 55 |             pred: A tensor of shape (batch_size, n_classes)
 56 |         Returns:
 57 |             loss: A 0-d tensor (scalar) output
 58 |         """
 59 |         raise NotImplementedError("Each Model must re-implement this method.")
 60 | 
 61 |     def add_training_op(self, loss):
 62 |         """Sets up the training Ops.
 63 | 
 64 |         Creates an optimizer and applies the gradients to all trainable variables.
 65 |         The Op returned by this function is what must be passed to the
 66 |         sess.run() to train the model. See
 67 | 
 68 |         https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
 69 | 
 70 |         for more information.
 71 | 
 72 |         Args:
 73 |             loss: Loss tensor (a scalar).
 74 |         Returns:
 75 |             train_op: The Op for training.
 76 |         """
 77 | 
 78 |         raise NotImplementedError("Each Model must re-implement this method.")
 79 | 
 80 |     def train_on_batch(self, sess, inputs_batch, labels_batch):
 81 |         """Perform one step of gradient descent on the provided batch of data.
 82 | 
 83 |         Args:
 84 |             sess: tf.Session()
 85 |             input_batch: np.ndarray of shape (n_samples, n_features)
 86 |             labels_batch: np.ndarray of shape (n_samples, n_classes)
 87 |         Returns:
 88 |             loss: loss over the batch (a scalar)
 89 |         """
 90 |         feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch)
 91 |         _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
 92 |         return loss
 93 | 
 94 |     def predict_on_batch(self, sess, inputs_batch):
 95 |         """Make predictions for the provided batch of data
 96 | 
 97 |         Args:
 98 |             sess: tf.Session()
 99 |             input_batch: np.ndarray of shape (n_samples, n_features)
100 |         Returns:
101 |             predictions: np.ndarray of shape (n_samples, n_classes)
102 |         """
103 |         feed = self.create_feed_dict(inputs_batch)
104 |         predictions = sess.run(self.pred, feed_dict=feed)
105 |         return predictions
106 | 
107 |     def build(self):
108 |         self.add_placeholders()
109 |         self.pred = self.add_prediction_op()
110 |         self.loss = self.add_loss_op(self.pred)
111 |         self.train_op = self.add_training_op(self.loss)
112 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/q1_softmax.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | #import tensorflow as tf
  3 | import dynet as dy
  4 | from utils.general_utils import test_all_close
  5 | 
  6 | 
  7 | def softmax(x):
  8 |     """
  9 |     Compute the softmax function in tensorflow.
 10 | 
 11 |     You might find the tensorflow functions tf.exp, tf.reduce_max,
 12 |     tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may
 13 |     not need to use all of these functions). Recall also that many common
 14 |     tensorflow operations are sugared (e.g. x * y does a tensor multiplication
 15 |     if x and y are both tensors). Make sure to implement the numerical stability
 16 |     fixes as in the previous homework!
 17 | 
 18 |     Args:
 19 |         x:   tf.Tensor with shape (n_samples, n_features). Note feature vectors are
 20 |                   represented by row-vectors. (For simplicity, no need to handle 1-d
 21 |                   input as in the previous homework)
 22 |     Returns:
 23 |         out: tf.Tensor with shape (n_sample, n_features). You need to construct this
 24 |                   tensor in this problem.
 25 |     """
 26 | 
 27 |     ### YOUR CODE HERE
 28 |     fz = dy.exp(dy.colwise_add(x, -dy.max_dim(x, d=1)))
 29 |     fm = dy.sum_cols(fz)
 30 |     out = dy.cdiv(fz, fm)
 31 |     ### END YOUR CODE
 32 | 
 33 |     return out
 34 | 
 35 | 
 36 | def cross_entropy_loss(y, yhat):
 37 |     """
 38 |     Compute the cross entropy loss in tensorflow.
 39 |     The loss should be summed over the current minibatch.
 40 | 
 41 |     y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor
 42 |     of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should
 43 |     be of dtype tf.float32.
 44 | 
 45 |     The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many
 46 |     solutions are possible, so you may not need to use all of these functions).
 47 | 
 48 |     Note: You are NOT allowed to use the tensorflow built-in cross-entropy
 49 |                 functions.
 50 | 
 51 |     Args:
 52 |         y:    tf.Tensor with shape (n_samples, n_classes). One-hot encoded.
 53 |         yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a
 54 |                     probability distribution and should sum to 1.
 55 |     Returns:
 56 |         out:  tf.Tensor with shape (1,) (Scalar output). You need to construct this
 57 |                     tensor in the problem.
 58 |     """
 59 | 
 60 |     ### YOUR CODE HERE
 61 |     #out = (dy.sum_elems(out) / y.value().shape[0]).npvalue().reshape([])
 62 |     out = dy.sum_elems(-dy.cmult(y, dy.log(yhat)))
 63 |     ### END YOUR CODE
 64 | 
 65 |     return out
 66 | 
 67 | 
 68 | def test_softmax_basic():
 69 |     """
 70 |     Some simple tests of softmax to get you started.
 71 |     Warning: these are not exhaustive.
 72 |     """
 73 | 
 74 |     #test1 = softmax(tf.constant(np.array([[1001, 1002], [3, 4]]), dtype=tf.float32))
 75 |     dy.renew_cg()
 76 |     #test1 = softmax(dy.inputTensor(np.array([
 77 |     #    [1001, 1002],
 78 |     #    [3, 4]
 79 |     #], dtype=np.float32)))
 80 | 
 81 |     #with tf.Session() as sess:
 82 |     #        test1 = sess.run(test1)
 83 |     #test_all_close("Softmax test 1", test1, np.array([[0.26894142,  0.73105858],
 84 |     #                                                  [0.26894142,  0.73105858]]))
 85 | 
 86 |     #test2 = softmax(tf.constant(np.array([[-1001, -1002]]), dtype=tf.float32))
 87 |     test2 = softmax(dy.inputTensor(np.array([[-1001, -1002]], dtype=np.float32)))
 88 |     #with tf.Session() as sess:
 89 |     #        test2 = sess.run(test2)
 90 |     test_all_close("Softmax test 2", test2.value(), np.array([[0.73105858, 0.26894142]]))
 91 | 
 92 |     print "Basic (non-exhaustive) softmax tests pass\n"
 93 | 
 94 | 
 95 | def test_cross_entropy_loss_basic():
 96 |     """
 97 |     Some simple tests of cross_entropy_loss to get you started.
 98 |     Warning: these are not exhaustive.
 99 |     """
100 |     dy.renew_cg()
101 |     #y = np.array([[0, 1], [1, 0], [1, 0]])
102 |     #yhat = np.array([[.5, .5], [.5, .5], [.5, .5]])
103 |     y = np.array([[0, 1], [1, 0], [1, 0]], dtype=np.float32)
104 |     yhat = np.array([[.5, .5], [.5, .5], [.5, .5]], dtype=np.float32)
105 | 
106 |     test1 = cross_entropy_loss(
107 |             dy.inputTensor(y),
108 |             dy.inputTensor(yhat))
109 |             #tf.constant(y, dtype=tf.int32),
110 |             #tf.constant(yhat, dtype=tf.float32))
111 |     #with tf.Session() as sess:
112 |     #    test1 = sess.run(test1)
113 |     expected = -3 * np.log(.5)
114 |     test_all_close("Cross-entropy test 1", test1.npvalue().reshape([]), expected)
115 | 
116 |     print "Basic (non-exhaustive) cross-entropy tests pass"
117 | 
118 | if __name__ == "__main__":
119 |     test_softmax_basic()
120 |     test_cross_entropy_loss_basic()
121 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/q2_initialization.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #import tensorflow as tf
 3 | import dynet as dy
 4 | 
 5 | 
 6 | def xavier_weight_init():
 7 |     """Returns function that creates random tensor.
 8 | 
 9 |     The specified function will take in a shape (tuple or 1-d array) and
10 |     returns a random tensor of the specified shape drawn from the
11 |     Xavier initialization distribution.
12 | 
13 |     Hint: You might find tf.random_uniform useful.
14 |     """
15 |     def _xavier_initializer(shape, **kwargs):
16 |         """Defines an initializer for the Xavier distribution.
17 |         Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where
18 |             epsilon = sqrt(6) / <sum of the sizes of shape's dimensions>
19 |         e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3))
20 | 
21 |         This function will be used as a variable initializer.
22 | 
23 |         Args:
24 |             shape: Tuple or 1-d array that species the dimensions of the requested tensor.
25 |         Returns:
26 |             out: tf.Tensor of specified shape sampled from the Xavier distribution.
27 |         """
28 |         ### YOUR CODE HERE
29 |         m = dy.ParameterCollection()
30 |         out = m.add_parameters(shape).as_array()
31 |         ### END YOUR CODE
32 |         return out
33 |     # Returns defined initializer function.
34 |     return _xavier_initializer
35 | 
36 | 
37 | def test_initialization_basic():
38 |     """Some simple tests for the initialization.
39 |     """
40 |     print "Running basic tests..."
41 |     xavier_initializer = xavier_weight_init()
42 |     shape = (1,)
43 |     xavier_mat = xavier_initializer(shape)
44 |     assert xavier_mat.shape == shape
45 |     #assert xavier_mat.get_shape() == shape
46 | 
47 |     shape = (1, 2, 3)
48 |     xavier_mat = xavier_initializer(shape)
49 |     assert xavier_mat.shape == shape
50 |     #assert xavier_mat.get_shape() == shape
51 |     print "Basic (non-exhaustive) Xavier initialization tests pass"
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     test_initialization_basic()
56 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/q2_parser_transitions.py:
--------------------------------------------------------------------------------
  1 | class PartialParse(object):
  2 |     def __init__(self, sentence):
  3 |         """Initializes this partial parse.
  4 | 
  5 |         Your code should initialize the following fields:
  6 |             self.stack: The current stack represented as a list with the top of the stack as the
  7 |                         last element of the list.
  8 |             self.buffer: The current buffer represented as a list with the first item on the
  9 |                          buffer as the first item of the list
 10 |             self.dependencies: The list of dependencies produced so far. Represented as a list of
 11 |                     tuples where each tuple is of the form (head, dependent).
 12 |                     Order for this list doesn't matter.
 13 | 
 14 |         The root token should be represented with the string "ROOT"
 15 | 
 16 |         Args:
 17 |             sentence: The sentence to be parsed as a list of words.
 18 |                       Your code should not modify the sentence.
 19 |         """
 20 |         # The sentence being parsed is kept for bookkeeping purposes. Do not use it in your code.
 21 |         self.sentence = sentence
 22 | 
 23 |         ### YOUR CODE HERE
 24 |         self.stack = ["ROOT"]
 25 |         self.buffer = sentence[:]
 26 |         self.dependencies = []
 27 |         ### END YOUR CODE
 28 | 
 29 |     def parse_step(self, transition):
 30 |         """Performs a single parse step by applying the given transition to this partial parse
 31 | 
 32 |         Args:
 33 |             transition: A string that equals "S", "LA", or "RA" representing the shift, left-arc,
 34 |                         and right-arc transitions.
 35 |         """
 36 |         ### YOUR CODE HERE
 37 |         if transition == "S":
 38 |             if self.buffer:
 39 |                 self.stack.append(self.buffer[0])
 40 |                 self.buffer.pop(0)
 41 |         elif transition == "LA":
 42 |             if len(self.stack) >= 2:
 43 |                 self.dependencies.append((self.stack[-1], self.stack[-2]))
 44 |                 self.stack.pop(-2)
 45 |         else:
 46 |             if len(self.stack) >= 2:
 47 |                 self.dependencies.append((self.stack[-2], self.stack[-1]))
 48 |                 self.stack.pop(-1)
 49 |         ### END YOUR CODE
 50 | 
 51 |     def parse(self, transitions):
 52 |         """Applies the provided transitions to this PartialParse
 53 | 
 54 |         Args:
 55 |             transitions: The list of transitions in the order they should be applied
 56 |         Returns:
 57 |             dependencies: The list of dependencies produced when parsing the sentence. Represented
 58 |                           as a list of tuples where each tuple is of the form (head, dependent)
 59 |         """
 60 |         for transition in transitions:
 61 |             self.parse_step(transition)
 62 |         return self.dependencies
 63 | 
 64 | 
 65 | def minibatch_parse(sentences, model, batch_size):
 66 |     """Parses a list of sentences in minibatches using a model.
 67 | 
 68 |     Args:
 69 |         sentences: A list of sentences to be parsed (each sentence is a list of words)
 70 |         model: The model that makes parsing decisions. It is assumed to have a function
 71 |                model.predict(partial_parses) that takes in a list of PartialParses as input and
 72 |                returns a list of transitions predicted for each parse. That is, after calling
 73 |                    transitions = model.predict(partial_parses)
 74 |                transitions[i] will be the next transition to apply to partial_parses[i].
 75 |         batch_size: The number of PartialParses to include in each minibatch
 76 |     Returns:
 77 |         dependencies: A list where each element is the dependencies list for a parsed sentence.
 78 |                       Ordering should be the same as in sentences (i.e., dependencies[i] should
 79 |                       contain the parse for sentences[i]).
 80 |     """
 81 | 
 82 |     ### YOUR CODE HERE
 83 |     dependencies = []
 84 |     for sentence in sentences:
 85 |         pp = PartialParse(sentence)
 86 |         for i in xrange(2*len(sentence)):
 87 |             action = model.predict([pp])
 88 |             pp.parse(action)
 89 |         dependencies.append(pp.dependencies)
 90 |     ### END YOUR CODE
 91 | 
 92 |     return dependencies
 93 | 
 94 | 
 95 | def test_step(name, transition, stack, buf, deps,
 96 |               ex_stack, ex_buf, ex_deps):
 97 |     """Tests that a single parse step returns the expected output"""
 98 |     pp = PartialParse([])
 99 |     pp.stack, pp.buffer, pp.dependencies = stack, buf, deps
100 | 
101 |     pp.parse_step(transition)
102 |     stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))
103 |     assert stack == ex_stack, \
104 |         "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)
105 |     assert buf == ex_buf, \
106 |         "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)
107 |     assert deps == ex_deps, \
108 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
109 |     print "{:} test passed!".format(name)
110 | 
111 | 
112 | def test_parse_step():
113 |     """Simple tests for the PartialParse.parse_step function
114 |     Warning: these are not exhaustive
115 |     """
116 |     test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
117 |               ("ROOT", "the", "cat"), ("sat",), ())
118 |     test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
119 |               ("ROOT", "cat",), ("sat",), (("cat", "the"),))
120 |     test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
121 |               ("ROOT", "run",), (), (("run", "fast"),))
122 | 
123 | 
124 | def test_parse():
125 |     """Simple tests for the PartialParse.parse function
126 |     Warning: these are not exhaustive
127 |     """
128 |     sentence = ["parse", "this", "sentence"]
129 |     dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])
130 |     dependencies = tuple(sorted(dependencies))
131 |     expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))
132 |     assert dependencies == expected,  \
133 |         "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
134 |     assert tuple(sentence) == ("parse", "this", "sentence"), \
135 |         "parse test failed: the input sentence should not be modified"
136 |     print "parse test passed!"
137 | 
138 | 
139 | class DummyModel:
140 |     """Dummy model for testing the minibatch_parse function
141 |     First shifts everything onto the stack and then does exclusively right arcs if the first word of
142 |     the sentence is "right", "left" if otherwise.
143 |     """
144 |     def predict(self, partial_parses):
145 |         return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S"
146 |                 for pp in partial_parses]
147 | 
148 | 
149 | def test_dependencies(name, deps, ex_deps):
150 |     """Tests the provided dependencies match the expected dependencies"""
151 |     deps = tuple(sorted(deps))
152 |     assert deps == ex_deps, \
153 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
154 | 
155 | 
156 | def test_minibatch_parse():
157 |     """Simple tests for the minibatch_parse function
158 |     Warning: these are not exhaustive
159 |     """
160 |     sentences = [["right", "arcs", "only"],
161 |                  ["right", "arcs", "only", "again"],
162 |                  ["left", "arcs", "only"],
163 |                  ["left", "arcs", "only", "again"]]
164 |     deps = minibatch_parse(sentences, DummyModel(), 2)
165 |     test_dependencies("minibatch_parse", deps[0],
166 |                       (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))
167 |     test_dependencies("minibatch_parse", deps[1],
168 |                       (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))
169 |     test_dependencies("minibatch_parse", deps[2],
170 |                       (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))
171 |     test_dependencies("minibatch_parse", deps[3],
172 |                       (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))
173 |     print "minibatch_parse test passed!"
174 | 
175 | if __name__ == '__main__':
176 |     test_parse_step()
177 |     test_parse()
178 |     test_minibatch_parse()
179 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/__init__.py


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/__init__.pyc


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/utils/general_utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | import numpy as np
  4 | 
  5 | 
  6 | def get_minibatches(data, minibatch_size, shuffle=True):
  7 |     """
  8 |     Iterates through the provided data one minibatch at at time. You can use this function to
  9 |     iterate through data in minibatches as follows:
 10 | 
 11 |         for inputs_minibatch in get_minibatches(inputs, minibatch_size):
 12 |             ...
 13 | 
 14 |     Or with multiple data sources:
 15 | 
 16 |         for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size):
 17 |             ...
 18 | 
 19 |     Args:
 20 |         data: there are two possible values:
 21 |             - a list or numpy array
 22 |             - a list where each element is either a list or numpy array
 23 |         minibatch_size: the maximum number of items in a minibatch
 24 |         shuffle: whether to randomize the order of returned data
 25 |     Returns:
 26 |         minibatches: the return value depends on data:
 27 |             - If data is a list/array it yields the next minibatch of data.
 28 |             - If data a list of lists/arrays it returns the next minibatch of each element in the
 29 |               list. This can be used to iterate through multiple data sources
 30 |               (e.g., features and labels) at the same time.
 31 | 
 32 |     """
 33 |     list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray)
 34 |     data_size = len(data[0]) if list_data else len(data)
 35 |     indices = np.arange(data_size)
 36 |     if shuffle:
 37 |         np.random.shuffle(indices)
 38 |     for minibatch_start in np.arange(0, data_size, minibatch_size):
 39 |         minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size]
 40 |         yield [minibatch(d, minibatch_indices) for d in data] if list_data \
 41 |             else minibatch(data, minibatch_indices)
 42 | 
 43 | 
 44 | def minibatch(data, minibatch_idx):
 45 |     return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx]
 46 | 
 47 | 
 48 | def test_all_close(name, actual, expected):
 49 |     if actual.shape != expected.shape:
 50 |         raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}"
 51 |                          .format(name, expected.shape, actual.shape))
 52 |     if np.amax(np.fabs(actual - expected)) > 1e-6:
 53 |         raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual))
 54 |     else:
 55 |         print name, "passed!"
 56 | 
 57 | 
 58 | def logged_loop(iterable, n=None):
 59 |     if n is None:
 60 |         n = len(iterable)
 61 |     step = max(1, n / 1000)
 62 |     prog = Progbar(n)
 63 |     for i, elem in enumerate(iterable):
 64 |         if i % step == 0 or i == n - 1:
 65 |             prog.update(i + 1)
 66 |         yield elem
 67 | 
 68 | 
 69 | class Progbar(object):
 70 |     """
 71 |     Progbar class copied from keras (https://github.com/fchollet/keras/)
 72 |     Displays a progress bar.
 73 |     # Arguments
 74 |         target: Total number of steps expected.
 75 |         interval: Minimum visual progress update interval (in seconds).
 76 |     """
 77 | 
 78 |     def __init__(self, target, width=30, verbose=1):
 79 |         self.width = width
 80 |         self.target = target
 81 |         self.sum_values = {}
 82 |         self.unique_values = []
 83 |         self.start = time.time()
 84 |         self.total_width = 0
 85 |         self.seen_so_far = 0
 86 |         self.verbose = verbose
 87 | 
 88 |     def update(self, current, values=[], exact=[]):
 89 |         """
 90 |         Updates the progress bar.
 91 |         # Arguments
 92 |             current: Index of current step.
 93 |             values: List of tuples (name, value_for_last_step).
 94 |                 The progress bar will display averages for these values.
 95 |             exact: List of tuples (name, value_for_last_step).
 96 |                 The progress bar will display these values directly.
 97 |         """
 98 | 
 99 |         for k, v in values:
100 |             if k not in self.sum_values:
101 |                 self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far]
102 |                 self.unique_values.append(k)
103 |             else:
104 |                 self.sum_values[k][0] += v * (current - self.seen_so_far)
105 |                 self.sum_values[k][1] += (current - self.seen_so_far)
106 |         for k, v in exact:
107 |             if k not in self.sum_values:
108 |                 self.unique_values.append(k)
109 |             self.sum_values[k] = [v, 1]
110 |         self.seen_so_far = current
111 | 
112 |         now = time.time()
113 |         if self.verbose == 1:
114 |             prev_total_width = self.total_width
115 |             sys.stdout.write("\b" * prev_total_width)
116 |             sys.stdout.write("\r")
117 | 
118 |             numdigits = int(np.floor(np.log10(self.target))) + 1
119 |             barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
120 |             bar = barstr % (current, self.target)
121 |             prog = float(current)/self.target
122 |             prog_width = int(self.width*prog)
123 |             if prog_width > 0:
124 |                 bar += ('='*(prog_width-1))
125 |                 if current < self.target:
126 |                     bar += '>'
127 |                 else:
128 |                     bar += '='
129 |             bar += ('.'*(self.width-prog_width))
130 |             bar += ']'
131 |             sys.stdout.write(bar)
132 |             self.total_width = len(bar)
133 | 
134 |             if current:
135 |                 time_per_unit = (now - self.start) / current
136 |             else:
137 |                 time_per_unit = 0
138 |             eta = time_per_unit*(self.target - current)
139 |             info = ''
140 |             if current < self.target:
141 |                 info += ' - ETA: %ds' % eta
142 |             else:
143 |                 info += ' - %ds' % (now - self.start)
144 |             for k in self.unique_values:
145 |                 if type(self.sum_values[k]) is list:
146 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
147 |                 else:
148 |                     info += ' - %s: %s' % (k, self.sum_values[k])
149 | 
150 |             self.total_width += len(info)
151 |             if prev_total_width > self.total_width:
152 |                 info += ((prev_total_width-self.total_width) * " ")
153 | 
154 |             sys.stdout.write(info)
155 |             sys.stdout.flush()
156 | 
157 |             if current >= self.target:
158 |                 sys.stdout.write("\n")
159 | 
160 |         if self.verbose == 2:
161 |             if current >= self.target:
162 |                 info = '%ds' % (now - self.start)
163 |                 for k in self.unique_values:
164 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
165 |                 sys.stdout.write(info + "\n")
166 | 
167 |     def add(self, n, values=[]):
168 |         self.update(self.seen_so_far+n, values)
169 | 


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/utils/general_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/general_utils.pyc


--------------------------------------------------------------------------------
/Assignment2/TaoJi/assignment2/utils/parser_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/parser_utils.pyc


--------------------------------------------------------------------------------
/Assignment2/TaoJi/solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/solution.pdf


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/.idea/.name:
--------------------------------------------------------------------------------
1 | assignment2


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/.idea/assignment2.iml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <module type="PYTHON_MODULE" version="4">
3 |   <component name="NewModuleRootManager">
4 |     <content url="file://$MODULE_DIR$" />
5 |     <orderEntry type="inheritedJdk" />
6 |     <orderEntry type="sourceFolder" forTests="false" />
7 |   </component>
8 | </module>


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ProjectLevelVcsManager" settingsEditedManually="false">
 4 |     <OptionsSetting value="true" id="Add" />
 5 |     <OptionsSetting value="true" id="Remove" />
 6 |     <OptionsSetting value="true" id="Checkout" />
 7 |     <OptionsSetting value="true" id="Update" />
 8 |     <OptionsSetting value="true" id="Status" />
 9 |     <OptionsSetting value="true" id="Edit" />
10 |     <ConfirmationsSetting value="0" id="Add" />
11 |     <ConfirmationsSetting value="0" id="Remove" />
12 |   </component>
13 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.5 (/usr/local/bin/python)" project-jdk-type="Python SDK" />
14 | </project>


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/assignment2.iml" filepath="$PROJECT_DIR$/.idea/assignment2.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="" vcs="" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/model.py:
--------------------------------------------------------------------------------
 1 | import dynet as dy
 2 | 
 3 | class Model(object):
 4 |     def create_feed_dict(self, inputs_batch, labels_batch=None):
 5 |         raise NotImplementedError("Each Model must re-implement this method.")
 6 | 
 7 |     def init_parameters(self):
 8 |         raise NotImplementedError("Each Model must re-implement this method.")
 9 | 
10 |     def init_trainer(self):
11 |         raise NotImplementedError("Each Model must re-implement this method.")
12 | 
13 |     def train_on_batch(self, inputs_batch, labels_batch):
14 |         self.create_feed_dict(inputs_batch, labels_batch=labels_batch)
15 |         pred = self.prediction()
16 |         loss = self.compute_loss(pred)
17 |         return loss
18 | 
19 |     def predict_on_batch(self, inputs_batch):
20 |         self.create_feed_dict(inputs_batch)
21 |         pred = self.prediction()
22 |         return pred
23 | 
24 |     def build(self):
25 |         self.init_trainer()
26 |         self.init_parameters()
27 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/q1_classifier.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | import numpy as np
  4 | import dynet as dy
  5 | 
  6 | from q1_softmax import softmax
  7 | from q1_softmax import cross_entropy_loss
  8 | from model import Model
  9 | from utils.general_utils import get_minibatches
 10 | 
 11 | 
 12 | class Config(object):
 13 |     n_samples = 1024
 14 |     n_features = 100
 15 |     n_classes = 5
 16 |     batch_size = 64
 17 |     n_epochs = 50
 18 |     lr = 1e-4
 19 | 
 20 | 
 21 | class SoftmaxModel(Model):
 22 |     def init_trainer(self):
 23 |         ### YOUR CODE HERE
 24 |         self.sModel = dy.ParameterCollection()
 25 |         self.trainer = dy.SimpleSGDTrainer(self.sModel)
 26 |         self.trainer.learning_rate = self.config.lr
 27 |         ### END YOUR CODE
 28 | 
 29 |     def init_parameters(self):
 30 |         ### YOUR CODE HERE
 31 |         self._pW = self.sModel.add_parameters((self.config.n_features, self.config.n_classes))
 32 |         self._pb = self.sModel.add_parameters((self.config.n_classes))
 33 |         ### END YOUR CODE
 34 | 
 35 |     def create_feed_dict(self, inputs_batch, labels_batch=None):
 36 |         ### YOUR CODE HERE
 37 |         self.input = inputs_batch
 38 |         self.labels = labels_batch
 39 |         ### END YOUR CODE
 40 | 
 41 |     def prediction(self):
 42 |         W = dy.parameter(self._pW)
 43 |         b = dy.parameter(self._pb)
 44 |         x = dy.inputTensor(self.input)
 45 |         z_m = x * W
 46 |         z_T = dy.concatenate_cols([z_m[i]+b for i in range(self.config.batch_size)])
 47 |         z = dy.transpose(z_T)
 48 |         pred = softmax(z)
 49 |         return pred
 50 | 
 51 |     def compute_loss(self, pred):
 52 |         y = dy.inputTensor(self.labels)
 53 |         loss = cross_entropy_loss(y, pred)
 54 |         return loss
 55 | 
 56 |     def run_epoch(self, inputs, labels):
 57 |         config = self.config
 58 |         n_minibatches, total_loss = 0, 0
 59 |         for input_batch, labels_batch in get_minibatches([inputs, labels], config.batch_size):
 60 |             n_minibatches += 1
 61 |             dy.renew_cg()
 62 |             loss = self.train_on_batch(input_batch, labels_batch) / config.batch_size
 63 | 
 64 |             loss.forward()
 65 |             loss.backward()
 66 |             self.trainer.update()
 67 | 
 68 |             total_loss += loss.value()
 69 |         return total_loss / n_minibatches
 70 | 
 71 |     def fit(self, inputs, labels):
 72 |         losses = []
 73 |         for epoch in range(self.config.n_epochs):
 74 |             start_time = time.time()
 75 |             average_loss = self.run_epoch(inputs, labels)
 76 |             duration = time.time() - start_time
 77 |             print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, average_loss, duration)
 78 |             losses.append(average_loss)
 79 |         return losses
 80 | 
 81 |     def __init__(self, config):
 82 |         self.config = config
 83 |         self.build()
 84 | 
 85 | 
 86 | def test_softmax_model():
 87 |     config = Config()
 88 |     np.random.seed(1234)
 89 |     inputs = np.random.rand(config.n_samples, config.n_features)
 90 |     labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
 91 |     labels[:, 1] = 1
 92 |     # for i in xrange(config.n_samples):
 93 |     #     labels[i, i%config.n_classes] = 1
 94 | 
 95 |     model = SoftmaxModel(config)
 96 |     losses = model.fit(inputs, labels)
 97 |     assert losses[-1] < .5
 98 |     print "Basic (non-exhaustive) classifier tests pass"
 99 | 
100 | if __name__ == "__main__":
101 |     test_softmax_model()
102 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/q1_softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import dynet as dy
 3 | from utils.general_utils import test_all_close
 4 | 
 5 | 
 6 | def softmax(x):
 7 |     ### YOUR CODE HERE
 8 |     x_max = dy.max_dim(x, 1)
 9 |     x_sub = dy.colwise_add(x, -x_max)
10 |     x_exp = dy.exp(x_sub)
11 |     x_sum = dy.sum_cols(x_exp)
12 |     x_tmp = dy.zeroes(x.dim()[0])
13 |     x_tmp = dy.colwise_add(x_tmp, x_sum)
14 |     out = dy.cdiv(x_exp, x_tmp)
15 |     ### END YOUR CODE
16 |     return out
17 | 
18 | 
19 | def cross_entropy_loss(y, yhat):
20 |     ### YOUR CODE HERE
21 |     out = dy.sum_elems(-dy.cmult(y, dy.log(yhat)))
22 |     ### END YOUR CODE
23 |     return out
24 | 
25 | 
26 | def test_softmax_basic():
27 |     """
28 |     Some simple tests of softmax to get you started.
29 |     Warning: these are not exhaustive.
30 |     """
31 | 
32 |     # test1 = softmax(torch.Tensor([[1001, 1002], [3, 4]]))
33 |     # test1 = test1.numpy()
34 |     test1 = softmax(dy.inputTensor([[1001, 1002], [3, 4]]))
35 |     test1 = test1.npvalue();
36 |     test_all_close("Softmax test 1", test1, np.array([[0.26894142,  0.73105858],
37 |                                                       [0.26894142,  0.73105858]]))
38 | 
39 |     # test2 = softmax(torch.Tensor([[-1001, -1002]]))
40 |     # test2 = test2.numpy()
41 |     test2 = softmax(dy.inputTensor([[-1001, -1002]]))
42 |     test2 = test2.npvalue();
43 |     test_all_close("Softmax test 2", test2, np.array([[0.73105858, 0.26894142]]))
44 | 
45 |     print "Basic (non-exhaustive) softmax tests pass\n"
46 | 
47 | 
48 | def test_cross_entropy_loss_basic():
49 |     """
50 |     Some simple tests of cross_entropy_loss to get you started.
51 |     Warning: these are not exhaustive.
52 |     """
53 |     y = np.array([[0, 1], [1, 0], [1, 0]])
54 |     yhat = np.array([[.5, .5], [.5, .5], [.5, .5]])
55 | 
56 |     # test1 = cross_entropy_loss(
57 |     #         torch.Tensor([[0, 1], [1, 0], [1, 0]]),
58 |     #        torch.Tensor([[.5, .5], [.5, .5], [.5, .5]]))
59 |     # test1 = np.array(test1)
60 |     test1 = cross_entropy_loss(
61 |             dy.inputTensor([[0, 1], [1, 0], [1, 0]]),
62 |            dy.inputTensor([[.5, .5], [.5, .5], [.5, .5]]))
63 |     test1 = np.array(test1.value())
64 |     expected = -3 * np.log(.5)
65 |     test_all_close("Cross-entropy test 1", test1, expected)
66 | 
67 |     print "Basic (non-exhaustive) cross-entropy tests pass"
68 | 
69 | if __name__ == "__main__":
70 |     test_softmax_basic()
71 |     test_cross_entropy_loss_basic()
72 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/q2_initialization.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import dynet as dy
 3 | 
 4 | 
 5 | def xavier_weight_init():
 6 |     def _xavier_initializer(shape, **kwargs):
 7 |         ### YOUR CODE HERE
 8 |         epsilon = np.sqrt(6 / np.sum(shape))
 9 |         out = dy.random_uniform(dim=shape, left=-epsilon, right=epsilon)
10 |         ### END YOUR CODE
11 |         return out
12 |     return _xavier_initializer
13 | 
14 | 
15 | def test_initialization_basic():
16 |     print "Running basic tests..."
17 |     xavier_initializer = xavier_weight_init()
18 |     shape = (1,)
19 |     xavier_mat = xavier_initializer(shape)
20 |     assert xavier_mat.dim()[0] == shape
21 | 
22 |     shape = (1, 2, 3)
23 |     xavier_mat = xavier_initializer(shape)
24 |     assert xavier_mat.dim()[0] == shape
25 |     print "Basic (non-exhaustive) Xavier initialization tests pass"
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     test_initialization_basic()
30 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/q2_parser_model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import dynet as dy
  4 | import numpy as np
  5 | import cPickle
  6 | 
  7 | from model import Model
  8 | from q1_softmax import cross_entropy_loss
  9 | from q2_initialization import xavier_weight_init
 10 | from utils.general_utils import Progbar
 11 | from utils.parser_utils import minibatches, load_and_preprocess_data
 12 | 
 13 | 
 14 | class Config(object):
 15 |     n_features = 36
 16 |     n_classes = 3
 17 |     dropout = 0.5
 18 |     embed_size = 50
 19 |     hidden_size = 200
 20 |     batch_size = 2048
 21 |     n_epochs = 10
 22 |     lr = 0.001
 23 | 
 24 | 
 25 | class ParserModel(Model):
 26 |     def init_trainer(self):
 27 |         self.m = dy.ParameterCollection()
 28 |         self.trainer = dy.AdamTrainer(self.m)
 29 |         self.trainer.learning_rate = self.config.lr
 30 | 
 31 |     def init_parameters(self):
 32 |         zeroInit = dy.ConstInitializer(0.0)
 33 |         # xavier = xavier_weight_init()
 34 | 
 35 |         self._pW = self.m.add_parameters((self.config.n_features * self.config.embed_size, self.config.hidden_size))
 36 |         self._pB1 = self.m.add_parameters((1, self.config.hidden_size), init=zeroInit)
 37 |         self._pU = self.m.add_parameters((self.config.hidden_size, self.config.n_classes))
 38 |         self._pB2 = self.m.add_parameters((1, self.config.n_classes), init=zeroInit)
 39 | 
 40 |         self.word_dict = self.m.lookup_parameters_from_numpy(self.pretrained_embeddings)
 41 | 
 42 |     def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1):
 43 |         self.input = inputs_batch
 44 |         # 2048*36
 45 |         self.labels = labels_batch
 46 |         self.dropout = dropout
 47 | 
 48 |     def add_embedding(self):
 49 |         embeddings = dy.concatenate([self.word_dict.batch(x) for x in np.transpose(self.input)])
 50 |         embeddings = dy.transpose(embeddings)
 51 |         # ((1, 50*36), 2048)
 52 |         return embeddings
 53 | 
 54 |     def prediction(self, dropout=False):
 55 |         x = self.add_embedding()
 56 |         W = dy.parameter(self._pW)
 57 |         U = dy.parameter(self._pU)
 58 |         b1 = dy.parameter(self._pB1)
 59 |         b2 = dy.parameter(self._pB2)
 60 | 
 61 |         z1 = x * W + b1
 62 |         h = dy.rectify(z1)
 63 |         h_drop = dy.dropout(h, self.dropout) if dropout else h
 64 | 
 65 |         z2 = h_drop * U + b2
 66 |         # print "z2: ", z2.dim()
 67 | 
 68 |         pred = dy.softmax(dy.reshape(z2, (self.config.n_classes,)))
 69 |         return pred
 70 | 
 71 |     def compute_loss(self, pred):
 72 |         y = dy.inputTensor(np.transpose(self.labels), batched=True)
 73 |         losses = cross_entropy_loss(y, pred)
 74 |         loss = dy.sum_batches(losses) / self.config.batch_size
 75 |         return loss
 76 | 
 77 |     def train_on_batch(self, inputs_batch, labels_batch):
 78 |         self.create_feed_dict(inputs_batch, labels_batch=labels_batch,
 79 |                                      dropout=self.config.dropout)
 80 |         pred = self.prediction(dropout=True)
 81 |         loss = self.compute_loss(pred)
 82 |         return loss
 83 | 
 84 |     def predict_on_batch(self, inputs_batch):
 85 |         self.create_feed_dict(inputs_batch)
 86 |         pred_dy = self.prediction()
 87 |         pred = np.transpose(pred_dy.npvalue())
 88 |         return pred
 89 | 
 90 |     def run_epoch(self, parser, train_examples, dev_set):
 91 |         for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)):
 92 |             dy.renew_cg()
 93 |             loss = self.train_on_batch(train_x, train_y)
 94 |             loss.forward()
 95 |             loss.backward()
 96 |             self.trainer.update()
 97 |         print "Training Loss: ", loss.value()
 98 |         print "Evaluating on dev set",
 99 |         dev_UAS, _ = parser.parse(dev_set)
100 |         print "- dev UAS: {:.2f}".format(dev_UAS * 100.0)
101 |         return dev_UAS
102 | 
103 |     def fit(self, saver, parser, train_examples, dev_set):
104 |         best_dev_UAS = 0
105 |         for epoch in range(self.config.n_epochs):
106 |             print "Epoch {:} out of {:}".format(epoch + 1, self.config.n_epochs)
107 |             dev_UAS = self.run_epoch(parser, train_examples, dev_set)
108 |             if dev_UAS > best_dev_UAS:
109 |                 best_dev_UAS = dev_UAS
110 |                 if saver:
111 |                     print "New best dev UAS! Saving model in ./data/weights/parser.weights"
112 |                     dy.save('./data/weights/parser.weights')
113 |             print
114 | 
115 |     def __init__(self, config, pretrained_embeddings):
116 |         self.pretrained_embeddings = pretrained_embeddings
117 |         self.config = config
118 |         self.build()
119 | 
120 | 
121 | def main(debug=False):
122 |     print 80 * "="
123 |     print "INITIALIZING"
124 |     print 80 * "="
125 |     config = Config()
126 |     parser, embeddings, train_examples, dev_set, test_set = load_and_preprocess_data(debug)
127 |     if not os.path.exists('./data/weights/'):
128 |         os.makedirs('./data/weights/')
129 | 
130 |     print "Building model...",
131 |     start = time.time()
132 |     model = ParserModel(config, embeddings)
133 |     parser.model = model
134 |     print "took {:.2f} seconds\n".format(time.time() - start)
135 | 
136 |     saver = None if debug else True
137 | 
138 |     print 80 * "="
139 |     print "TRAINING"
140 |     print 80 * "="
141 |     model.fit(saver, parser, train_examples, dev_set)
142 | 
143 |     if not debug:
144 |         print 80 * "="
145 |         print "TESTING"
146 |         print 80 * "="
147 |         print "Restoring the best model weights found on the dev set"
148 |         saver.restore('./data/weights/parser.weights')
149 |         print "Final evaluation on test set",
150 |         UAS, dependencies = parser.parse(test_set)
151 |         print "- test UAS: {:.2f}".format(UAS * 100.0)
152 |         print "Writing predictions"
153 |         with open('q2_test.predicted.pkl', 'w') as f:
154 |             cPickle.dump(dependencies, f, -1)
155 |         print "Done!"
156 | 
157 | if __name__ == '__main__':
158 |     main()
159 | 
160 | 
161 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/q2_parser_transitions.py:
--------------------------------------------------------------------------------
  1 | class PartialParse(object):
  2 |     def __init__(self, sentence):
  3 |         self.sentence = sentence
  4 |         ### YOUR CODE HERE
  5 |         self.stack = ["ROOT"]
  6 |         self.buffer = sentence[:]
  7 |         self.dependencies = []
  8 |         ### END YOUR CODE
  9 | 
 10 |     def parse_step(self, transition):
 11 |         ### YOUR CODE HERE
 12 |         if transition == "S":
 13 |             self.stack.append(self.buffer[0])
 14 |             self.buffer.pop(0)
 15 |         elif transition == "LA":
 16 |             self.dependencies.append((self.stack[-1], self.stack[-2]))
 17 |             self.stack.pop(-2)
 18 |         else:
 19 |             self.dependencies.append((self.stack[-2], self.stack[-1]))
 20 |             self.stack.pop(-1)
 21 |         ### END YOUR CODE
 22 | 
 23 |     def parse(self, transitions):
 24 |         for transition in transitions:
 25 |             self.parse_step(transition)
 26 |         return self.dependencies
 27 | 
 28 | 
 29 | def minibatch_parse(sentences, model, batch_size):
 30 |     ### YOUR CODE HERE
 31 |     partial_parses = [PartialParse(s) for s in sentences]
 32 |     unfinished_parse = partial_parses
 33 |     while len(unfinished_parse) > 0:
 34 |         minibatch = unfinished_parse[0:batch_size]
 35 |         while len(minibatch) > 0:
 36 |             transitions = model.predict(minibatch)
 37 |             for index, action in enumerate(transitions):
 38 |                 minibatch[index].parse_step(action)
 39 |             minibatch = [parse for parse in minibatch if len(parse.stack) > 1 or len(parse.buffer) > 0]
 40 |         unfinished_parse = unfinished_parse[batch_size:]
 41 |     dependencies = []
 42 |     for n in range(len(sentences)):
 43 |         dependencies.append(partial_parses[n].dependencies)
 44 |     ### END YOUR CODE
 45 | 
 46 |     return dependencies
 47 | 
 48 | 
 49 | def test_step(name, transition, stack, buf, deps,
 50 |               ex_stack, ex_buf, ex_deps):
 51 |     """Tests that a single parse step returns the expected output"""
 52 |     pp = PartialParse([])
 53 |     pp.stack, pp.buffer, pp.dependencies = stack, buf, deps
 54 | 
 55 |     pp.parse_step(transition)
 56 |     stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))
 57 |     assert stack == ex_stack, \
 58 |         "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)
 59 |     assert buf == ex_buf, \
 60 |         "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)
 61 |     assert deps == ex_deps, \
 62 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
 63 |     print "{:} test passed!".format(name)
 64 | 
 65 | 
 66 | def test_parse_step():
 67 |     """Simple tests for the PartialParse.parse_step function
 68 |     Warning: these are not exhaustive
 69 |     """
 70 |     test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
 71 |               ("ROOT", "the", "cat"), ("sat",), ())
 72 |     test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
 73 |               ("ROOT", "cat",), ("sat",), (("cat", "the"),))
 74 |     test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
 75 |               ("ROOT", "run",), (), (("run", "fast"),))
 76 | 
 77 | 
 78 | def test_parse():
 79 |     """Simple tests for the PartialParse.parse function
 80 |     Warning: these are not exhaustive
 81 |     """
 82 |     sentence = ["parse", "this", "sentence"]
 83 |     dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])
 84 |     dependencies = tuple(sorted(dependencies))
 85 |     expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))
 86 |     assert dependencies == expected,  \
 87 |         "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
 88 |     assert tuple(sentence) == ("parse", "this", "sentence"), \
 89 |         "parse test failed: the input sentence should not be modified"
 90 |     print "parse test passed!"
 91 | 
 92 | 
 93 | class DummyModel:
 94 |     """Dummy model for testing the minibatch_parse function
 95 |     First shifts everything onto the stack and then does exclusively right arcs if the first word of
 96 |     the sentence is "right", "left" if otherwise.
 97 |     """
 98 |     def predict(self, partial_parses):
 99 |         return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S"
100 |                 for pp in partial_parses]
101 | 
102 | 
103 | def test_dependencies(name, deps, ex_deps):
104 |     """Tests the provided dependencies match the expected dependencies"""
105 |     deps = tuple(sorted(deps))
106 |     assert deps == ex_deps, \
107 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
108 | 
109 | 
110 | def test_minibatch_parse():
111 |     """Simple tests for the minibatch_parse function
112 |     Warning: these are not exhaustive
113 |     """
114 |     sentences = [["right", "arcs", "only"],
115 |                  ["right", "arcs", "only", "again"],
116 |                  ["left", "arcs", "only"],
117 |                  ["left", "arcs", "only", "again"]]
118 |     deps = minibatch_parse(sentences, DummyModel(), 2)
119 |     test_dependencies("minibatch_parse", deps[0],
120 |                       (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))
121 |     test_dependencies("minibatch_parse", deps[1],
122 |                       (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))
123 |     test_dependencies("minibatch_parse", deps[2],
124 |                       (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))
125 |     test_dependencies("minibatch_parse", deps[3],
126 |                       (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))
127 |     print "minibatch_parse test passed!"
128 | 
129 | if __name__ == '__main__':
130 |     test_parse_step()
131 |     test_parse()
132 |     test_minibatch_parse()
133 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/__init__.py


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/__init__.pyc


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/utils/general_utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | import numpy as np
  4 | 
  5 | 
  6 | def get_minibatches(data, minibatch_size, shuffle=True):
  7 |     """
  8 |     Iterates through the provided data one minibatch at at time. You can use this function to
  9 |     iterate through data in minibatches as follows:
 10 | 
 11 |         for inputs_minibatch in get_minibatches(inputs, minibatch_size):
 12 |             ...
 13 | 
 14 |     Or with multiple data sources:
 15 | 
 16 |         for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size):
 17 |             ...
 18 | 
 19 |     Args:
 20 |         data: there are two possible values:
 21 |             - a list or numpy array
 22 |             - a list where each element is either a list or numpy array
 23 |         minibatch_size: the maximum number of items in a minibatch
 24 |         shuffle: whether to randomize the order of returned data
 25 |     Returns:
 26 |         minibatches: the return value depends on data:
 27 |             - If data is a list/array it yields the next minibatch of data.
 28 |             - If data a list of lists/arrays it returns the next minibatch of each element in the
 29 |               list. This can be used to iterate through multiple data sources
 30 |               (e.g., features and labels) at the same time.
 31 | 
 32 |     """
 33 |     list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray)
 34 |     data_size = len(data[0]) if list_data else len(data)
 35 |     indices = np.arange(data_size)
 36 |     if shuffle:
 37 |         np.random.shuffle(indices)
 38 |     for minibatch_start in np.arange(0, data_size, minibatch_size):
 39 |         minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size]
 40 |         yield [minibatch(d, minibatch_indices) for d in data] if list_data \
 41 |             else minibatch(data, minibatch_indices)
 42 | 
 43 | 
 44 | def minibatch(data, minibatch_idx):
 45 |     return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx]
 46 | 
 47 | 
 48 | def test_all_close(name, actual, expected):
 49 |     if actual.shape != expected.shape:
 50 |         raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}"
 51 |                          .format(name, expected.shape, actual.shape))
 52 |     if np.amax(np.fabs(actual - expected)) > 1e-6:
 53 |         raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual))
 54 |     else:
 55 |         print name, "passed!"
 56 | 
 57 | 
 58 | def logged_loop(iterable, n=None):
 59 |     if n is None:
 60 |         n = len(iterable)
 61 |     step = max(1, n / 1000)
 62 |     prog = Progbar(n)
 63 |     for i, elem in enumerate(iterable):
 64 |         if i % step == 0 or i == n - 1:
 65 |             prog.update(i + 1)
 66 |         yield elem
 67 | 
 68 | 
 69 | class Progbar(object):
 70 |     """
 71 |     Progbar class copied from keras (https://github.com/fchollet/keras/)
 72 |     Displays a progress bar.
 73 |     # Arguments
 74 |         target: Total number of steps expected.
 75 |         interval: Minimum visual progress update interval (in seconds).
 76 |     """
 77 | 
 78 |     def __init__(self, target, width=30, verbose=1):
 79 |         self.width = width
 80 |         self.target = target
 81 |         self.sum_values = {}
 82 |         self.unique_values = []
 83 |         self.start = time.time()
 84 |         self.total_width = 0
 85 |         self.seen_so_far = 0
 86 |         self.verbose = verbose
 87 | 
 88 |     def update(self, current, values=[], exact=[]):
 89 |         """
 90 |         Updates the progress bar.
 91 |         # Arguments
 92 |             current: Index of current step.
 93 |             values: List of tuples (name, value_for_last_step).
 94 |                 The progress bar will display averages for these values.
 95 |             exact: List of tuples (name, value_for_last_step).
 96 |                 The progress bar will display these values directly.
 97 |         """
 98 | 
 99 |         for k, v in values:
100 |             if k not in self.sum_values:
101 |                 self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far]
102 |                 self.unique_values.append(k)
103 |             else:
104 |                 self.sum_values[k][0] += v * (current - self.seen_so_far)
105 |                 self.sum_values[k][1] += (current - self.seen_so_far)
106 |         for k, v in exact:
107 |             if k not in self.sum_values:
108 |                 self.unique_values.append(k)
109 |             self.sum_values[k] = [v, 1]
110 |         self.seen_so_far = current
111 | 
112 |         now = time.time()
113 |         if self.verbose == 1:
114 |             prev_total_width = self.total_width
115 |             sys.stdout.write("\b" * prev_total_width)
116 |             sys.stdout.write("\r")
117 | 
118 |             numdigits = int(np.floor(np.log10(self.target))) + 1
119 |             barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
120 |             bar = barstr % (current, self.target)
121 |             prog = float(current)/self.target
122 |             prog_width = int(self.width*prog)
123 |             if prog_width > 0:
124 |                 bar += ('='*(prog_width-1))
125 |                 if current < self.target:
126 |                     bar += '>'
127 |                 else:
128 |                     bar += '='
129 |             bar += ('.'*(self.width-prog_width))
130 |             bar += ']'
131 |             sys.stdout.write(bar)
132 |             self.total_width = len(bar)
133 | 
134 |             if current:
135 |                 time_per_unit = (now - self.start) / current
136 |             else:
137 |                 time_per_unit = 0
138 |             eta = time_per_unit*(self.target - current)
139 |             info = ''
140 |             if current < self.target:
141 |                 info += ' - ETA: %ds' % eta
142 |             else:
143 |                 info += ' - %ds' % (now - self.start)
144 |             for k in self.unique_values:
145 |                 if type(self.sum_values[k]) is list:
146 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
147 |                 else:
148 |                     info += ' - %s: %s' % (k, self.sum_values[k])
149 | 
150 |             self.total_width += len(info)
151 |             if prev_total_width > self.total_width:
152 |                 info += ((prev_total_width-self.total_width) * " ")
153 | 
154 |             sys.stdout.write(info)
155 |             sys.stdout.flush()
156 | 
157 |             if current >= self.target:
158 |                 sys.stdout.write("\n")
159 | 
160 |         if self.verbose == 2:
161 |             if current >= self.target:
162 |                 info = '%ds' % (now - self.start)
163 |                 for k in self.unique_values:
164 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
165 |                 sys.stdout.write(info + "\n")
166 | 
167 |     def add(self, n, values=[]):
168 |         self.update(self.seen_so_far+n, values)
169 | 


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/utils/general_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/general_utils.pyc


--------------------------------------------------------------------------------
/Assignment2/WeiYang/assignment2/utils/parser_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/parser_utils.pyc


--------------------------------------------------------------------------------
/Assignment2/WeiYang/solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/solution.pdf


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/a.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/a.txt


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/model.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | reload(sys)
 4 | sys.setdefaultencoding('utf-8')
 5 | 
 6 | import dynet as dy
 7 | 
 8 | class Model(object):
 9 |     """Abstracts a Tensorflow graph for a learning task.
10 | 
11 |     We use various Model classes as usual abstractions to encapsulate tensorflow
12 |     computational graphs. Each algorithm you will construct in this homework will
13 |     inherit from a Model object.
14 |     """
15 | 
16 |     def create_feed_dict(self, inputs_batch, labels_batch=None):
17 |         """Creates the feed_dict for one step of training.
18 | 
19 |         If labels_batch is None, then no labels are added to feed_dict.
20 | 
21 |         Hint: The keys for the feed_dict should be a subset of the placeholder
22 |                     tensors created in add_placeholders.
23 | 
24 |         Args:
25 |             inputs_batch: A batch of input data.
26 |             labels_batch: A batch of label data.
27 |         Returns:
28 |             feed_dict: The feed dictionary mapping from placeholders to values.
29 |         """
30 |         raise NotImplementedError("Each Model must re-implement this method.")
31 | 
32 |     def init_parameters(self):
33 |         """Initialize parameters for the Dynet model
34 | 
35 |         """
36 |         raise NotImplementedError("Each Model must re-implement this method.")
37 | 
38 |     def init_trainer(self):
39 |         """Sets up the trainer.
40 |         """
41 | 
42 |         raise NotImplementedError("Each Model must re-implement this method.")
43 | 
44 |     def train_on_batch(self, inputs_batch, labels_batch):
45 |         """Perform one step of gradient descent on the provided batch of data.
46 | 
47 |         Args:
48 |             input_batch: np.ndarray of shape (n_samples, n_features)
49 |             labels_batch: np.ndarray of shape (n_samples, n_classes)
50 |         Returns:
51 |             loss: loss over the batch (a scalar)
52 |         """
53 | 
54 |         self.create_feed_dict(inputs_batch, labels_batch=labels_batch)
55 | 
56 |         pred = self.prediction()
57 | 
58 |         loss = self.compute_loss(pred)
59 | 
60 |         return loss
61 | 
62 |     def predict_on_batch(self, inputs_batch):
63 |         """Make predictions for the provided batch of data
64 | 
65 |         Args:
66 |             input_batch: np.ndarray of shape (n_samples, n_features)
67 |         Returns:
68 |             predictions: np.ndarray of shape (n_samples, n_classes)
69 |         """
70 |         self.create_feed_dict(inputs_batch)
71 | 
72 |         pred = self.prediction()
73 | 
74 |         return pred
75 | 
76 |     def build(self):
77 |         self.init_trainer()
78 |         self.init_parameters()
79 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/q1_classifier.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import sys
  3 | reload(sys)
  4 | sys.setdefaultencoding('utf-8')
  5 | 
  6 | import time
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | import dynet as dy
 11 | 
 12 | from q1_softmax import softmax
 13 | from q1_softmax import cross_entropy_loss
 14 | from model import Model
 15 | from utils.general_utils import get_minibatches
 16 | 
 17 | 
 18 | class Config(object):
 19 |     """Holds model hyperparams and data information.
 20 | 
 21 |     The config class is used to store various hyperparameters and dataset
 22 |     information parameters. Model objects are passed a Config() object at
 23 |     instantiation.
 24 |     """
 25 |     n_samples = 1024
 26 |     n_features = 100
 27 |     n_classes = 5
 28 |     batch_size = 64
 29 |     n_epochs = 50
 30 |     lr = 1e-4
 31 | 
 32 | 
 33 | class SoftmaxModel(Model):
 34 |     """ a Softmax classifier with cross-entropy loss."""
 35 | 
 36 |     def init_trainer(self):
 37 |         """Sets up the trainer.
 38 | 
 39 |         """
 40 |         ### YOUR CODE HERE
 41 |         self.sModel = dy.ParameterCollection()
 42 |         self.trainer = dy.SimpleSGDTrainer(self.sModel)
 43 |         self.trainer.learning_rate = self.config.lr
 44 |         ### END YOUR CODE
 45 | 
 46 |     def init_parameters(self):
 47 |         """Set up parameters
 48 | 
 49 |         """
 50 |         ### YOUR CODE HERE
 51 |         self._pW = self.sModel.add_parameters((self.config.n_features, self.config.n_classes))
 52 |         self._pb = self.sModel.add_parameters((self.config.n_classes))
 53 |         # associate the parameters with cg Expressions
 54 | 
 55 |         ### END YOUR CODE
 56 | 
 57 |     def create_feed_dict(self, inputs_batch, labels_batch=None):
 58 |         """Creates the feed_dict for training the given step.
 59 | 
 60 |         If label_batch is None, then no labels are added to feed_dict.
 61 | 
 62 |         Hint: The keys for the feed_dict should be the placeholder
 63 |                 tensors created in add_placeholders.
 64 | 
 65 |         Args:
 66 |             inputs_batch: A batch of input data.
 67 |             labels_batch: A batch of label data.
 68 |         Returns:
 69 |             feed_dict: The feed dictionary mapping from placeholders to values.
 70 |         """
 71 |         ### YOUR CODE HERE
 72 |         self.input = inputs_batch
 73 |         self.labels = labels_batch
 74 |         ### END YOUR CODE
 75 | 
 76 |     def prediction(self):
 77 |         """Adds the core transformation for this model which transforms a batch of input
 78 |         data into a batch of predictions. In this case, the transformation is a linear layer plus a
 79 |         softmax transformation:
 80 | 
 81 |         y = softmax(xW + b)
 82 | 
 83 |         Args:
 84 |             input_data: A tensor of shape (batch_size, n_features).
 85 |         Returns:
 86 |             pred: A tensor of shape (batch_size, n_classes)
 87 |         """
 88 |         W = dy.parameter(self._pW)
 89 |         b = dy.parameter(self._pb)
 90 |         x = dy.inputTensor(self.input)
 91 | 
 92 |         z_m = x * W
 93 |         z_T = dy.concatenate_cols([z_m[i]+b for i in range(self.config.batch_size)])
 94 |         z = dy.transpose(z_T)
 95 |         # z = x * W + b
 96 | 
 97 |         pred = softmax(z)
 98 |         return pred
 99 | 
100 |     def compute_loss(self, pred):
101 |         """Adds cross_entropy_loss ops to the computational graph.
102 | 
103 |         Args:
104 |             pred: A tensor of shape (batch_size, n_classes)
105 |         Returns:
106 |             loss: A 0-d tensor (scalar)
107 |         """
108 |         y = dy.inputTensor(self.labels)
109 |         loss = cross_entropy_loss(y, pred)
110 |         return loss
111 | 
112 |     def run_epoch(self, inputs, labels):
113 |         """Runs an epoch of training.
114 | 
115 |         Args:
116 |             inputs: np.ndarray of shape (n_samples, n_features)
117 |             labels: np.ndarray of shape (n_samples, n_classes)
118 |         Returns:
119 |             average_loss: scalar. Average minibatch loss of model on epoch.
120 |         """
121 |         config = self.config
122 |         n_minibatches, total_loss = 0, 0
123 |         for input_batch, labels_batch in get_minibatches([inputs, labels], config.batch_size):
124 |             n_minibatches += 1
125 |             dy.renew_cg()
126 |             '''Compute the loss of a batch'''
127 |             # loss = []
128 |             # for i in xrange(config.batch_size):
129 |             #     input_t, labels_t = input_batch[i].reshape(1, config.n_features), labels_batch[i].reshape(1, config.n_classes)
130 |             #     loss_t = self.train_on_batch(input_t, labels_t)
131 |             #     loss.append(loss_t)
132 |             # loss = dy.esum(loss) / config.batch_size
133 |             loss = self.train_on_batch(input_batch, labels_batch) / config.batch_size
134 | 
135 |             loss.forward()
136 |             loss.backward()
137 |             self.trainer.update()
138 | 
139 |             total_loss += loss.value()
140 |         return total_loss / n_minibatches
141 | 
142 |     def fit(self, inputs, labels):
143 |         """Fit model on provided data.
144 | 
145 |         Args:
146 |             inputs: np.ndarray of shape (n_samples, n_features)
147 |             labels: np.ndarray of shape (n_samples, n_classes)
148 |         Returns:
149 |             losses: list of loss per epoch
150 |         """
151 |         losses = []
152 |         for epoch in range(self.config.n_epochs):
153 |             start_time = time.time()
154 |             average_loss = self.run_epoch(inputs, labels)
155 |             duration = time.time() - start_time
156 |             print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, average_loss, duration)
157 |             losses.append(average_loss)
158 |         return losses
159 | 
160 |     def __init__(self, config):
161 |         """Initializes the model.
162 | 
163 |         Args:
164 |             config: A model configuration object of type Config
165 |         """
166 |         self.config = config
167 |         self.build()
168 | 
169 | 
170 | def test_softmax_model():
171 |     """Train softmax model for a number of steps."""
172 |     config = Config()
173 | 
174 |     # Generate random data to train the model on
175 |     np.random.seed(1234)
176 |     inputs = np.random.rand(config.n_samples, config.n_features)
177 |     labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
178 |     labels[:, 1] = 1
179 |     # for i in xrange(config.n_samples):
180 |     #     labels[i, i%config.n_classes] = 1
181 | 
182 |     model = SoftmaxModel(config)
183 |     losses = model.fit(inputs, labels)
184 | 
185 |     # If Ops are implemented correctly, the average loss should fall close to zero
186 |     # rapidly.
187 |     assert losses[-1] < .5
188 |     print "Basic (non-exhaustive) classifier tests pass"
189 | 
190 | if __name__ == "__main__":
191 |     test_softmax_model()
192 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/q1_softmax.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import sys
  3 | reload(sys)
  4 | sys.setdefaultencoding('utf-8')
  5 | 
  6 | import numpy as np
  7 | #import tensorflow as tf
  8 | import dynet as dy
  9 | from utils.general_utils import test_all_close
 10 | 
 11 | def softmax(x):
 12 |     """
 13 |     Compute the softmax function in tensorflow.
 14 | 
 15 |     You might find the tensorflow functions tf.exp, tf.reduce_max,
 16 |     tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may
 17 |     not need to use all of these functions). Recall also that many common
 18 |     tensorflow operations are sugared (e.g. x * y does a tensor multiplication
 19 |     if x and y are both tensors). Make sure to implement the numerical stability
 20 |     fixes as in the previous homework!
 21 | 
 22 |     Args:
 23 |         x:   tf.Tensor with shape (n_samples, n_features). Note feature vectors are
 24 |                   represented by row-vectors. (For simplicity, no need to handle 1-d
 25 |                   input as in the previous homework)
 26 |     Returns:
 27 |         out: tf.Tensor with shape (n_sample, n_features). You need to construct this
 28 |                   tensor in this problem.
 29 |     """
 30 | 
 31 |     ### YOUR CODE HERE
 32 |     x_max = dy.max_dim(x, 1)
 33 |     x_sub = dy.colwise_add(x, -x_max)
 34 |     x_exp = dy.exp(x_sub)
 35 |     sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp))
 36 | 
 37 |     out = dy.cdiv(x_exp, sum_exp)
 38 |     ### END YOUR CODE
 39 | 
 40 |     return out
 41 | 
 42 | 
 43 | def cross_entropy_loss(y, yhat):
 44 |     """
 45 |     Compute the cross entropy loss in tensorflow.
 46 |     The loss should be summed over the current minibatch.
 47 | 
 48 |     y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor
 49 |     of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should
 50 |     be of dtype tf.float32.
 51 | 
 52 |     The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many
 53 |     solutions are possible, so you may not need to use all of these functions).
 54 | 
 55 |     Note: You are NOT allowed to use the tensorflow built-in cross-entropy
 56 |                 functions.
 57 | 
 58 |     Args:
 59 |         y:    tf.Tensor with shape (n_samples, n_classes). One-hot encoded.
 60 |         yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a
 61 |                     probability distribution and should sum to 1.
 62 |     Returns:
 63 |         out:  tf.Tensor with shape (1,) (Scalar output). You need to construct this
 64 |                     tensor in the problem.
 65 |     """
 66 | 
 67 |     ### YOUR CODE HERE
 68 |     l_yhat = dy.log(yhat)
 69 |     product = dy.cmult(y, l_yhat)
 70 |     out = (-dy.sum_elems(product))
 71 |     ### END YOUR CODE
 72 | 
 73 |     return out
 74 | 
 75 | 
 76 | def test_softmax_basic():
 77 |     """
 78 |     Some simple tests of softmax to get you started.
 79 |     Warning: these are not exhaustive.
 80 |     """
 81 |     dy.renew_cg()
 82 |     test1 = softmax(dy.inputTensor(np.array([[1001, 1002], [3, 4]])))
 83 |     test_all_close("Softmax test 1", test1.value(), np.array([[0.26894142,  0.73105858],
 84 |                                                       [0.26894142,  0.73105858]]))
 85 |     dy.renew_cg()
 86 |     test2 = softmax(dy.inputTensor(np.array([[-1001, -1002]])))
 87 |     test_all_close("Softmax test 2", test2.value(), np.array([[0.73105858, 0.26894142]]))
 88 | 
 89 |     print "Basic (non-exhaustive) softmax tests pass\n"
 90 | 
 91 | 
 92 | def test_cross_entropy_loss_basic():
 93 |     """
 94 |     Some simple tests of cross_entropy_loss to get you started.
 95 |     Warning: these are not exhaustive.
 96 |     """
 97 |     y = np.array([[0, 1], [1, 0], [1, 0]])
 98 |     yhat = np.array([[.5, .5], [.5, .5], [.5, .5]])
 99 | 
100 |     test1 = cross_entropy_loss(dy.inputTensor(y), dy.inputTensor(yhat))
101 | 
102 |     expected = -3 * np.log(.5)
103 |     test_all_close("Cross-entropy test 1", np.array(test1.value()), expected)
104 | 
105 |     print "Basic (non-exhaustive) cross-entropy tests pass"
106 | 
107 | if __name__ == "__main__":
108 |     test_softmax_basic()
109 |     test_cross_entropy_loss_basic()
110 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/q2_initialization.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | reload(sys)
 4 | sys.setdefaultencoding('utf-8')
 5 | 
 6 | import numpy as np
 7 | import tensorflow as tf
 8 | import dynet as dy
 9 | 
10 | def xavier_weight_init():
11 |     """Returns function that creates random tensor.
12 | 
13 |     The specified function will take in a shape (tuple or 1-d array) and
14 |     returns a random tensor of the specified shape drawn from the
15 |     Xavier initialization distribution.
16 | 
17 |     Hint: You might find tf.random_uniform useful.
18 |     """
19 |     def _xavier_initializer(shape, **kwargs):
20 |         """Defines an initializer for the Xavier distribution.
21 |         Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where
22 |             epsilon = sqrt(6) / <sum of the sizes of shape's dimensions>
23 |         e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3))
24 | 
25 |         This function will be used as a variable initializer.
26 | 
27 |         Args:
28 |             shape: Tuple or 1-d array that species the dimensions of the requested tensor.
29 |         Returns:
30 |             out: tf.Tensor of specified shape sampled from the Xavier distribution.
31 |         """
32 |         ### YOUR CODE HERE
33 |         epsilon = np.sqrt(6 / np.sum(shape))
34 |         out = dy.random_uniform(dim=shape, left=-epsilon, right=epsilon)
35 |         ### END YOUR CODE
36 |         return out.npvalue()
37 |     # Returns defined initializer function.
38 |     return _xavier_initializer
39 | 
40 | 
41 | def test_initialization_basic():
42 |     """Some simple tests for the initialization.
43 |     """
44 |     print "Running basic tests..."
45 |     xavier_initializer = xavier_weight_init()
46 |     shape = (1,)
47 |     xavier_mat = xavier_initializer(shape)
48 |     assert xavier_mat.shape == shape
49 | 
50 |     shape = (1, 2, 3)
51 |     xavier_mat = xavier_initializer(shape)
52 |     assert xavier_mat.shape == shape
53 |     print "Basic (non-exhaustive) Xavier initialization tests pass"
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     test_initialization_basic()
58 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/q2_parser_transitions.py:
--------------------------------------------------------------------------------
  1 | class PartialParse(object):
  2 |     def __init__(self, sentence):
  3 |         """Initializes this partial parse.
  4 | 
  5 |         Your code should initialize the following fields:
  6 |             self.stack: The current stack represented as a list with the top of the stack as the
  7 |                         last element of the list.
  8 |             self.buffer: The current buffer represented as a list with the first item on the
  9 |                          buffer as the first item of the list
 10 |             self.dependencies: The list of dependencies produced so far. Represented as a list of
 11 |                     tuples where each tuple is of the form (head, dependent).
 12 |                     Order for this list doesn't matter.
 13 | 
 14 |         The root token should be represented with the string "ROOT"
 15 | 
 16 |         Args:
 17 |             sentence: The sentence to be parsed as a list of words.
 18 |                       Your code should not modify the sentence.
 19 |         """
 20 |         # The sentence being parsed is kept for bookkeeping purposes. Do not use it in your code.
 21 |         self.sentence = sentence
 22 | 
 23 |         ### YOUR CODE HERE
 24 |         self.stack = ['ROOT']
 25 |         self.buffer = sentence[:]
 26 |         self.dependencies = []
 27 |         ### END YOUR CODE
 28 | 
 29 |     def parse_step(self, transition):
 30 |         """Performs a single parse step by applying the given transition to this partial parse
 31 | 
 32 |         Args:
 33 |             transition: A string that equals "S", "LA", or "RA" representing the shift, left-arc,
 34 |                         and right-arc transitions.
 35 |         """
 36 |         ### YOUR CODE HERE
 37 |         if transition == 'S':
 38 |             if self.buffer:
 39 |                 self.stack.append(self.buffer[0])
 40 |                 self.buffer.pop(0)
 41 |         elif transition == 'LA':
 42 |             if len(self.stack) >= 2:
 43 |                 self.dependencies.append((self.stack[-1], self.stack[-2]))
 44 |                 self.stack.pop(-2)
 45 |         else:
 46 |             if len(self.stack) >= 2:
 47 |                 self.dependencies.append((self.stack[-2], self.stack[-1]))
 48 |                 self.stack.pop(-1)
 49 |         ### END YOUR CODE
 50 | 
 51 |     def parse(self, transitions):
 52 |         """Applies the provided transitions to this PartialParse
 53 | 
 54 |         Args:
 55 |             transitions: The list of transitions in the order they should be applied
 56 |         Returns:
 57 |             dependencies: The list of dependencies produced when parsing the sentence. Represented
 58 |                           as a list of tuples where each tuple is of the form (head, dependent)
 59 |         """
 60 |         for transition in transitions:
 61 |             self.parse_step(transition)
 62 |         return self.dependencies
 63 | 
 64 | 
 65 | def minibatch_parse(sentences, model, batch_size):
 66 |     """Parses a list of sentences in minibatches using a model.
 67 | 
 68 |     Args:
 69 |         sentences: A list of sentences to be parsed (each sentence is a list of words)
 70 |         model: The model that makes parsing decisions. It is assumed to have a function
 71 |                model.predict(partial_parses) that takes in a list of PartialParses as input and
 72 |                returns a list of transitions predicted for each parse. That is, after calling
 73 |                    transitions = model.predict(partial_parses)
 74 |                transitions[i] will be the next transition to apply to partial_parses[i].
 75 |         batch_size: The number of PartialParses to include in each minibatch
 76 |     Returns:
 77 |         dependencies: A list where each element is the dependencies list for a parsed sentence.
 78 |                       Ordering should be the same as in sentences (i.e., dependencies[i] should
 79 |                       contain the parse for sentences[i]).
 80 |     """
 81 | 
 82 |     ### YOUR CODE HERE
 83 |     dependencies = []
 84 |     for sentence in sentences:
 85 |         pp = PartialParse(sentence)
 86 |         for i in xrange(2*len(sentence)):
 87 |             action = model.predict([pp])
 88 |             pp.parse(action)
 89 |         dependencies.append(pp.dependencies)
 90 |     ### END YOUR CODE
 91 | 
 92 |     return dependencies
 93 | 
 94 | 
 95 | def test_step(name, transition, stack, buf, deps,
 96 |               ex_stack, ex_buf, ex_deps):
 97 |     """Tests that a single parse step returns the expected output"""
 98 |     pp = PartialParse([])
 99 |     pp.stack, pp.buffer, pp.dependencies = stack, buf, deps
100 | 
101 |     pp.parse_step(transition)
102 |     stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies)))
103 |     assert stack == ex_stack, \
104 |         "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack)
105 |     assert buf == ex_buf, \
106 |         "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf)
107 |     assert deps == ex_deps, \
108 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
109 |     print "{:} test passed!".format(name)
110 | 
111 | 
112 | def test_parse_step():
113 |     """Simple tests for the PartialParse.parse_step function
114 |     Warning: these are not exhaustive
115 |     """
116 |     test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [],
117 |               ("ROOT", "the", "cat"), ("sat",), ())
118 |     test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [],
119 |               ("ROOT", "cat",), ("sat",), (("cat", "the"),))
120 |     test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [],
121 |               ("ROOT", "run",), (), (("run", "fast"),))
122 | 
123 | 
124 | def test_parse():
125 |     """Simple tests for the PartialParse.parse function
126 |     Warning: these are not exhaustive
127 |     """
128 |     sentence = ["parse", "this", "sentence"]
129 |     dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"])
130 |     dependencies = tuple(sorted(dependencies))
131 |     expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this'))
132 |     assert dependencies == expected,  \
133 |         "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected)
134 |     assert tuple(sentence) == ("parse", "this", "sentence"), \
135 |         "parse test failed: the input sentence should not be modified"
136 |     print "parse test passed!"
137 | 
138 | 
139 | class DummyModel:
140 |     """Dummy model for testing the minibatch_parse function
141 |     First shifts everything onto the stack and then does exclusively right arcs if the first word of
142 |     the sentence is "right", "left" if otherwise.
143 |     """
144 |     def predict(self, partial_parses):
145 |         return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S"
146 |                 for pp in partial_parses]
147 | 
148 | 
149 | def test_dependencies(name, deps, ex_deps):
150 |     """Tests the provided dependencies match the expected dependencies"""
151 |     deps = tuple(sorted(deps))
152 |     assert deps == ex_deps, \
153 |         "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps)
154 | 
155 | 
156 | def test_minibatch_parse():
157 |     """Simple tests for the minibatch_parse function
158 |     Warning: these are not exhaustive
159 |     """
160 |     sentences = [["right", "arcs", "only"],
161 |                  ["right", "arcs", "only", "again"],
162 |                  ["left", "arcs", "only"],
163 |                  ["left", "arcs", "only", "again"]]
164 |     deps = minibatch_parse(sentences, DummyModel(), 2)
165 |     test_dependencies("minibatch_parse", deps[0],
166 |                       (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs')))
167 |     test_dependencies("minibatch_parse", deps[1],
168 |                       (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs')))
169 |     test_dependencies("minibatch_parse", deps[2],
170 |                       (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left')))
171 |     test_dependencies("minibatch_parse", deps[3],
172 |                       (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only')))
173 |     print "minibatch_parse test passed!"
174 | 
175 | if __name__ == '__main__':
176 |     test_parse_step()
177 |     test_parse()
178 |     test_minibatch_parse()
179 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/__init__.py


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/__init__.pyc


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/utils/general_utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | import numpy as np
  4 | 
  5 | 
  6 | def get_minibatches(data, minibatch_size, shuffle=True):
  7 |     """
  8 |     Iterates through the provided data one minibatch at at time. You can use this function to
  9 |     iterate through data in minibatches as follows:
 10 | 
 11 |         for inputs_minibatch in get_minibatches(inputs, minibatch_size):
 12 |             ...
 13 | 
 14 |     Or with multiple data sources:
 15 | 
 16 |         for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size):
 17 |             ...
 18 | 
 19 |     Args:
 20 |         data: there are two possible values:
 21 |             - a list or numpy array
 22 |             - a list where each element is either a list or numpy array
 23 |         minibatch_size: the maximum number of items in a minibatch
 24 |         shuffle: whether to randomize the order of returned data
 25 |     Returns:
 26 |         minibatches: the return value depends on data:
 27 |             - If data is a list/array it yields the next minibatch of data.
 28 |             - If data a list of lists/arrays it returns the next minibatch of each element in the
 29 |               list. This can be used to iterate through multiple data sources
 30 |               (e.g., features and labels) at the same time.
 31 | 
 32 |     """
 33 |     list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray)
 34 |     data_size = len(data[0]) if list_data else len(data)
 35 |     indices = np.arange(data_size)
 36 |     if shuffle:
 37 |         np.random.shuffle(indices)
 38 |     for minibatch_start in np.arange(0, data_size, minibatch_size):
 39 |         minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size]
 40 |         yield [minibatch(d, minibatch_indices) for d in data] if list_data \
 41 |             else minibatch(data, minibatch_indices)
 42 | 
 43 | 
 44 | def minibatch(data, minibatch_idx):
 45 |     return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx]
 46 | 
 47 | 
 48 | def test_all_close(name, actual, expected):
 49 |     if actual.shape != expected.shape:
 50 |         raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}"
 51 |                          .format(name, expected.shape, actual.shape))
 52 |     if np.amax(np.fabs(actual - expected)) > 1e-6:
 53 |         raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual))
 54 |     else:
 55 |         print name, "passed!"
 56 | 
 57 | 
 58 | def logged_loop(iterable, n=None):
 59 |     if n is None:
 60 |         n = len(iterable)
 61 |     step = max(1, n / 1000)
 62 |     prog = Progbar(n)
 63 |     for i, elem in enumerate(iterable):
 64 |         if i % step == 0 or i == n - 1:
 65 |             prog.update(i + 1)
 66 |         yield elem
 67 | 
 68 | 
 69 | class Progbar(object):
 70 |     """
 71 |     Progbar class copied from keras (https://github.com/fchollet/keras/)
 72 |     Displays a progress bar.
 73 |     # Arguments
 74 |         target: Total number of steps expected.
 75 |         interval: Minimum visual progress update interval (in seconds).
 76 |     """
 77 | 
 78 |     def __init__(self, target, width=30, verbose=1):
 79 |         self.width = width
 80 |         self.target = target
 81 |         self.sum_values = {}
 82 |         self.unique_values = []
 83 |         self.start = time.time()
 84 |         self.total_width = 0
 85 |         self.seen_so_far = 0
 86 |         self.verbose = verbose
 87 | 
 88 |     def update(self, current, values=[], exact=[]):
 89 |         """
 90 |         Updates the progress bar.
 91 |         # Arguments
 92 |             current: Index of current step.
 93 |             values: List of tuples (name, value_for_last_step).
 94 |                 The progress bar will display averages for these values.
 95 |             exact: List of tuples (name, value_for_last_step).
 96 |                 The progress bar will display these values directly.
 97 |         """
 98 | 
 99 |         for k, v in values:
100 |             if k not in self.sum_values:
101 |                 self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far]
102 |                 self.unique_values.append(k)
103 |             else:
104 |                 self.sum_values[k][0] += v * (current - self.seen_so_far)
105 |                 self.sum_values[k][1] += (current - self.seen_so_far)
106 |         for k, v in exact:
107 |             if k not in self.sum_values:
108 |                 self.unique_values.append(k)
109 |             self.sum_values[k] = [v, 1]
110 |         self.seen_so_far = current
111 | 
112 |         now = time.time()
113 |         if self.verbose == 1:
114 |             prev_total_width = self.total_width
115 |             sys.stdout.write("\b" * prev_total_width)
116 |             sys.stdout.write("\r")
117 | 
118 |             numdigits = int(np.floor(np.log10(self.target))) + 1
119 |             barstr = '%%%dd/%%%dd [' % (numdigits, numdigits)
120 |             bar = barstr % (current, self.target)
121 |             prog = float(current)/self.target
122 |             prog_width = int(self.width*prog)
123 |             if prog_width > 0:
124 |                 bar += ('='*(prog_width-1))
125 |                 if current < self.target:
126 |                     bar += '>'
127 |                 else:
128 |                     bar += '='
129 |             bar += ('.'*(self.width-prog_width))
130 |             bar += ']'
131 |             sys.stdout.write(bar)
132 |             self.total_width = len(bar)
133 | 
134 |             if current:
135 |                 time_per_unit = (now - self.start) / current
136 |             else:
137 |                 time_per_unit = 0
138 |             eta = time_per_unit*(self.target - current)
139 |             info = ''
140 |             if current < self.target:
141 |                 info += ' - ETA: %ds' % eta
142 |             else:
143 |                 info += ' - %ds' % (now - self.start)
144 |             for k in self.unique_values:
145 |                 if type(self.sum_values[k]) is list:
146 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
147 |                 else:
148 |                     info += ' - %s: %s' % (k, self.sum_values[k])
149 | 
150 |             self.total_width += len(info)
151 |             if prev_total_width > self.total_width:
152 |                 info += ((prev_total_width-self.total_width) * " ")
153 | 
154 |             sys.stdout.write(info)
155 |             sys.stdout.flush()
156 | 
157 |             if current >= self.target:
158 |                 sys.stdout.write("\n")
159 | 
160 |         if self.verbose == 2:
161 |             if current >= self.target:
162 |                 info = '%ds' % (now - self.start)
163 |                 for k in self.unique_values:
164 |                     info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1]))
165 |                 sys.stdout.write(info + "\n")
166 | 
167 |     def add(self, n, values=[]):
168 |         self.update(self.seen_so_far+n, values)
169 | 


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/utils/general_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/general_utils.pyc


--------------------------------------------------------------------------------
/Assignment2/ZhichaoFu/assignment2/utils/parser_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/parser_utils.pyc


--------------------------------------------------------------------------------
/Lecture11/1611.04558.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/1611.04558.pdf


--------------------------------------------------------------------------------
/Lecture11/Lecture11.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/Lecture11.pdf


--------------------------------------------------------------------------------
/Lecture11/Lecture11_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/Lecture11_highlight.pdf


--------------------------------------------------------------------------------
/Lecture11/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture11
2 | 
3 | + [Lecture11 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/Lecture11.pdf)
4 | + [Lecture11 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/Lecture11_highlight.pdf)
5 | + Highlight Paper: [Google’s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/1611.04558.pdf)
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/Lecture12/1611.05358.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/1611.05358.pdf


--------------------------------------------------------------------------------
/Lecture12/Lecture12.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/Lecture12.pdf


--------------------------------------------------------------------------------
/Lecture12/Lecture12_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/Lecture12_highlight.pdf


--------------------------------------------------------------------------------
/Lecture12/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture12
2 | 
3 | + [Lecture12 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/Lecture12.pdf)
4 | + [Lecture12 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/Lecture12_highlight.pdf)
5 | + Highlight Paper: [Lip Reading Sentences in the Wild](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/1611.05358.pdf)
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/Lecture13/Lecture13.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture13/Lecture13.pdf


--------------------------------------------------------------------------------
/Lecture13/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture13
2 | 
3 | + [Lecture13 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture13/Lecture13.pdf)
4 | 
5 | 


--------------------------------------------------------------------------------
/Lecture14/1508.06615.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/1508.06615.pdf


--------------------------------------------------------------------------------
/Lecture14/D14-1181.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/D14-1181.pdf


--------------------------------------------------------------------------------
/Lecture14/P14-1062.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/P14-1062.pdf


--------------------------------------------------------------------------------
/Lecture14/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/README.md


--------------------------------------------------------------------------------
/Lecture14/cs224n-2017-lecture13-CNNs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/cs224n-2017-lecture13-CNNs.pdf


--------------------------------------------------------------------------------
/Lecture14/cs224n-2017-lecture13-highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/cs224n-2017-lecture13-highlight.pdf


--------------------------------------------------------------------------------
/Lecture2/2016 Arora.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/2016 Arora.pdf


--------------------------------------------------------------------------------
/Lecture2/CBOW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/CBOW.png


--------------------------------------------------------------------------------
/Lecture2/HS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/HS.png


--------------------------------------------------------------------------------
/Lecture2/Lecture2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/Lecture2.pdf


--------------------------------------------------------------------------------
/Lecture2/Lecture2_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/Lecture2_highlight.pdf


--------------------------------------------------------------------------------
/Lecture2/Lecture2_supplement.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "#### Distributed Representations of Words and Phrases and their Compositionality\n",
 8 |     "<br/>\n",
 9 |     "#### Efficient Estimation of Word Representations in Vector Space\n",
10 |     "<br/>\n",
11 |     "### （一）CBOW(Continuous Bag-of-Words)\n",
12 |     "<br/>\n",
13 |     "<img src=\"http://images2015.cnblogs.com/blog/1008922/201608/1008922-20160830124440402-325932526.png\" style=\"float:left\">\n",
14 |     "\n",
15 |     "\n",
16 |     "\\begin{eqnarray}\n",
17 |     "\\boldsymbol v_{t+j}=V\\boldsymbol x_{t+j}\n",
18 |     "\\tag{1}\\end{eqnarray}\n",
19 |     "\n",
20 |     "\\begin{eqnarray}\n",
21 |     "\\hat{\\boldsymbol v}_t=\\frac{1}{2m}\\sum_j\\boldsymbol v_{t+j}\n",
22 |     "\\tag{2}\\end{eqnarray}\n",
23 |     "\n",
24 |     "\\begin{eqnarray}\n",
25 |     "\\boldsymbol z=U\\hat{\\boldsymbol v}_t\n",
26 |     "\\tag{3}\\end{eqnarray}\n",
27 |     "\n",
28 |     "\\begin{eqnarray}\n",
29 |     "\\hat y_{\\underline i}=P(w_{\\underline i}|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})=\\text{softmax}(z_{\\underline i})=\\text{softmax}(\\boldsymbol u_{\\underline i}^\\top \\hat{\\boldsymbol v}_t),\\quad w_{\\underline i}\\in \\mathbb V\n",
30 |     "\\tag{4}\\end{eqnarray}\n",
31 |     "\n",
32 |     "<br/>\n",
33 |     "损失函数的推导：\n",
34 |     "\\begin{aligned}\\mathcal L&=-\\log \\hat y_t\\\\&=-\\log P(w_t|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})\\\\&=-\\log \\text{softmax}(z_t)\\\\&=-\\log \\frac{\\exp (\\boldsymbol u_t^\\top \\hat{\\boldsymbol v}_t)}{\\sum_{k=1}^{|\\mathbb V|}\\exp (\\boldsymbol u_{\\underline k}^\\top \\hat{\\boldsymbol v}_t)}\\\\&=-\\boldsymbol u_t^\\top \\hat{\\boldsymbol v}_t+\\log \\sum_{k=1}^{|\\mathbb V|}\\exp (\\boldsymbol u_{\\underline k}^\\top \\hat{\\boldsymbol v}_t)\\\\&=-z_t+\\log \\sum_{k=1}^{|\\mathbb V|}\\exp z_{\\underline k} \\end{aligned}\n",
35 |     "\n",
36 |     "<br/>\n",
37 |     "<br/>\n",
38 |     "### （二）Hierarchical Softmax\n",
39 |     "![](HS.png)\n",
40 |     "\\begin{eqnarray}\n",
41 |     "&\\sigma(\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)\\\\\n",
42 |     "&1-\\sigma(\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)=\\sigma(-\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)\n",
43 |     "\\end{eqnarray}\n",
44 |     "\n",
45 |     "<br/>\n",
46 |     "<br/>\n",
47 |     "### （三）Negative Sampling"
48 |    ]
49 |   },
50 |   {
51 |    "cell_type": "code",
52 |    "execution_count": null,
53 |    "metadata": {
54 |     "collapsed": true
55 |    },
56 |    "outputs": [],
57 |    "source": []
58 |   }
59 |  ],
60 |  "metadata": {
61 |   "kernelspec": {
62 |    "display_name": "Python 2",
63 |    "language": "python",
64 |    "name": "python2"
65 |   },
66 |   "language_info": {
67 |    "codemirror_mode": {
68 |     "name": "ipython",
69 |     "version": 2
70 |    },
71 |    "file_extension": ".py",
72 |    "mimetype": "text/x-python",
73 |    "name": "python",
74 |    "nbconvert_exporter": "python",
75 |    "pygments_lexer": "ipython2",
76 |    "version": "2.7.12"
77 |   }
78 |  },
79 |  "nbformat": 4,
80 |  "nbformat_minor": 2
81 | }
82 | 


--------------------------------------------------------------------------------
/Lecture2/README.md:
--------------------------------------------------------------------------------
 1 | ## CS224n Lecture2
 2 | 
 3 | + [Lecture2 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/Lecture2.pdf)
 4 | 
 5 | + [Lecture2 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/Lecture2_highlight.pdf)
 6 | 
 7 | + Highlight Paper: [A SIMPLE BUT TOUGH-TO-BEAT BASELINE FOR SENTENCE EMBEDDINGS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/2016%20Arora.pdf)
 8 | 
 9 | + [word2vec supplement](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/word2vec.md)
10 | 
11 | + Paper: [Skip-gram](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202013%20Mikolov.pdf)
12 | 
13 | + Paper: [CBOW](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202013%20Mikolov-1.pdf)
14 | 
15 | + Paper: [word2vec Explained](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202014%20Goldberg.pdf)
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Lecture2/arXiv 2013 Mikolov-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2013 Mikolov-1.pdf


--------------------------------------------------------------------------------
/Lecture2/arXiv 2013 Mikolov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2013 Mikolov.pdf


--------------------------------------------------------------------------------
/Lecture2/arXiv 2014 Goldberg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2014 Goldberg.pdf


--------------------------------------------------------------------------------
/Lecture2/word2vec.md:
--------------------------------------------------------------------------------
 1 | > Distributed Representations of Words and Phrases and their Compositionality
 2 | >
 3 | > Efficient Estimation of Word Representations in Vector Space
 4 | 
 5 | 
 6 | 
 7 | ## CBOW(Continuous Bag-of-Words)
 8 | 
 9 | $$
10 | \boldsymbol v_{t+j}=V\boldsymbol x_{t+j}  \\
11 | \hat{\boldsymbol v}_t=\frac{1}{2m}\sum_j\boldsymbol v_{t+j} \\
12 | \boldsymbol z=U\hat{\boldsymbol v}_t \\
13 | \hat y_{i}=P(w_{ i}|w_{t-m},\cdots,w_{t-1},w_{t+1},\cdots,w_{t+m})=\text{softmax}(z_{ i})=\text{softmax}(\boldsymbol u_{ i}^\top \hat{\boldsymbol v}_t)
14 | $$
15 | 
16 | 损失函数的推导：
17 | $$
18 | \begin{align*}
19 | \mathcal L &=-\log \hat y_t\\
20 | &=-\log P(w_t|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})\\
21 | &=-\log \text{softmax}(z_t)\\
22 | &=-\log \frac{\exp (\boldsymbol u_t^\top \hat{\boldsymbol v}_t)}{\sum_{k=1}^{|\mathbb V|}\exp (\boldsymbol u_{\underline k}^\top \hat{\boldsymbol v}_t)}\\
23 | &=-\boldsymbol u_t^\top \hat{\boldsymbol v}_t+\log \sum_{k=1}^{|\mathbb V|}\exp (\boldsymbol u_{\underline k}^\top \hat{\boldsymbol v}_t)\\
24 | &=-z_t+\log \sum_{k=1}^{|\mathbb V|}\exp z_{\underline k}
25 | \end{align*}
26 | $$
27 | 
28 | ## Hierarchical Softmax
29 | 
30 | ![](HS.png)
31 | 
32 | 
33 | 
34 | 
35 | $$
36 | \begin{eqnarray}
37 | 
38 | &\sigma(\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t)\\
39 | 
40 | &1-\sigma(\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t)=\sigma(-\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t)
41 | 
42 | \end{eqnarray}
43 | $$
44 | 
45 | ##  Negative Sampling
46 | 
47 | ==构造语料中不存在的上下文词对作为负样本，最大化正样本的同时最小化负样本的概率==
48 | 
49 | 正样本的概率：
50 | $$
51 | P(\mathbb D=1|w,c)=\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c})
52 | $$
53 | 全部正样本的似然：
54 | $$
55 | \prod_{(w,c)\in\mathbb D}P(\mathbb D=1|w,c)
56 | $$
57 | 全部负样本的似然：
58 | $$
59 | \prod_{(w,c)\notin\mathbb D}P(\mathbb D=1|w,c)
60 | $$
61 | 最大化下式：
62 | $$
63 | \begin{align*}
64 | &\arg\max_{\theta}\prod_{(w,c)\in\mathbb D}P(\mathbb D=1|w,c)\prod_{(w,c)\notin\mathbb D}(1-P(\mathbb D=1|w,c)) \\
65 | =&\arg\max_{\theta}\sum_{(w,c)\in\mathbb D}\log\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c}) + \sum_{(w,c)\notin\mathbb D}\log\sigma(-\boldsymbol u_{w}^\top \boldsymbol v_{c}) 
66 | \end{align*}\\
67 | 
68 | \log\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c}) + \sum_{w_j\in W_{negative}}\log\sigma(-\boldsymbol u_{w_j}^\top \boldsymbol v_{c})
69 | $$
70 | 
71 | 
72 | ## Sub-Sampling
73 | 
74 | $$
75 | p(w_i)=1-\sqrt{\frac{sample}{freq(w_i)}}
76 | $$
77 | 
78 | $$
79 | p(w_i)=1-\Bigg(\sqrt{\frac{sample}{freq(w_i)}} + \frac{sample}{freq(w_i)}\Bigg)
80 | $$
81 | 
82 | $sample$为超参数，建议设置为$1e^{-3}$到$1e^{-5}$。$freq(w_i)$ 表示词频。
83 | 
84 | ​	
85 | ​	
86 | 
87 | ​			
88 | ​				
89 | ​					
90 | 
91 | 
92 | ​			
93 | ​		
94 | ​	
95 | 
96 | 


--------------------------------------------------------------------------------
/Lecture3/2014 Pennington.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/2014 Pennington.pdf


--------------------------------------------------------------------------------
/Lecture3/Lecture3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/Lecture3.pdf


--------------------------------------------------------------------------------
/Lecture3/Lecture3_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/Lecture3_highlight.pdf


--------------------------------------------------------------------------------
/Lecture3/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture3
2 | 
3 | + [Lecture3 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/Lecture3.pdf)
4 | + [Lecture3 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/Lecture3_highlight.pdf)
5 | + Highlight Paper: [GloVe: Global Vectors for Word Representation](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/2014%20Pennington.pdf)
6 | 
7 | 


--------------------------------------------------------------------------------
/Lecture4/Lecture4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture4/Lecture4.pdf


--------------------------------------------------------------------------------
/Lecture4/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture4
2 | 
3 | + [Lecture4 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture4/Lecture4.pdf)
4 | 
5 | 


--------------------------------------------------------------------------------
/Lecture5/A Primer on Neural Network Models.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture5/A Primer on Neural Network Models.pdf


--------------------------------------------------------------------------------
/Lecture5/README.md:
--------------------------------------------------------------------------------
1 | Lecture5 [Slides & (Highplot Slides](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture5/lecture5.pdf) & [Paper](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture5/A%20Primer%20on%20Neural%20Network%20Models.pdf))
2 | 


--------------------------------------------------------------------------------
/Lecture5/lecture5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture5/lecture5.pdf


--------------------------------------------------------------------------------
/Lecture6/Lecture6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/Lecture6.pdf


--------------------------------------------------------------------------------
/Lecture6/Lecture6_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/Lecture6_highlight.pdf


--------------------------------------------------------------------------------
/Lecture6/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture6
2 | 
3 | + [Lecture6 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/Lecture6.pdf)
4 | + [Lecture6 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/Lecture6_highlight.pdf)
5 | + Highlight Paper: [Improving-distributional-similarity](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/improving-distributional-similarity-tacl-2015.pdf)
6 | 
7 | 


--------------------------------------------------------------------------------
/Lecture6/improving-distributional-similarity-tacl-2015.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/improving-distributional-similarity-tacl-2015.pdf


--------------------------------------------------------------------------------
/Lecture8/Lecture8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/Lecture8.pdf


--------------------------------------------------------------------------------
/Lecture8/Lecture8_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/Lecture8_highlight.pdf


--------------------------------------------------------------------------------
/Lecture8/README.md:
--------------------------------------------------------------------------------
1 | # CS224n Lecture8
2 | 
3 | + [Lecture8 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/Lecture8.pdf)
4 | + [Lecture8 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/Lecture8_highlight.pdf)
5 | + Highlight Paper: [Structured Training for Neural Network Transition-Based Parsing](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/acl15.pdf)
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/Lecture8/acl15.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/acl15.pdf


--------------------------------------------------------------------------------
/Lecture9/1602.02410.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/1602.02410.pdf


--------------------------------------------------------------------------------
/Lecture9/1703.02573.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/1703.02573.pdf


--------------------------------------------------------------------------------
/Lecture9/Lecture9.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/Lecture9.pdf


--------------------------------------------------------------------------------
/Lecture9/Lecture9_highlight.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/Lecture9_highlight.pdf


--------------------------------------------------------------------------------
/Lecture9/README.md:
--------------------------------------------------------------------------------
 1 | # CS224n Lecture9
 2 | 
 3 | + [Lecture9 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/Lecture9.pdf)
 4 | 
 5 | + [Lecture9 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/Lecture9_highlight.pdf)
 6 | 
 7 | + Highlight Paper:
 8 | 
 9 |   + [SUBWORD LANGUAGE MODELING WITH NEURAL NETWORKS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/char.pdf)
10 |   + [DATA NOISING AS SMOOTHING IN NEURAL NETWORK LANGUAGE MODELS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/1703.02573.pdf)
11 |   + [Exploring the Limits of Language Modeling](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/1602.02410.pdf)
12 | 
13 | 
14 |   ​
15 | 
16 | 


--------------------------------------------------------------------------------
/Lecture9/char.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/char.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ECNU17_Summer_Seminar
 2 | ECNU NLP group learns CS224n in the form of seminars in the 2017 summer.
 3 | 
 4 | 
 5 | 
 6 | ## Seminar Participants
 7 | 
 8 | 纪焘、黄子寅、杜雨沛
 9 | 
10 | *钟鸣（7月4日~7月16日回家）*
11 | 
12 | *姚岳坤（7月10日之后）*
13 | 
14 | *郑淇（7月15日之后、因为实习所以尽量少讲）*
15 | 
16 | *[韦阳](https://github.com/godweiyang)、付志超（远程参与）*
17 | 
18 | *焦乙竹、王江舟（旁听）*
19 | 
20 | 
21 | 
22 | ## Introduce CS224n
23 | 
24 | #### Lecture video Collection
25 | 
26 | + [YouTube](https://www.youtube.com/playlist?list=PL3FW7Lu3i5Jsnh1rnUwq_TcylNr7EkRe6)
27 | + [Bilibili](http://space.bilibili.com/23852932#!/channel/detail?cid=11177)
28 | 
29 | 
30 | 
31 | #### Lecture Materials
32 | 
33 | [CS224n: Natural Language Processing with Deep Learning](http://web.stanford.edu/class/cs224n/syllabus.html)
34 | 
35 | 
36 | 
37 | #### Lecture List
38 | 
39 | |       Event       |  Date  | Description                              | 描述               |                 Speaker                  |
40 | | :---------------: | :----: | :--------------------------------------- | :--------------- | :--------------------------------------: |
41 | |     Lecture1      | 7.3 一  | Introduction to NLP and Deep Learning    | 介绍自然语言和深度学习      |                   王江舟                    |
42 | |     Lecture2      | 7.5 三  | Word Vector Representations:word2vec     | Word2Vec词向量表示    | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture2) |
43 | |     Lecture3      | 7.8 六  | Advanced Word Vector Representations     | 高级词向量表示          | [杜雨沛](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture3) |
44 | |     Lecture4      | 7.10 一 | Word Window Classification and Neural Networks | 词窗分类与神经网络        | [杜雨沛](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture4) |
45 | |     Lecture5      | 7.12 三 | Backpropagation                          | 反向传播             | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture5) |
46 | |     Lecture6      | 7.15 六 | Dependency Parsing                       | 依存句法分析           | [姚岳坤](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture6) |
47 | | **Assignment #1** | 7.15 六 |                                          |                  | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment1) |
48 | |   ~~Lecture7~~    |        | ~~Introduction to TensorFlow~~           | ~~介绍TensorFlow~~ |                                          |
49 | |     Lecture8      | 7. 18二 | Recurrent Neural Networks and Language Models | RNN与语言模型         | [钟鸣](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture8) |
50 | |     Lecture9      | 7.20四  | Machine translation and advanced recurrent LSTMs and GRUs | 机器翻译与高级RNN       | [钟鸣](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture9) |
51 | |   ~~Lecture10~~   |        | ~~Midterm Review~~                       | ~~中期回顾~~         |                                          |
52 | |     Lecture11     | 7.22 六 | Neural Machine Translation and Models with Attention | NMT与注意力模型        | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture11) |
53 | |     Lecture12     | 7.24 一 | Gated recurrent units and further topics in NMT | GRU与NMT进阶        | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture12) |
54 | |     Lecture13     | 7.26 三 | End-to-end models for Speech Processing  | 端到端语音处理          | [姚岳坤](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture13) |
55 | |     Lecture14     | 7.29 六 | Convolutional Neural Networks            | CNN              | [郑淇](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture14) |
56 | | **Assignment #2** | 7.29 六 |                                          |                  | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment2) |
57 | |     Lecture15     | 7.31 一 | Tree Recursive Neural Networks and Constituency Parsing | 树RNN与短语句法分析      |                    钟鸣                    |
58 | |     Lecture16     | 8.2 三  | Coreference Resolution                   | 共指消解             |                   杜雨沛                    |
59 | |     Lecture17     | 8.6 日  | Dynamic Neural Networks for Question Answering | 动态神经网络QA         |                    钟鸣                    |
60 | |     Lecture18     | 8.7 一  | Issues in NLP and Possible Architectures for NLP | NLP中的问题与可能的解决框架  |                    韦阳                    |
61 | |     Lecture19     | 8.9 三  | Tackling the Limits of Deep Learning for NLP | 聚焦深度学习在NLP上的局限性  |                    郑淇                    |
62 | | **Assignment #3** | 8.9 三  |                                          |                  |                    纪焘                    |
63 | | **Assignment #4** | 9.10 六 |                                          |                  |                    纪焘                    |
64 | 
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------