├── Assignment1
├── FuZhichao
│   ├── Assignment1
│   │   ├── q1_softmax.py
│   │   ├── q2_gradcheck.py
│   │   ├── q2_neural.py
│   │   ├── q2_sigmoid.py
│   │   ├── q3_run.py
│   │   ├── q3_sgd.py
│   │   ├── q3_word2vec.py
│   │   └── q4_sentiment.py
│   └── assignment1-solution.pdf
├── README.md
├── TaoJi
│   ├── assignment1
│   │   ├── q1_softmax.py
│   │   ├── q2_gradcheck.py
│   │   ├── q2_neural.py
│   │   ├── q2_sigmoid.py
│   │   ├── q3_run.py
│   │   ├── q3_sgd.py
│   │   ├── q3_word2vec.py
│   │   ├── q3_word_vectors.png
│   │   ├── q4_dev_conf.png
│   │   ├── q4_dev_pred.txt
│   │   ├── q4_reg_v_acc.png
│   │   └── q4_sentiment.py
│   ├── solution.md
│   └── solution.pdf
├── WeiYang
│   ├── assignment1
│   │   ├── q1_softmax.py
│   │   ├── q2_gradcheck.py
│   │   ├── q2_neural.py
│   │   ├── q2_sigmoid.py
│   │   ├── q3_run.py
│   │   ├── q3_sgd.py
│   │   ├── q3_word2vec.py
│   │   └── q4_sentiment.py
│   └── solution.pdf
└── ZiyinHuang
│   ├── Assignment1
│   ├── Makefile
│   ├── collect_submission.sh
│   ├── get_datasets.sh
│   ├── q1_softmax.py
│   ├── q2_gradcheck.py
│   ├── q2_neural.py
│   ├── q2_sigmoid.py
│   ├── q3_run.py
│   ├── q3_sgd.py
│   ├── q3_word2vec.py
│   ├── q3_word_vectors.png
│   ├── q4_dev_conf.png
│   ├── q4_reg_v_acc.png
│   └── q4_sentiment.py
│   ├── assignment1_writen.pdf
│   └── readme
├── Assignment2
├── README.md
├── TaoJi
│   ├── assignment2
│   │   ├── model.py
│   │   ├── q1_classifier.py
│   │   ├── q1_softmax.py
│   │   ├── q2_initialization.py
│   │   ├── q2_parser_model.py
│   │   ├── q2_parser_transitions.py
│   │   └── utils
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── general_utils.py
│   │   │   ├── general_utils.pyc
│   │   │   ├── parser_utils.py
│   │   │   └── parser_utils.pyc
│   ├── solution.md
│   └── solution.pdf
├── WeiYang
│   ├── assignment2
│   │   ├── .idea
│   │   │   ├── .name
│   │   │   ├── assignment2.iml
│   │   │   ├── misc.xml
│   │   │   ├── modules.xml
│   │   │   ├── vcs.xml
│   │   │   └── workspace.xml
│   │   ├── model.py
│   │   ├── q1_classifier.py
│   │   ├── q1_softmax.py
│   │   ├── q2_initialization.py
│   │   ├── q2_parser_model.py
│   │   ├── q2_parser_transitions.py
│   │   └── utils
│   │   │   ├── __init__.py
│   │   │   ├── __init__.pyc
│   │   │   ├── general_utils.py
│   │   │   ├── general_utils.pyc
│   │   │   ├── parser_utils.py
│   │   │   └── parser_utils.pyc
│   └── solution.pdf
└── ZhichaoFu
│   ├── a.txt
│   └── assignment2
│   ├── model.py
│   ├── q1_classifier.py
│   ├── q1_softmax.py
│   ├── q2_initialization.py
│   ├── q2_parser_model.py
│   ├── q2_parser_transitions.py
│   └── utils
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── general_utils.py
│   ├── general_utils.pyc
│   ├── parser_utils.py
│   └── parser_utils.pyc
├── Lecture11
├── 1611.04558.pdf
├── Lecture11.pdf
├── Lecture11_highlight.pdf
└── README.md
├── Lecture12
├── 1611.05358.pdf
├── Lecture12.pdf
├── Lecture12_highlight.pdf
└── README.md
├── Lecture13
├── Lecture13.pdf
└── README.md
├── Lecture14
├── 1508.06615.pdf
├── D14-1181.pdf
├── P14-1062.pdf
├── README.md
├── cs224n-2017-lecture13-CNNs.pdf
└── cs224n-2017-lecture13-highlight.pdf
├── Lecture2
├── 2016 Arora.pdf
├── CBOW.png
├── HS.png
├── Lecture2.pdf
├── Lecture2_highlight.pdf
├── Lecture2_supplement.ipynb
├── README.md
├── arXiv 2013 Mikolov-1.pdf
├── arXiv 2013 Mikolov.pdf
├── arXiv 2014 Goldberg.pdf
└── word2vec.md
├── Lecture3
├── 2014 Pennington.pdf
├── Lecture3.pdf
├── Lecture3_highlight.pdf
└── README.md
├── Lecture4
├── Lecture4.pdf
└── README.md
├── Lecture5
├── A Primer on Neural Network Models.pdf
├── README.md
└── lecture5.pdf
├── Lecture6
├── Lecture6.pdf
├── Lecture6_highlight.pdf
├── README.md
└── improving-distributional-similarity-tacl-2015.pdf
├── Lecture8
├── Lecture8.pdf
├── Lecture8_highlight.pdf
├── README.md
└── acl15.pdf
├── Lecture9
├── 1602.02410.pdf
├──
1703.02573.pdf ├── Lecture9.pdf ├── Lecture9_highlight.pdf ├── README.md └── char.pdf └── README.md /Assignment1/FuZhichao/Assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x): 5 | """Compute the softmax function for each row of the input x. 6 | 7 | It is crucial that this function is optimized for speed because 8 | it will be used frequently in later code. You might find numpy 9 | functions np.exp, np.sum, np.reshape, np.max, and numpy 10 | broadcasting useful for this task. 11 | 12 | Numpy broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html 14 | 15 | You should also make sure that your code works for a single 16 | N-dimensional vector (treat the vector as a single row) and 17 | for M x N matrices. This may be useful for testing later. Also, 18 | make sure that the dimensions of the output match the input. 19 | 20 | You must implement the optimization in problem 1(a) of the 21 | written assignment! 22 | 23 | Arguments: 24 | x -- A N dimensional vector or M x N dimensional numpy matrix. 25 | 26 | Return: 27 | x -- You are allowed to modify x in-place 28 | """ 29 | orig_shape = x.shape 30 | 31 | if len(x.shape) > 1: 32 | # Matrix 33 | x -= np.max(x, axis=1, keepdims=True) 34 | x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True) 35 | ### YOUR CODE HERE 36 | #raise NotImplementedError 37 | ### END YOUR CODE 38 | else: 39 | # Vector 40 | x -= np.max(x) 41 | x = np.exp(x) / np.sum(np.exp(x)) 42 | ### YOUR CODE HERE 43 | #raise NotImplementedError 44 | ### END YOUR CODE 45 | 46 | assert x.shape == orig_shape 47 | return x 48 | 49 | 50 | def test_softmax_basic(): 51 | """ 52 | Some simple tests to get you started. 53 | Warning: these are not exhaustive. 54 | """ 55 | print "Running basic tests..." 56 | test1 = softmax(np.array([1,2])) 57 | print test1 58 | ans1 = np.array([0.26894142, 0.73105858]) 59 | assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06) 60 | 61 | test2 = softmax(np.array([[1001,1002],[3,4]])) 62 | print test2 63 | ans2 = np.array([ 64 | [0.26894142, 0.73105858], 65 | [0.26894142, 0.73105858]]) 66 | assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06) 67 | 68 | test3 = softmax(np.array([[-1001,-1002]])) 69 | print test3 70 | ans3 = np.array([0.73105858, 0.26894142]) 71 | assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06) 72 | 73 | print "You should be able to verify these results by hand!\n" 74 | 75 | 76 | def test_softmax(): 77 | """ 78 | Use this space to test your softmax implementation by running: 79 | python q1_softmax.py 80 | This function will not be called by the autograder, nor will 81 | your tests be graded. 82 | """ 83 | print "Running your tests..." 84 | ### YOUR CODE HERE 85 | #raise NotImplementedError 86 | ### END YOUR CODE 87 | 88 | 89 | if __name__ == "__main__": 90 | test_softmax_basic() 91 | test_softmax() 92 | -------------------------------------------------------------------------------- /Assignment1/FuZhichao/Assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | 7 | # First implement a gradient checker by filling in the following functions 8 | def gradcheck_naive(f, x): 9 | """ Gradient check for a function f. 
10 | 11 | Arguments: 12 | f -- a function that takes a single argument and outputs the 13 | cost and its gradients 14 | x -- the point (numpy array) to check the gradient at 15 | """ 16 | 17 | rndstate = random.getstate() 18 | random.setstate(rndstate) 19 | fx, grad = f(x) # Evaluate function value at original point 20 | h = 1e-4 # Do not change this! 21 | 22 | # Iterate over all indexes in x 23 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 24 | while not it.finished: 25 | ix = it.multi_index 26 | 27 | # Try modifying x[ix] with h defined above to compute 28 | # numerical gradients. Make sure you call random.setstate(rndstate) 29 | # before calling f(x) each time. This will make it possible 30 | # to test cost functions with built in randomness later. 31 | 32 | ### YOUR CODE HERE: 33 | x[ix] += h 34 | random.setstate(rndstate) 35 | new_f1 = f(x)[0] 36 | x[ix] -= 2*h 37 | random.setstate(rndstate) 38 | new_f2 = f(x)[0] 39 | x[ix] += h 40 | numgrad = (new_f1 - new_f2) / (2 * h) 41 | #raise NotImplementedError 42 | ### END YOUR CODE 43 | 44 | # Compare gradients 45 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 46 | if reldiff > 1e-5: 47 | print "Gradient check failed." 48 | print "First gradient error found at index %s" % str(ix) 49 | print "Your gradient: %f \t Numerical gradient: %f" % ( 50 | grad[ix], numgrad) 51 | return 52 | 53 | it.iternext() # Step to next dimension 54 | 55 | print "Gradient check passed!" 56 | 57 | 58 | def sanity_check(): 59 | """ 60 | Some basic sanity checks. 61 | """ 62 | quad = lambda x: (np.sum(x ** 2), x * 2) 63 | 64 | print "Running sanity checks..." 65 | gradcheck_naive(quad, np.array(123.456)) # scalar test 66 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 67 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 68 | print "" 69 | 70 | 71 | def your_sanity_checks(): 72 | """ 73 | Use this space add any additional sanity checks by running: 74 | python q2_gradcheck.py 75 | This function will not be called by the autograder, nor will 76 | your additional tests be graded. 77 | """ 78 | print "Running your sanity checks..." 79 | ### YOUR CODE HERE 80 | #raise NotImplementedError 81 | ### END YOUR CODE 82 | 83 | 84 | if __name__ == "__main__": 85 | sanity_check() 86 | your_sanity_checks() 87 | -------------------------------------------------------------------------------- /Assignment1/FuZhichao/Assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | from q1_softmax import softmax 7 | from q2_sigmoid import sigmoid, sigmoid_grad 8 | from q2_gradcheck import gradcheck_naive 9 | 10 | 11 | def forward_backward_prop(data, labels, params, dimensions): 12 | """ 13 | Forward and backward propagation for a two-layer sigmoidal network 14 | 15 | Compute the forward propagation and for the cross entropy cost, 16 | and backward propagation for the gradients for all parameters. 17 | 18 | Arguments: 19 | data -- M x Dx matrix, where each row is a training example. 20 | labels -- M x Dy matrix, where each row is a one-hot vector. 21 | params -- Model parameters, these are unpacked for you. 
22 | dimensions -- A tuple of input dimension, number of hidden units 23 | and output dimension 24 | """ 25 | 26 | ### Unpack network parameters (do not modify) 27 | if len(data.shape) >= 2: 28 | (N, _) = data.shape 29 | 30 | ofs = 0 31 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 32 | 33 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 34 | ofs += Dx * H 35 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 36 | ofs += H 37 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 38 | ofs += H * Dy 39 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 40 | 41 | ### YOUR CODE HERE: forward propagation 42 | a1 = sigmoid(data.dot(W1) + b1) 43 | a2 = softmax(a1.dot(W2) + b2) 44 | 45 | cost = -np.sum(np.log(a2[labels == 1])) / N 46 | #raise NotImplementedError 47 | ### END YOUR CODE 48 | 49 | ### YOUR CODE HERE: backward propagation 50 | grad_a2 = ( a2 - labels ) 51 | 52 | gradW2 = np.dot( a1.T, grad_a2 ) * (1.0/N) 53 | gradb2 = np.sum( grad_a2, axis=0, keepdims=True ) * (1.0/N) 54 | 55 | grad_a1 = np.dot( grad_a2, W2.T ) * sigmoid_grad(a1) 56 | 57 | gradW1 = np.dot( data.T, grad_a1 ) * (1.0/N) 58 | gradb1 = np.sum( grad_a1, axis=0, keepdims=True ) * (1.0/N) 59 | 60 | #raise NotImplementedError 61 | ### END YOUR CODE 62 | 63 | ### Stack gradients (do not modify) 64 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 65 | gradW2.flatten(), gradb2.flatten())) 66 | 67 | return cost, grad 68 | 69 | 70 | def sanity_check(): 71 | """ 72 | Set up fake data and parameters for the neural network, and test using 73 | gradcheck. 74 | """ 75 | print "Running sanity check..." 76 | 77 | N = 20 78 | dimensions = [10, 5, 10] 79 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 80 | labels = np.zeros((N, dimensions[2])) 81 | for i in xrange(N): 82 | labels[i, random.randint(0,dimensions[2]-1)] = 1 83 | 84 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 85 | dimensions[1] + 1) * dimensions[2], ) 86 | 87 | gradcheck_naive(lambda params: 88 | forward_backward_prop(data, labels, params, dimensions), params) 89 | 90 | 91 | def your_sanity_checks(): 92 | """ 93 | Use this space add any additional sanity checks by running: 94 | python q2_neural.py 95 | This function will not be called by the autograder, nor will 96 | your additional tests be graded. 97 | """ 98 | print "Running your sanity checks..." 99 | ### YOUR CODE HERE 100 | #raise NotImplementedError 101 | ### END YOUR CODE 102 | 103 | 104 | if __name__ == "__main__": 105 | sanity_check() 106 | your_sanity_checks() 107 | -------------------------------------------------------------------------------- /Assignment1/FuZhichao/Assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | def sigmoid(x): 7 | """ 8 | Compute the sigmoid function for the input here. 9 | 10 | Arguments: 11 | x -- A scalar or numpy array. 12 | 13 | Return: 14 | s -- sigmoid(x) 15 | """ 16 | 17 | ### YOUR CODE HERE 18 | s = 1.0 / (1 + np.exp(-x)) 19 | #raise NotImplementedError 20 | ### END YOUR CODE 21 | 22 | return s 23 | 24 | 25 | def sigmoid_grad(s): 26 | """ 27 | Compute the gradient for the sigmoid function here. Note that 28 | for this implementation, the input s should be the sigmoid 29 | function value of your original input x. 30 | 31 | Arguments: 32 | s -- A scalar or numpy array. 33 | 34 | Return: 35 | ds -- Your computed gradient. 
36 | """ 37 | ds = s * (1 - s) 38 | ### YOUR CODE HERE 39 | #raise NotImplementedError 40 | ### END YOUR CODE 41 | 42 | return ds 43 | 44 | 45 | def test_sigmoid_basic(): 46 | """ 47 | Some simple tests to get you started. 48 | Warning: these are not exhaustive. 49 | """ 50 | print "Running basic tests..." 51 | x = np.array([[1, 2], [-1, -2]]) 52 | f = sigmoid(x) 53 | g = sigmoid_grad(f) 54 | print f 55 | f_ans = np.array([ 56 | [0.73105858, 0.88079708], 57 | [0.26894142, 0.11920292]]) 58 | assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06) 59 | print g 60 | g_ans = np.array([ 61 | [0.19661193, 0.10499359], 62 | [0.19661193, 0.10499359]]) 63 | assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06) 64 | print "You should verify these results by hand!\n" 65 | 66 | 67 | def test_sigmoid(): 68 | """ 69 | Use this space to test your sigmoid implementation by running: 70 | python q2_sigmoid.py 71 | This function will not be called by the autograder, nor will 72 | your tests be graded. 73 | """ 74 | print "Running your tests..." 75 | ### YOUR CODE HERE 76 | #raise NotImplementedError 77 | ### END YOUR CODE 78 | 79 | 80 | if __name__ == "__main__": 81 | test_sigmoid_basic(); 82 | test_sigmoid() 83 | -------------------------------------------------------------------------------- /Assignment1/FuZhichao/Assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import random 4 | import numpy as np 5 | from utils.treebank import StanfordSentiment 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | import time 10 | 11 | from q3_word2vec import * 12 | from q3_sgd import * 13 | 14 | # Reset the random seed to make sure that everyone gets the same results 15 | random.seed(314) 16 | dataset = StanfordSentiment() 17 | tokens = dataset.tokens() 18 | nWords = len(tokens) 19 | 20 | # We are going to train 10-dimensional vectors for this assignment 21 | dimVectors = 10 22 | 23 | # Context size 24 | C = 5 25 | 26 | # Reset the random seed to make sure that everyone gets the same results 27 | random.seed(31415) 28 | np.random.seed(9265) 29 | 30 | startTime=time.time() 31 | wordVectors = np.concatenate( 32 | ((np.random.rand(nWords, dimVectors) - 0.5) / 33 | dimVectors, np.zeros((nWords, dimVectors))), 34 | axis=0) 35 | wordVectors = sgd( 36 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 37 | negSamplingCostAndGradient), 38 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 39 | # Note that normalization is not called here. This is not a bug, 40 | # normalizing during training loses the notion of length. 
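# The sgd(...) call above passes postprocessing=None, so the vectors stay
# un-normalized while training (as the note above says, renormalizing on every
# step would lose the length information the vectors learn).
# For reference, a minimal sketch of how a postprocessing callback could be
# wired in instead -- this assumes q3_word2vec.py provides a row-normalization
# helper (called normalizeRows here; the name is an assumption, not shown above):
#
#   wordVectors = sgd(
#       lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C,
#                                        negSamplingCostAndGradient),
#       wordVectors, 0.3, 40000,
#       postprocessing=normalizeRows,  # renormalize rows after every update
#       useSaved=True, PRINT_EVERY=10)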
41 | 42 | print "sanity check: cost at convergence should be around or below 10" 43 | print "training took %d seconds" % (time.time() - startTime) 44 | 45 | # concatenate the input and output word vectors 46 | wordVectors = np.concatenate( 47 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 48 | axis=0) 49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:] 50 | 51 | visualizeWords = [ 52 | "the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 53 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 54 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 55 | "annoying"] 56 | 57 | visualizeIdx = [tokens[word] for word in visualizeWords] 58 | visualizeVecs = wordVectors[visualizeIdx, :] 59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 61 | U,S,V = np.linalg.svd(covariance) 62 | coord = temp.dot(U[:,0:2]) 63 | 64 | for i in xrange(len(visualizeWords)): 65 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 66 | bbox=dict(facecolor='green', alpha=0.1)) 67 | 68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 70 | 71 | plt.savefig('q3_word_vectors.png') 72 | -------------------------------------------------------------------------------- /Assignment1/FuZhichao/Assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Save parameters every a few SGD iterations as fail-safe 4 | SAVE_PARAMS_EVERY = 5000 5 | 6 | import glob 7 | import random 8 | import numpy as np 9 | import os.path as op 10 | import cPickle as pickle 11 | 12 | 13 | def load_saved_params(): 14 | """ 15 | A helper function that loads previously saved parameters and resets 16 | iteration start. 17 | """ 18 | st = 0 19 | for f in glob.glob("saved_params_*.npy"): 20 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 21 | if (iter > st): 22 | st = iter 23 | 24 | if st > 0: 25 | with open("saved_params_%d.npy" % st, "r") as f: 26 | params = pickle.load(f) 27 | state = pickle.load(f) 28 | return st, params, state 29 | else: 30 | return st, None, None 31 | 32 | 33 | def save_params(iter, params): 34 | with open("saved_params_%d.npy" % iter, "w") as f: 35 | pickle.dump(params, f) 36 | pickle.dump(random.getstate(), f) 37 | 38 | 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False, 40 | PRINT_EVERY=10): 41 | """ Stochastic Gradient Descent 42 | Implement the stochastic gradient descent method in this function. 43 | Arguments: 44 | f -- the function to optimize, it should take a single 45 | argument and yield two outputs, a cost and the gradient 46 | with respect to the arguments 47 | x0 -- the initial point to start SGD from 48 | step -- the step size for SGD 49 | iterations -- total iterations to run SGD for 50 | postprocessing -- postprocessing function for the parameters 51 | if necessary. In the case of word2vec we will need to 52 | normalize the word vectors to have unit length. 
    PRINT_EVERY -- specifies how many iterations to output loss
    Return:
    x -- the parameter value after SGD finishes
    """
    # Anneal learning rate every several iterations
    ANNEAL_EVERY = 20000
    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            step *= 0.5 ** (start_iter / ANNEAL_EVERY)
        if state:
            random.setstate(state)
    else:
        start_iter = 0
    x = x0
    if not postprocessing:
        postprocessing = lambda x: x
    expcost = None
    for iter in xrange(start_iter + 1, iterations + 1):
        # Don't forget to apply the postprocessing after every iteration!
        # You might want to print the progress every few iterations.
        cost = None
        ### YOUR CODE HERE
        cost, grad = f(x)
        x -= step * grad
        postprocessing(x)
        ### END YOUR CODE
        if iter % PRINT_EVERY == 0:
            if not expcost:
                expcost = cost
            else:
                expcost = .95 * expcost + .05 * cost
            print "iter %d: %f" % (iter, expcost)
        if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
            save_params(iter, x)
        if iter % ANNEAL_EVERY == 0:
            step *= 0.5
    return x


def sanity_check():
    quad = lambda x: (np.sum(x ** 2), x * 2)

    print "Running sanity checks..."
    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100)
    print "test 1 result:", t1
    assert abs(t1) <= 1e-6

    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100)
    print "test 2 result:", t2
    assert abs(t2) <= 1e-6

    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100)
    print "test 3 result:", t3
    assert abs(t3) <= 1e-6

    print ""


def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
    python q3_sgd.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print "Running your sanity checks..."
    ### YOUR CODE HERE
    #raise NotImplementedError
    ### END YOUR CODE


if __name__ == "__main__":
    sanity_check()
    your_sanity_checks()
--------------------------------------------------------------------------------
/Assignment1/FuZhichao/assignment1-solution.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/FuZhichao/assignment1-solution.pdf
--------------------------------------------------------------------------------
/Assignment1/README.md:
--------------------------------------------------------------------------------
# Assignment 1

Materials: [Assignment 1](http://web.stanford.edu/class/cs224n/assignment1/index.html)

[Stanford Sentiment Treebank](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment1/stanfordSentimentTreebank.zip)

[GloVe word vectors](http://nlp.stanford.edu/data/glove.6B.zip)



```
./Assignment1
    /TaoJi
        solution.md      -- solution write-up: coding-question implementations + written answers
        /assignment1
            ...code...   -- project code (data files removed)
    /ZiyinHuang
        ...
    /YupeiDu
        ...
    /MingZhong
        ...
    ...
24 | ``` 25 | 26 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x): 5 | """Compute the softmax function for each row of the input x. 6 | 7 | It is crucial that this function is optimized for speed because 8 | it will be used frequently in later code. You might find numpy 9 | functions np.exp, np.sum, np.reshape, np.max, and numpy 10 | broadcasting useful for this task. 11 | 12 | Numpy broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html 14 | 15 | You should also make sure that your code works for a single 16 | N-dimensional vector (treat the vector as a single row) and 17 | for M x N matrices. This may be useful for testing later. Also, 18 | make sure that the dimensions of the output match the input. 19 | 20 | You must implement the optimization in problem 1(a) of the 21 | written assignment! 22 | 23 | Arguments: 24 | x -- A N dimensional vector or M x N dimensional numpy matrix. 25 | 26 | Return: 27 | x -- You are allowed to modify x in-place 28 | """ 29 | orig_shape = x.shape 30 | 31 | if len(x.shape) > 1: 32 | # Matrix 33 | ### YOUR CODE HERE 34 | c = np.max(x, axis=1).reshape(x.shape[0], 1) 35 | x = np.exp(x - c) 36 | norm = np.sum(x, axis=1).reshape(x.shape[0], 1) 37 | x = x / norm 38 | ### END YOUR CODE 39 | else: 40 | # Vector 41 | ### YOUR CODE HERE 42 | c = np.max(x) 43 | x = np.exp(x - c) 44 | x = x / x.sum() 45 | ### END YOUR CODE 46 | 47 | assert x.shape == orig_shape 48 | return x 49 | 50 | 51 | def test_softmax_basic(): 52 | """ 53 | Some simple tests to get you started. 54 | Warning: these are not exhaustive. 55 | """ 56 | print "Running basic tests..." 57 | test1 = softmax(np.array([1,2])) 58 | 59 | ans1 = np.array([0.26894142, 0.73105858]) 60 | print test1 61 | assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06) 62 | 63 | test2 = softmax(np.array([[1001,1002],[3,4]])) 64 | print test2 65 | ans2 = np.array([ 66 | [0.26894142, 0.73105858], 67 | [0.26894142, 0.73105858]]) 68 | assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06) 69 | 70 | test3 = softmax(np.array([[-1001,-1002]])) 71 | print test3 72 | ans3 = np.array([0.73105858, 0.26894142]) 73 | assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06) 74 | 75 | print "You should be able to verify these results by hand!\n" 76 | 77 | 78 | def test_softmax(): 79 | """ 80 | Use this space to test your softmax implementation by running: 81 | python q1_softmax.py 82 | This function will not be called by the autograder, nor will 83 | your tests be graded. 84 | """ 85 | print "Running your tests..." 
86 | ### YOUR CODE HERE 87 | mytest1 = softmax(np.array([ 88 | [1, 2, 3, 6], 89 | [2, 4, 5, 6], 90 | [1, 2, 3, 6]])) 91 | print mytest1 92 | myans1 = np.array([ 93 | [0.00626879, 0.01704033, 0.04632042, 0.93037047], 94 | [0.01203764, 0.08894682, 0.24178252, 0.65723302], 95 | [0.00626879, 0.01704033, 0.04632042, 0.93037047]]) 96 | assert np.allclose(mytest1, myans1, rtol=1e-05, atol=1e-06) 97 | ### END YOUR CODE 98 | 99 | 100 | if __name__ == "__main__": 101 | test_softmax_basic() 102 | test_softmax() 103 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | 7 | # First implement a gradient checker by filling in the following functions 8 | def gradcheck_naive(f, x): 9 | """ Gradient check for a function f. 10 | 11 | Arguments: 12 | f -- a function that takes a single argument and outputs the 13 | cost and its gradients 14 | x -- the point (numpy array) to check the gradient at 15 | """ 16 | rndstate = random.getstate() 17 | random.setstate(rndstate) 18 | fx, grad = f(x) # Evaluate function value at original point 19 | h = 1e-4 # Do not change this! 20 | 21 | # Iterate over all indexes in x 22 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 23 | while not it.finished: 24 | ix = it.multi_index 25 | 26 | # Try modifying x[ix] with h defined above to compute 27 | # numerical gradients. Make sure you call random.setstate(rndstate) 28 | # before calling f(x) each time. This will make it possible 29 | # to test cost functions with built in randomness later. 30 | 31 | ### YOUR CODE HERE: 32 | 33 | x[ix] += h 34 | random.setstate(rndstate) 35 | fx1, _ = f(x) 36 | x[ix] -= 2 * h 37 | random.setstate(rndstate) 38 | fx2, _ = f(x) 39 | numgrad = (fx1-fx2) / (2.0*h) 40 | x[ix] += h 41 | 42 | ### END YOUR CODE 43 | 44 | # Compare gradients 45 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 46 | if reldiff > 1e-5: 47 | print "Gradient check failed." 48 | print "First gradient error found at index %s" % str(ix) 49 | print "Your gradient: %f \t Numerical gradient: %f" % ( 50 | grad[ix], numgrad) 51 | return 52 | 53 | it.iternext() # Step to next dimension 54 | 55 | print "Gradient check passed!" 56 | 57 | 58 | def sanity_check(): 59 | """ 60 | Some basic sanity checks. 61 | """ 62 | quad = lambda x: (np.sum(x ** 2), x * 2) 63 | 64 | print "Running sanity checks..." 65 | gradcheck_naive(quad, np.array(123.456)) # scalar test 66 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 67 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 68 | print "" 69 | 70 | 71 | def your_sanity_checks(): 72 | """ 73 | Use this space add any additional sanity checks by running: 74 | python q2_gradcheck.py 75 | This function will not be called by the autograder, nor will 76 | your additional tests be graded. 77 | """ 78 | print "Running your sanity checks..." 
79 | ### YOUR CODE HERE 80 | quad = lambda x: (np.sum(x ** 3), 3 * x ** 2) 81 | gradcheck_naive(quad, np.random.randn(3,4,5)) # 3-D test 82 | ### END YOUR CODE 83 | 84 | 85 | if __name__ == "__main__": 86 | sanity_check() 87 | your_sanity_checks() 88 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | from q1_softmax import softmax 7 | from q2_sigmoid import sigmoid, sigmoid_grad 8 | from q2_gradcheck import gradcheck_naive 9 | 10 | 11 | def forward_backward_prop(data, labels, params, dimensions): 12 | """ 13 | Forward and backward propagation for a two-layer sigmoidal network 14 | 15 | Compute the forward propagation and for the cross entropy cost, 16 | and backward propagation for the gradients for all parameters. 17 | 18 | Arguments: 19 | data -- M x Dx matrix, where each row is a training example. 20 | labels -- M x Dy matrix, where each row is a one-hot vector. 21 | params -- Model parameters, these are unpacked for you. 22 | dimensions -- A tuple of input dimension, number of hidden units 23 | and output dimension 24 | """ 25 | 26 | ### Unpack network parameters (do not modify) 27 | ofs = 0 28 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 29 | 30 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 31 | ofs += Dx * H 32 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 33 | ofs += H 34 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 35 | ofs += H * Dy 36 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 37 | 38 | ### YOUR CODE HERE: forward propagation 39 | 40 | M = data.shape[0] 41 | # (M, H) 42 | a = np.dot(data, W1) + b1 43 | hiddens = sigmoid(a) 44 | # (M, Dy) 45 | z = np.dot(hiddens, W2) + b2 46 | outputs = softmax(z) 47 | 48 | ### END YOUR CODE 49 | 50 | cost = -1 * labels * np.log(outputs) 51 | cost = cost.sum() / M 52 | 53 | ### YOUR CODE HERE: backward propagation 54 | 55 | # (M, Dy) 56 | gradZs = outputs - labels 57 | # (M, H, Dx) 58 | gradW2 = np.array([np.dot(hiddens[i].reshape(1, H).T, gradZs[i].reshape(1, Dy)) for i in xrange(M)]) 59 | # (H, Dx) 60 | gradW2 = gradW2.sum(axis=0) * (1.0/M) 61 | # (1, Dx) 62 | gradb2 = (gradZs.sum(axis=0) * (1.0/M)).reshape(1, Dy) 63 | # (M, H) 64 | gradAs = np.array([np.dot(gradZs[i].reshape(1, Dy), W2.T)*sigmoid_grad(hiddens[i]) for i in xrange(M)]) 65 | # (M, Dx, H) 66 | gradW1 = np.array([np.dot(data[i].reshape(1, Dx).T, gradAs[i].reshape(1, H)) for i in xrange(M)]) 67 | # (Dx, H) 68 | gradW1 = gradW1.sum(axis=0) * (1.0/M) 69 | # (1, H) 70 | gradb1 = gradAs.sum(axis=0) * (1.0/M) 71 | 72 | ### END YOUR CODE 73 | 74 | ### Stack gradients (do not modify) 75 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 76 | gradW2.flatten(), gradb2.flatten())) 77 | #print grad 78 | return cost, grad 79 | 80 | 81 | def sanity_check(): 82 | """ 83 | Set up fake data and parameters for the neural network, and test using 84 | gradcheck. 85 | """ 86 | print "Running sanity check..." 
87 | 88 | N = 20 89 | dimensions = [10, 5, 10] 90 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 91 | labels = np.zeros((N, dimensions[2])) 92 | for i in xrange(N): 93 | labels[i, random.randint(0,dimensions[2]-1)] = 1 94 | 95 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 96 | dimensions[1] + 1) * dimensions[2], ) 97 | 98 | gradcheck_naive(lambda params: 99 | forward_backward_prop(data, labels, params, dimensions), params) 100 | 101 | 102 | def your_sanity_checks(): 103 | """ 104 | Use this space add any additional sanity checks by running: 105 | python q2_neural.py 106 | This function will not be called by the autograder, nor will 107 | your additional tests be graded. 108 | """ 109 | print "Running your sanity checks..." 110 | ### YOUR CODE HERE 111 | raise NotImplementedError 112 | ### END YOUR CODE 113 | 114 | 115 | if __name__ == "__main__": 116 | sanity_check() 117 | your_sanity_checks() 118 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | def sigmoid(x): 7 | """ 8 | Compute the sigmoid function for the input here. 9 | 10 | Arguments: 11 | x -- A scalar or numpy array. 12 | 13 | Return: 14 | s -- sigmoid(x) 15 | """ 16 | 17 | ### YOUR CODE HERE 18 | s = 1. / (1. + np.exp(-x)) 19 | ### END YOUR CODE 20 | 21 | return s 22 | 23 | 24 | def sigmoid_grad(s): 25 | """ 26 | Compute the gradient for the sigmoid function here. Note that 27 | for this implementation, the input s should be the sigmoid 28 | function value of your original input x. 29 | 30 | Arguments: 31 | s -- A scalar or numpy array. 32 | 33 | Return: 34 | ds -- Your computed gradient. 35 | """ 36 | 37 | ### YOUR CODE HERE 38 | ds = s * (1. - s) 39 | ### END YOUR CODE 40 | 41 | return ds 42 | 43 | 44 | def test_sigmoid_basic(): 45 | """ 46 | Some simple tests to get you started. 47 | Warning: these are not exhaustive. 48 | """ 49 | print "Running basic tests..." 50 | x = np.array([[1, 2], [-1, -2]]) 51 | f = sigmoid(x) 52 | g = sigmoid_grad(f) 53 | print f 54 | f_ans = np.array([ 55 | [0.73105858, 0.88079708], 56 | [0.26894142, 0.11920292]]) 57 | assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06) 58 | print g 59 | g_ans = np.array([ 60 | [0.19661193, 0.10499359], 61 | [0.19661193, 0.10499359]]) 62 | assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06) 63 | print "You should verify these results by hand!\n" 64 | 65 | 66 | def test_sigmoid(): 67 | """ 68 | Use this space to test your sigmoid implementation by running: 69 | python q2_sigmoid.py 70 | This function will not be called by the autograder, nor will 71 | your tests be graded. 72 | """ 73 | print "Running your tests..." 
74 | ### YOUR CODE HERE 75 | x = 0.45 76 | f = sigmoid(x) 77 | g = sigmoid_grad(f) 78 | print f 79 | f_ans = 0.61063923 80 | assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06) 81 | print g 82 | g_ans = 0.23775896 83 | assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06) 84 | ### END YOUR CODE 85 | 86 | 87 | if __name__ == "__main__": 88 | test_sigmoid_basic(); 89 | test_sigmoid() 90 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import random 4 | import numpy as np 5 | from utils.treebank import StanfordSentiment 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | import time 10 | 11 | from q3_word2vec import * 12 | from q3_sgd import * 13 | 14 | # Reset the random seed to make sure that everyone gets the same results 15 | random.seed(314) 16 | dataset = StanfordSentiment() 17 | tokens = dataset.tokens() 18 | nWords = len(tokens) 19 | 20 | # We are going to train 10-dimensional vectors for this assignment 21 | dimVectors = 10 22 | 23 | # Context size 24 | C = 5 25 | 26 | # Reset the random seed to make sure that everyone gets the same results 27 | random.seed(31415) 28 | np.random.seed(9265) 29 | 30 | startTime=time.time() 31 | wordVectors = np.concatenate( 32 | ((np.random.rand(nWords, dimVectors) - 0.5) / 33 | dimVectors, np.zeros((nWords, dimVectors))), 34 | axis=0) 35 | wordVectors = sgd( 36 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 37 | negSamplingCostAndGradient), 38 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 39 | # Note that normalization is not called here. This is not a bug, 40 | # normalizing during training loses the notion of length. 
41 | 42 | print "sanity check: cost at convergence should be around or below 10" 43 | print "training took %d seconds" % (time.time() - startTime) 44 | 45 | # concatenate the input and output word vectors 46 | wordVectors = np.concatenate( 47 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 48 | axis=0) 49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:] 50 | 51 | visualizeWords = [ 52 | "the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 53 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 54 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 55 | "annoying"] 56 | 57 | visualizeIdx = [tokens[word] for word in visualizeWords] 58 | visualizeVecs = wordVectors[visualizeIdx, :] 59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 61 | U,S,V = np.linalg.svd(covariance) 62 | coord = temp.dot(U[:,0:2]) 63 | 64 | for i in xrange(len(visualizeWords)): 65 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 66 | bbox=dict(facecolor='green', alpha=0.1)) 67 | 68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 70 | 71 | plt.savefig('q3_word_vectors.png') 72 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Save parameters every a few SGD iterations as fail-safe 4 | SAVE_PARAMS_EVERY = 5000 5 | 6 | import glob 7 | import random 8 | import numpy as np 9 | import os.path as op 10 | import cPickle as pickle 11 | 12 | 13 | def load_saved_params(): 14 | """ 15 | A helper function that loads previously saved parameters and resets 16 | iteration start. 17 | """ 18 | st = 0 19 | for f in glob.glob("saved_params_*.npy"): 20 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 21 | if (iter > st): 22 | st = iter 23 | 24 | if st > 0: 25 | 26 | print st 27 | with open("saved_params_%d.npy" % st, "r") as f: 28 | params = pickle.load(f) 29 | state = pickle.load(f) 30 | return st, params, state 31 | else: 32 | return st, None, None 33 | 34 | 35 | def save_params(iter, params): 36 | with open("saved_params_%d.npy" % iter, "w") as f: 37 | pickle.dump(params, f) 38 | pickle.dump(random.getstate(), f) 39 | 40 | 41 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False, 42 | PRINT_EVERY=10): 43 | """ Stochastic Gradient Descent 44 | 45 | Implement the stochastic gradient descent method in this function. 46 | 47 | Arguments: 48 | f -- the function to optimize, it should take a single 49 | argument and yield two outputs, a cost and the gradient 50 | with respect to the arguments 51 | x0 -- the initial point to start SGD from 52 | step -- the step size for SGD 53 | iterations -- total iterations to run SGD for 54 | postprocessing -- postprocessing function for the parameters 55 | if necessary. In the case of word2vec we will need to 56 | normalize the word vectors to have unit length. 
57 | PRINT_EVERY -- specifies how many iterations to output loss 58 | 59 | Return: 60 | x -- the parameter value after SGD finishes 61 | """ 62 | 63 | # Anneal learning rate every several iterations 64 | ANNEAL_EVERY = 20000 65 | 66 | if useSaved: 67 | start_iter, oldx, state = load_saved_params() 68 | if start_iter > 0: 69 | x0 = oldx 70 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 71 | 72 | if state: 73 | random.setstate(state) 74 | else: 75 | start_iter = 0 76 | 77 | x = x0 78 | 79 | if not postprocessing: 80 | postprocessing = lambda x: x 81 | 82 | expcost = None 83 | 84 | for iter in xrange(start_iter + 1, iterations + 1): 85 | # Don't forget to apply the postprocessing after every iteration! 86 | # You might want to print the progress every few iterations. 87 | 88 | cost = None 89 | ### YOUR CODE HERE 90 | cost, grad = f(x) 91 | x -= step * grad 92 | x = postprocessing(x) 93 | ### END YOUR CODE 94 | 95 | if iter % PRINT_EVERY == 0: 96 | if not expcost: 97 | expcost = cost 98 | else: 99 | expcost = .95 * expcost + .05 * cost 100 | print "iter %d: %f" % (iter, expcost) 101 | 102 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 103 | save_params(iter, x) 104 | 105 | if iter % ANNEAL_EVERY == 0: 106 | step *= 0.5 107 | 108 | return x 109 | 110 | 111 | def sanity_check(): 112 | quad = lambda x: (np.sum(x ** 2), x * 2) 113 | 114 | print "Running sanity checks..." 115 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100) 116 | print "test 1 result:", t1 117 | assert abs(t1) <= 1e-6 118 | 119 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 120 | print "test 2 result:", t2 121 | assert abs(t2) <= 1e-6 122 | 123 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 124 | print "test 3 result:", t3 125 | assert abs(t3) <= 1e-6 126 | 127 | print "" 128 | 129 | 130 | def your_sanity_checks(): 131 | """ 132 | Use this space add any additional sanity checks by running: 133 | python q3_sgd.py 134 | This function will not be called by the autograder, nor will 135 | your additional tests be graded. 136 | """ 137 | print "Running your sanity checks..." 138 | ### YOUR CODE HERE 139 | quad = lambda x: (np.sum((x-1) ** 2), x * 2 - 2) 140 | 141 | print "Running sanity checks..." 
142 | t1 = sgd(quad, 1.5, 0.01, 1000, PRINT_EVERY=100) 143 | print "test 1 result:", t1 144 | assert abs(t1-1) <= 1e-6 145 | 146 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 147 | print "test 2 result:", t2 148 | assert abs(t2-1) <= 1e-6 149 | 150 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 151 | print "test 3 result:", t3 152 | assert abs(t3-1) <= 1e-6 153 | 154 | print "" 155 | ### END YOUR CODE 156 | 157 | 158 | if __name__ == "__main__": 159 | sanity_check() 160 | your_sanity_checks() 161 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q3_word_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q3_word_vectors.png -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q4_dev_conf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q4_dev_conf.png -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q4_reg_v_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/assignment1/q4_reg_v_acc.png -------------------------------------------------------------------------------- /Assignment1/TaoJi/assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import numpy as np 5 | import matplotlib 6 | matplotlib.use('agg') 7 | import matplotlib.pyplot as plt 8 | import itertools 9 | 10 | from utils.treebank import StanfordSentiment 11 | import utils.glove as glove 12 | 13 | from q3_sgd import load_saved_params, sgd 14 | 15 | # We will use sklearn here because it will run faster than implementing 16 | # ourselves. However, for other parts of this assignment you must implement 17 | # the functions yourself! 18 | from sklearn.linear_model import LogisticRegression 19 | from sklearn.metrics import confusion_matrix 20 | 21 | 22 | def getArguments(): 23 | parser = argparse.ArgumentParser() 24 | group = parser.add_mutually_exclusive_group(required=True) 25 | group.add_argument("--pretrained", dest="pretrained", action="store_true", 26 | help="Use pretrained GloVe vectors.") 27 | group.add_argument("--yourvectors", dest="yourvectors", action="store_true", 28 | help="Use your vectors from q3.") 29 | return parser.parse_args() 30 | 31 | 32 | def getSentenceFeatures(tokens, wordVectors, sentence): 33 | """ 34 | Obtain the sentence feature for sentiment analysis by averaging its 35 | word vectors 36 | """ 37 | 38 | # Implement computation for the sentence features given a sentence. 
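    # Put differently: with S the list of words in `sentence` and v_w the row of
    # wordVectors for word w, the feature computed below is just the mean
    #     sentVector = (1 / |S|) * sum_{w in S} v_w
    # which matches the "averaging its word vectors" description in the docstring.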
39 | 40 | # Inputs: 41 | # tokens -- a dictionary that maps words to their indices in 42 | # the word vector list 43 | # wordVectors -- word vectors (each row) for all tokens 44 | # sentence -- a list of words in the sentence of interest 45 | 46 | # Output: 47 | # - sentVector: feature vector for the sentence 48 | 49 | sentVector = np.zeros((wordVectors.shape[1],)) 50 | 51 | ### YOUR CODE HERE 52 | for word in sentence: 53 | sentVector += wordVectors[tokens[word]] 54 | sentVector /= len(sentence) 55 | ### END YOUR CODE 56 | 57 | assert sentVector.shape == (wordVectors.shape[1],) 58 | return sentVector 59 | 60 | 61 | def getRegularizationValues(): 62 | """Try different regularizations 63 | 64 | Return a sorted list of values to try. 65 | """ 66 | values = None # Assign a list of floats in the block below 67 | ### YOUR CODE HERE 68 | values = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 1.5, 2, 3, 4, 5, 10, 50, 100, 1000] 69 | ### END YOUR CODE 70 | return sorted(values) 71 | 72 | 73 | def chooseBestModel(results): 74 | """Choose the best model based on parameter tuning on the dev set 75 | 76 | Arguments: 77 | results -- A list of python dictionaries of the following format: 78 | { 79 | "reg": regularization, 80 | "clf": classifier, 81 | "train": trainAccuracy, 82 | "dev": devAccuracy, 83 | "test": testAccuracy 84 | } 85 | 86 | Returns: 87 | Your chosen result dictionary. 88 | """ 89 | bestResult = None 90 | 91 | ### YOUR CODE HERE 92 | bestResult = max(results, key=lambda x: x["dev"]) 93 | ### END YOUR CODE 94 | 95 | return bestResult 96 | 97 | 98 | def accuracy(y, yhat): 99 | """ Precision for classifier """ 100 | assert(y.shape == yhat.shape) 101 | return np.sum(y == yhat) * 100.0 / y.size 102 | 103 | 104 | def plotRegVsAccuracy(regValues, results, filename): 105 | """ Make a plot of regularization vs accuracy """ 106 | plt.plot(regValues, [x["train"] for x in results]) 107 | plt.plot(regValues, [x["dev"] for x in results]) 108 | plt.xscale('log') 109 | plt.xlabel("regularization") 110 | plt.ylabel("accuracy") 111 | plt.legend(['train', 'dev'], loc='upper left') 112 | plt.savefig(filename) 113 | 114 | 115 | def outputConfusionMatrix(features, labels, clf, filename): 116 | """ Generate a confusion matrix """ 117 | pred = clf.predict(features) 118 | cm = confusion_matrix(labels, pred, labels=range(5)) 119 | plt.figure() 120 | plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds) 121 | plt.colorbar() 122 | classes = ["- -", "-", "neut", "+", "+ +"] 123 | tick_marks = np.arange(len(classes)) 124 | plt.xticks(tick_marks, classes) 125 | plt.yticks(tick_marks, classes) 126 | thresh = cm.max() / 2. 
127 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 128 | plt.text(j, i, cm[i, j], 129 | horizontalalignment="center", 130 | color="white" if cm[i, j] > thresh else "black") 131 | plt.tight_layout() 132 | plt.ylabel('True label') 133 | plt.xlabel('Predicted label') 134 | plt.savefig(filename) 135 | 136 | 137 | def outputPredictions(dataset, features, labels, clf, filename): 138 | """ Write the predictions to file """ 139 | pred = clf.predict(features) 140 | with open(filename, "w") as f: 141 | print >> f, "True\tPredicted\tText" 142 | for i in xrange(len(dataset)): 143 | print >> f, "%d\t%d\t%s" % ( 144 | labels[i], pred[i], " ".join(dataset[i][0])) 145 | 146 | 147 | def main(args): 148 | """ Train a model to do sentiment analyis""" 149 | 150 | # Load the dataset 151 | dataset = StanfordSentiment() 152 | tokens = dataset.tokens() 153 | nWords = len(tokens) 154 | 155 | if args.yourvectors: 156 | _, wordVectors, _ = load_saved_params() 157 | wordVectors = np.concatenate( 158 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 159 | axis=1) 160 | elif args.pretrained: 161 | wordVectors = glove.loadWordVectors(tokens) 162 | dimVectors = wordVectors.shape[1] 163 | 164 | # Load the train set 165 | trainset = dataset.getTrainSentences() 166 | nTrain = len(trainset) 167 | trainFeatures = np.zeros((nTrain, dimVectors)) 168 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 169 | for i in xrange(nTrain): 170 | words, trainLabels[i] = trainset[i] 171 | trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 172 | 173 | # Prepare dev set features 174 | devset = dataset.getDevSentences() 175 | nDev = len(devset) 176 | devFeatures = np.zeros((nDev, dimVectors)) 177 | devLabels = np.zeros((nDev,), dtype=np.int32) 178 | for i in xrange(nDev): 179 | words, devLabels[i] = devset[i] 180 | devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 181 | 182 | # Prepare test set features 183 | testset = dataset.getTestSentences() 184 | nTest = len(testset) 185 | testFeatures = np.zeros((nTest, dimVectors)) 186 | testLabels = np.zeros((nTest,), dtype=np.int32) 187 | for i in xrange(nTest): 188 | words, testLabels[i] = testset[i] 189 | testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 190 | 191 | # We will save our results from each run 192 | results = [] 193 | regValues = getRegularizationValues() 194 | for reg in regValues: 195 | print "Training for reg=%f" % reg 196 | # Note: add a very small number to regularization to please the library 197 | clf = LogisticRegression(C=1.0/(reg + 1e-12)) 198 | clf.fit(trainFeatures, trainLabels) 199 | 200 | # Test on train set 201 | pred = clf.predict(trainFeatures) 202 | trainAccuracy = accuracy(trainLabels, pred) 203 | print "Train accuracy (%%): %f" % trainAccuracy 204 | 205 | # Test on dev set 206 | pred = clf.predict(devFeatures) 207 | devAccuracy = accuracy(devLabels, pred) 208 | print "Dev accuracy (%%): %f" % devAccuracy 209 | 210 | # Test on test set 211 | # Note: always running on test is poor style. Typically, you should 212 | # do this only after validation. 
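        # Note on the regularization sweep: sklearn's LogisticRegression takes C
        # as the *inverse* regularization strength, which is why the classifier
        # above is built with C=1.0/(reg + 1e-12); larger values returned by
        # getRegularizationValues() therefore mean a stronger penalty.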
213 | pred = clf.predict(testFeatures) 214 | testAccuracy = accuracy(testLabels, pred) 215 | print "Test accuracy (%%): %f" % testAccuracy 216 | 217 | results.append({ 218 | "reg": reg, 219 | "clf": clf, 220 | "train": trainAccuracy, 221 | "dev": devAccuracy, 222 | "test": testAccuracy}) 223 | 224 | # Print the accuracies 225 | print "" 226 | print "=== Recap ===" 227 | print "Reg\t\tTrain\tDev\tTest" 228 | for result in results: 229 | print "%.2E\t%.3f\t%.3f\t%.3f" % ( 230 | result["reg"], 231 | result["train"], 232 | result["dev"], 233 | result["test"]) 234 | print "" 235 | 236 | bestResult = chooseBestModel(results) 237 | print "Best regularization value: %0.2E" % bestResult["reg"] 238 | print "Test accuracy (%%): %f" % bestResult["test"] 239 | 240 | # do some error analysis 241 | if args.pretrained: 242 | plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png") 243 | outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"], 244 | "q4_dev_conf.png") 245 | outputPredictions(devset, devFeatures, devLabels, bestResult["clf"], 246 | "q4_dev_pred.txt") 247 | 248 | 249 | if __name__ == "__main__": 250 | main(getArguments()) 251 | -------------------------------------------------------------------------------- /Assignment1/TaoJi/solution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/TaoJi/solution.pdf -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x): 5 | """Compute the softmax function for each row of the input x. 6 | 7 | It is crucial that this function is optimized for speed because 8 | it will be used frequently in later code. You might find numpy 9 | functions np.exp, np.sum, np.reshape, np.max, and numpy 10 | broadcasting useful for this task. 11 | 12 | Numpy broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html 14 | 15 | You should also make sure that your code works for a single 16 | N-dimensional vector (treat the vector as a single row) and 17 | for M x N matrices. This may be useful for testing later. Also, 18 | make sure that the dimensions of the output match the input. 19 | 20 | You must implement the optimization in problem 1(a) of the 21 | written assignment! 22 | 23 | Arguments: 24 | x -- A N dimensional vector or M x N dimensional numpy matrix. 25 | 26 | Return: 27 | x -- You are allowed to modify x in-place 28 | """ 29 | orig_shape = x.shape 30 | 31 | if len(x.shape) > 1: 32 | # Matrix 33 | ### YOUR CODE HERE 34 | x -= np.max(x, axis=1, keepdims=True) 35 | x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True) 36 | #raise NotImplementedError 37 | ### END YOUR CODE 38 | else: 39 | # Vector 40 | ### YOUR CODE HERE 41 | x -= np.max(x) 42 | x = np.exp(x) / np.sum(np.exp(x)) 43 | #raise NotImplementedError 44 | ### END YOUR CODE 45 | 46 | assert x.shape == orig_shape 47 | return x 48 | 49 | 50 | def test_softmax_basic(): 51 | """ 52 | Some simple tests to get you started. 53 | Warning: these are not exhaustive. 54 | """ 55 | print "Running basic tests..." 
56 | test1 = softmax(np.array([1,2])) 57 | print test1 58 | ans1 = np.array([0.26894142, 0.73105858]) 59 | assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06) 60 | 61 | test2 = softmax(np.array([[1001,1002],[3,4]])) 62 | print test2 63 | ans2 = np.array([ 64 | [0.26894142, 0.73105858], 65 | [0.26894142, 0.73105858]]) 66 | assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06) 67 | 68 | test3 = softmax(np.array([[-1001,-1002]])) 69 | print test3 70 | ans3 = np.array([0.73105858, 0.26894142]) 71 | assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06) 72 | 73 | print "You should be able to verify these results by hand!\n" 74 | 75 | 76 | def test_softmax(): 77 | """ 78 | Use this space to test your softmax implementation by running: 79 | python q1_softmax.py 80 | This function will not be called by the autograder, nor will 81 | your tests be graded. 82 | """ 83 | print "Running your tests..." 84 | ### YOUR CODE HERE 85 | #raise NotImplementedError 86 | ### END YOUR CODE 87 | 88 | 89 | if __name__ == "__main__": 90 | test_softmax_basic() 91 | test_softmax() 92 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | 7 | # First implement a gradient checker by filling in the following functions 8 | def gradcheck_naive(f, x): 9 | """ Gradient check for a function f. 10 | 11 | Arguments: 12 | f -- a function that takes a single argument and outputs the 13 | cost and its gradients 14 | x -- the point (numpy array) to check the gradient at 15 | """ 16 | 17 | rndstate = random.getstate() 18 | random.setstate(rndstate) 19 | fx, grad = f(x) # Evaluate function value at original point 20 | h = 1e-4 # Do not change this! 21 | 22 | # Iterate over all indexes in x 23 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 24 | while not it.finished: 25 | ix = it.multi_index 26 | 27 | # Try modifying x[ix] with h defined above to compute 28 | # numerical gradients. Make sure you call random.setstate(rndstate) 29 | # before calling f(x) each time. This will make it possible 30 | # to test cost functions with built in randomness later. 31 | 32 | ### YOUR CODE HERE: 33 | old_val = x[ix] 34 | x[ix] = old_val - h 35 | random.setstate(rndstate) 36 | fxh1, _ = f(x) 37 | 38 | x[ix] = old_val + h 39 | random.setstate(rndstate) 40 | fxh2, _ = f(x) 41 | 42 | numgrad = (fxh2 - fxh1) / (2 * h) 43 | x[ix] = old_val 44 | ### END YOUR CODE 45 | 46 | # Compare gradients 47 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 48 | if reldiff > 1e-5: 49 | print "Gradient check failed." 50 | print "First gradient error found at index %s" % str(ix) 51 | print "Your gradient: %f \t Numerical gradient: %f" % ( 52 | grad[ix], numgrad) 53 | return 54 | 55 | it.iternext() # Step to next dimension 56 | 57 | print "Gradient check passed!" 58 | 59 | 60 | def sanity_check(): 61 | """ 62 | Some basic sanity checks. 63 | """ 64 | quad = lambda x: (np.sum(x ** 2), x * 2) 65 | 66 | print "Running sanity checks..." 
67 | gradcheck_naive(quad, np.array(123.456)) # scalar test 68 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 69 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 70 | print "" 71 | 72 | 73 | def your_sanity_checks(): 74 | """ 75 | Use this space add any additional sanity checks by running: 76 | python q2_gradcheck.py 77 | This function will not be called by the autograder, nor will 78 | your additional tests be graded. 79 | """ 80 | print "Running your sanity checks..." 81 | ### YOUR CODE HERE 82 | 83 | ### END YOUR CODE 84 | 85 | 86 | if __name__ == "__main__": 87 | sanity_check() 88 | your_sanity_checks() 89 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | from q1_softmax import softmax 7 | from q2_sigmoid import sigmoid, sigmoid_grad 8 | from q2_gradcheck import gradcheck_naive 9 | 10 | 11 | def forward_backward_prop(data, labels, params, dimensions): 12 | """ 13 | Forward and backward propagation for a two-layer sigmoidal network 14 | 15 | Compute the forward propagation and for the cross entropy cost, 16 | and backward propagation for the gradients for all parameters. 17 | 18 | Arguments: 19 | data -- M x Dx matrix, where each row is a training example. 20 | labels -- M x Dy matrix, where each row is a one-hot vector. 21 | params -- Model parameters, these are unpacked for you. 22 | dimensions -- A tuple of input dimension, number of hidden units 23 | and output dimension 24 | """ 25 | 26 | ### Unpack network parameters (do not modify) 27 | ofs = 0 28 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 29 | 30 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 31 | ofs += Dx * H 32 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 33 | ofs += H 34 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 35 | ofs += H * Dy 36 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 37 | 38 | ### YOUR CODE HERE: forward propagation 39 | N = data.shape[0] 40 | h = sigmoid(data.dot(W1) + b1) 41 | output = softmax(h.dot(W2) + b2) 42 | cost = - np.sum(np.log(output[labels == 1])) / N 43 | ### END YOUR CODE 44 | 45 | ### YOUR CODE HERE: backward propagation 46 | grad_output = output - labels 47 | gradW2 = np.dot(h.T, grad_output) / N 48 | gradb2 = np.sum(grad_output, axis=0, keepdims=True) / N 49 | grad_h = np.dot(grad_output, W2.T) * sigmoid_grad(h) 50 | gradW1 = np.dot(data.T, grad_h) / N 51 | gradb1 = np.sum(grad_h, axis=0, keepdims=True) / N 52 | ### END YOUR CODE 53 | 54 | ### Stack gradients (do not modify) 55 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 56 | gradW2.flatten(), gradb2.flatten())) 57 | 58 | return cost, grad 59 | 60 | 61 | def sanity_check(): 62 | """ 63 | Set up fake data and parameters for the neural network, and test using 64 | gradcheck. 65 | """ 66 | print "Running sanity check..." 
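# --- Illustrative aside (not part of the original assignment file) ---
# A quick sanity check, under the dimensions used in the test below, of how the
# flat parameter vector is laid out when forward_backward_prop unpacks it:
# W1 (Dx*H), b1 (H), W2 (H*Dy), b2 (Dy), i.e. (Dx + 1)*H + (H + 1)*Dy entries
# in total. Pure arithmetic, no libraries assumed.
Dx, H, Dy = 10, 5, 10
total = Dx * H + H + H * Dy + Dy
assert total == (Dx + 1) * H + (H + 1) * Dy == 115
# --- end aside ---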
67 | 68 | N = 20 69 | dimensions = [10, 5, 10] 70 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 71 | labels = np.zeros((N, dimensions[2])) 72 | for i in xrange(N): 73 | labels[i, random.randint(0,dimensions[2]-1)] = 1 74 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 75 | dimensions[1] + 1) * dimensions[2], ) 76 | 77 | gradcheck_naive(lambda params: 78 | forward_backward_prop(data, labels, params, dimensions), params) 79 | 80 | 81 | def your_sanity_checks(): 82 | """ 83 | Use this space add any additional sanity checks by running: 84 | python q2_neural.py 85 | This function will not be called by the autograder, nor will 86 | your additional tests be graded. 87 | """ 88 | print "Running your sanity checks..." 89 | ### YOUR CODE HERE 90 | 91 | ### END YOUR CODE 92 | 93 | 94 | if __name__ == "__main__": 95 | sanity_check() 96 | your_sanity_checks() 97 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | def sigmoid(x): 7 | """ 8 | Compute the sigmoid function for the input here. 9 | 10 | Arguments: 11 | x -- A scalar or numpy array. 12 | 13 | Return: 14 | s -- sigmoid(x) 15 | """ 16 | 17 | ### YOUR CODE HERE 18 | s = 1 / (1 + np.exp(-x)) 19 | ### END YOUR CODE 20 | 21 | return s 22 | 23 | 24 | def sigmoid_grad(s): 25 | """ 26 | Compute the gradient for the sigmoid function here. Note that 27 | for this implementation, the input s should be the sigmoid 28 | function value of your original input x. 29 | 30 | Arguments: 31 | s -- A scalar or numpy array. 32 | 33 | Return: 34 | ds -- Your computed gradient. 35 | """ 36 | 37 | ### YOUR CODE HERE 38 | ds = s * (1 - s) 39 | ### END YOUR CODE 40 | 41 | return ds 42 | 43 | 44 | def test_sigmoid_basic(): 45 | """ 46 | Some simple tests to get you started. 47 | Warning: these are not exhaustive. 48 | """ 49 | print "Running basic tests..." 50 | x = np.array([[1, 2], [-1, -2]]) 51 | f = sigmoid(x) 52 | print f 53 | g = sigmoid_grad(f) 54 | f_ans = np.array([ 55 | [0.73105858, 0.88079708], 56 | [0.26894142, 0.11920292]]) 57 | assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06) 58 | print g 59 | g_ans = np.array([ 60 | [0.19661193, 0.10499359], 61 | [0.19661193, 0.10499359]]) 62 | assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06) 63 | print "You should verify these results by hand!\n" 64 | 65 | 66 | def test_sigmoid(): 67 | """ 68 | Use this space to test your sigmoid implementation by running: 69 | python q2_sigmoid.py 70 | This function will not be called by the autograder, nor will 71 | your tests be graded. 72 | """ 73 | print "Running your tests..." 
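# --- Illustrative aside (not part of the original assignment file) ---
# A minimal numerical check of the identity the sigmoid code above relies on:
# d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)), where sigmoid_grad takes
# s = sigmoid(x) as its input rather than x itself. Only numpy is assumed.
import numpy as np

sig = lambda x: 1.0 / (1.0 + np.exp(-x))
x = np.linspace(-5, 5, 11)
h = 1e-6
numeric = (sig(x + h) - sig(x - h)) / (2 * h)   # finite-difference derivative
analytic = sig(x) * (1 - sig(x))                # what sigmoid_grad(sigmoid(x)) returns
assert np.allclose(numeric, analytic, atol=1e-8)
# --- end aside ---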
74 | ### YOUR CODE HERE 75 | 76 | ### END YOUR CODE 77 | 78 | 79 | if __name__ == "__main__": 80 | test_sigmoid_basic(); 81 | test_sigmoid() 82 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import random 4 | import numpy as np 5 | from utils.treebank import StanfordSentiment 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | import time 10 | 11 | from q3_word2vec import * 12 | from q3_sgd import * 13 | 14 | # Reset the random seed to make sure that everyone gets the same results 15 | random.seed(314) 16 | dataset = StanfordSentiment() 17 | tokens = dataset.tokens() 18 | nWords = len(tokens) 19 | 20 | # We are going to train 10-dimensional vecto rs for this assignment 21 | dimVectors = 10 22 | 23 | # Context size 24 | C = 5 25 | 26 | # Reset the random seed to make sure that everyone gets the same results 27 | random.seed(31415) 28 | np.random.seed(9265) 29 | 30 | startTime=time.time() 31 | wordVectors = np.concatenate( 32 | ((np.random.rand(nWords, dimVectors) - 0.5) / 33 | dimVectors, np.zeros((nWords, dimVectors))), 34 | axis=0) 35 | wordVectors = sgd( 36 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 37 | negSamplingCostAndGradient), 38 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 39 | # Note that normalization is not called here. This is not a bug, 40 | # normalizing during training loses the notion of length. 41 | 42 | print "sanity check: cost at convergence should be around or below 10" 43 | print "training took %d seconds" % (time.time() - startTime) 44 | 45 | # concatenate the input and output word vectors 46 | wordVectors = np.concatenate( 47 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 48 | axis=0) 49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:] 50 | 51 | visualizeWords = [ 52 | "the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 53 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 54 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 55 | "annoying"] 56 | 57 | visualizeIdx = [tokens[word] for word in visualizeWords] 58 | visualizeVecs = wordVectors[visualizeIdx, :] 59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 61 | U,S,V = np.linalg.svd(covariance) 62 | coord = temp.dot(U[:,0:2]) 63 | 64 | for i in xrange(len(visualizeWords)): 65 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 66 | bbox=dict(facecolor='green', alpha=0.1)) 67 | 68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 70 | 71 | plt.savefig('q3_word_vectors.png') 72 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Save parameters every a few SGD iterations as fail-safe 4 | SAVE_PARAMS_EVERY = 5000 5 | 6 | import glob 7 | import random 8 | import numpy as np 9 | import os.path as op 10 | import cPickle as pickle 11 | 12 | 13 | def load_saved_params(): 14 | """ 15 | A helper function that loads previously saved parameters and resets 16 | iteration start. 
17 | """ 18 | st = 0 19 | for f in glob.glob("saved_params_*.npy"): 20 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 21 | if (iter > st): 22 | st = iter 23 | 24 | if st > 0: 25 | with open("saved_params_%d.npy" % st, "r") as f: 26 | params = pickle.load(f) 27 | state = pickle.load(f) 28 | return st, params, state 29 | else: 30 | return st, None, None 31 | 32 | 33 | def save_params(iter, params): 34 | with open("saved_params_%d.npy" % iter, "w") as f: 35 | pickle.dump(params, f) 36 | pickle.dump(random.getstate(), f) 37 | 38 | 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False, 40 | PRINT_EVERY=10): 41 | """ Stochastic Gradient Descent 42 | 43 | Implement the stochastic gradient descent method in this function. 44 | 45 | Arguments: 46 | f -- the function to optimize, it should take a single 47 | argument and yield two outputs, a cost and the gradient 48 | with respect to the arguments 49 | x0 -- the initial point to start SGD from 50 | step -- the step size for SGD 51 | iterations -- total iterations to run SGD for 52 | postprocessing -- postprocessing function for the parameters 53 | if necessary. In the case of word2vec we will need to 54 | normalize the word vectors to have unit length. 55 | PRINT_EVERY -- specifies how many iterations to output loss 56 | 57 | Return: 58 | x -- the parameter value after SGD finishes 59 | """ 60 | 61 | # Anneal learning rate every several iterations 62 | ANNEAL_EVERY = 20000 63 | 64 | if useSaved: 65 | start_iter, oldx, state = load_saved_params() 66 | if start_iter > 0: 67 | x0 = oldx 68 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 69 | 70 | if state: 71 | random.setstate(state) 72 | else: 73 | start_iter = 0 74 | 75 | x = x0 76 | 77 | if not postprocessing: 78 | postprocessing = lambda x: x 79 | 80 | expcost = None 81 | 82 | for iter in xrange(start_iter + 1, iterations + 1): 83 | # Don't forget to apply the postprocessing after every iteration! 84 | # You might want to print the progress every few iterations. 85 | 86 | cost = None 87 | ### YOUR CODE HERE 88 | cost, grad = f(x) 89 | x = x - step * grad 90 | x = postprocessing(x) 91 | ### END YOUR CODE 92 | 93 | if iter % PRINT_EVERY == 0: 94 | if not expcost: 95 | expcost = cost 96 | else: 97 | expcost = .95 * expcost + .05 * cost 98 | print "iter %d: %f" % (iter, expcost) 99 | 100 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 101 | save_params(iter, x) 102 | 103 | if iter % ANNEAL_EVERY == 0: 104 | step *= 0.5 105 | 106 | return x 107 | 108 | 109 | def sanity_check(): 110 | quad = lambda x: (np.sum(x ** 2), x * 2) 111 | 112 | print "Running sanity checks..." 113 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100) 114 | print "test 1 result:", t1 115 | assert abs(t1) <= 1e-6 116 | 117 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 118 | print "test 2 result:", t2 119 | assert abs(t2) <= 1e-6 120 | 121 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 122 | print "test 3 result:", t3 123 | assert abs(t3) <= 1e-6 124 | 125 | print "" 126 | 127 | 128 | def your_sanity_checks(): 129 | """ 130 | Use this space add any additional sanity checks by running: 131 | python q3_sgd.py 132 | This function will not be called by the autograder, nor will 133 | your additional tests be graded. 134 | """ 135 | print "Running your sanity checks..." 
136 | ### YOUR CODE HERE 137 | 138 | ### END YOUR CODE 139 | 140 | 141 | if __name__ == "__main__": 142 | sanity_check() 143 | your_sanity_checks() 144 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import numpy as np 5 | import matplotlib 6 | matplotlib.use('agg') 7 | import matplotlib.pyplot as plt 8 | import itertools 9 | 10 | from utils.treebank import StanfordSentiment 11 | import utils.glove as glove 12 | 13 | from q3_sgd import load_saved_params, sgd 14 | 15 | # We will use sklearn here because it will run faster than implementing 16 | # ourselves. However, for other parts of this assignment you must implement 17 | # the functions yourself! 18 | from sklearn.linear_model import LogisticRegression 19 | from sklearn.metrics import confusion_matrix 20 | 21 | 22 | def getArguments(): 23 | parser = argparse.ArgumentParser() 24 | group = parser.add_mutually_exclusive_group(required=True) 25 | group.add_argument("--pretrained", dest="pretrained", action="store_true", 26 | help="Use pretrained GloVe vectors.") 27 | group.add_argument("--yourvectors", dest="yourvectors", action="store_true", 28 | help="Use your vectors from q3.") 29 | return parser.parse_args() 30 | 31 | 32 | def getSentenceFeatures(tokens, wordVectors, sentence): 33 | """ 34 | Obtain the sentence feature for sentiment analysis by averaging its 35 | word vectors 36 | """ 37 | 38 | # Implement computation for the sentence features given a sentence. 39 | 40 | # Inputs: 41 | # tokens -- a dictionary that maps words to their indices in 42 | # the word vector list 43 | # wordVectors -- word vectors (each row) for all tokens 44 | # sentence -- a list of words in the sentence of interest 45 | 46 | # Output: 47 | # - sentVector: feature vector for the sentence 48 | 49 | sentVector = np.zeros((wordVectors.shape[1],)) 50 | 51 | ### YOUR CODE HERE 52 | indices = [tokens[word] for word in sentence] 53 | sentVector = np.mean(wordVectors[indices], axis=0) 54 | ### END YOUR CODE 55 | 56 | assert sentVector.shape == (wordVectors.shape[1],) 57 | return sentVector 58 | 59 | 60 | def getRegularizationValues(): 61 | """Try different regularizations 62 | 63 | Return a sorted list of values to try. 64 | """ 65 | values = None # Assign a list of floats in the block below 66 | ### YOUR CODE HERE 67 | values = np.logspace(-4, 2, num=20, base=10) 68 | ### END YOUR CODE 69 | return sorted(values) 70 | 71 | 72 | def chooseBestModel(results): 73 | """Choose the best model based on parameter tuning on the dev set 74 | 75 | Arguments: 76 | results -- A list of python dictionaries of the following format: 77 | { 78 | "reg": regularization, 79 | "clf": classifier, 80 | "train": trainAccuracy, 81 | "dev": devAccuracy, 82 | "test": testAccuracy 83 | } 84 | 85 | Returns: 86 | Your chosen result dictionary. 
87 | """ 88 | bestResult = None 89 | 90 | ### YOUR CODE HERE 91 | bestResult = max(results, key=lambda x: x["dev"]) 92 | ### END YOUR CODE 93 | 94 | return bestResult 95 | 96 | 97 | def accuracy(y, yhat): 98 | """ Precision for classifier """ 99 | assert(y.shape == yhat.shape) 100 | return np.sum(y == yhat) * 100.0 / y.size 101 | 102 | 103 | def plotRegVsAccuracy(regValues, results, filename): 104 | """ Make a plot of regularization vs accuracy """ 105 | plt.plot(regValues, [x["train"] for x in results]) 106 | plt.plot(regValues, [x["dev"] for x in results]) 107 | plt.xscale('log') 108 | plt.xlabel("regularization") 109 | plt.ylabel("accuracy") 110 | plt.legend(['train', 'dev'], loc='upper left') 111 | plt.savefig(filename) 112 | 113 | 114 | def outputConfusionMatrix(features, labels, clf, filename): 115 | """ Generate a confusion matrix """ 116 | pred = clf.predict(features) 117 | cm = confusion_matrix(labels, pred, labels=range(5)) 118 | plt.figure() 119 | plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds) 120 | plt.colorbar() 121 | classes = ["- -", "-", "neut", "+", "+ +"] 122 | tick_marks = np.arange(len(classes)) 123 | plt.xticks(tick_marks, classes) 124 | plt.yticks(tick_marks, classes) 125 | thresh = cm.max() / 2. 126 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 127 | plt.text(j, i, cm[i, j], 128 | horizontalalignment="center", 129 | color="white" if cm[i, j] > thresh else "black") 130 | plt.tight_layout() 131 | plt.ylabel('True label') 132 | plt.xlabel('Predicted label') 133 | plt.savefig(filename) 134 | 135 | 136 | def outputPredictions(dataset, features, labels, clf, filename): 137 | """ Write the predictions to file """ 138 | pred = clf.predict(features) 139 | with open(filename, "w") as f: 140 | print >> f, "True\tPredicted\tText" 141 | for i in xrange(len(dataset)): 142 | print >> f, "%d\t%d\t%s" % ( 143 | labels[i], pred[i], " ".join(dataset[i][0])) 144 | 145 | 146 | def main(args): 147 | """ Train a model to do sentiment analyis""" 148 | 149 | # Load the dataset 150 | dataset = StanfordSentiment() 151 | tokens = dataset.tokens() 152 | nWords = len(tokens) 153 | 154 | if args.yourvectors: 155 | _, wordVectors, _ = load_saved_params() 156 | wordVectors = np.concatenate( 157 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 158 | axis=1) 159 | elif args.pretrained: 160 | wordVectors = glove.loadWordVectors(tokens) 161 | dimVectors = wordVectors.shape[1] 162 | 163 | # Load the train set 164 | trainset = dataset.getTrainSentences() 165 | nTrain = len(trainset) 166 | trainFeatures = np.zeros((nTrain, dimVectors)) 167 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 168 | for i in xrange(nTrain): 169 | words, trainLabels[i] = trainset[i] 170 | trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 171 | 172 | # Prepare dev set features 173 | devset = dataset.getDevSentences() 174 | nDev = len(devset) 175 | devFeatures = np.zeros((nDev, dimVectors)) 176 | devLabels = np.zeros((nDev,), dtype=np.int32) 177 | for i in xrange(nDev): 178 | words, devLabels[i] = devset[i] 179 | devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 180 | 181 | # Prepare test set features 182 | testset = dataset.getTestSentences() 183 | nTest = len(testset) 184 | testFeatures = np.zeros((nTest, dimVectors)) 185 | testLabels = np.zeros((nTest,), dtype=np.int32) 186 | for i in xrange(nTest): 187 | words, testLabels[i] = testset[i] 188 | testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 189 | 190 | # We will save our 
results from each run 191 | results = [] 192 | regValues = getRegularizationValues() 193 | for reg in regValues: 194 | print "Training for reg=%f" % reg 195 | # Note: add a very small number to regularization to please the library 196 | clf = LogisticRegression(C=1.0/(reg + 1e-12)) 197 | clf.fit(trainFeatures, trainLabels) 198 | 199 | # Test on train set 200 | pred = clf.predict(trainFeatures) 201 | trainAccuracy = accuracy(trainLabels, pred) 202 | print "Train accuracy (%%): %f" % trainAccuracy 203 | 204 | # Test on dev set 205 | pred = clf.predict(devFeatures) 206 | devAccuracy = accuracy(devLabels, pred) 207 | print "Dev accuracy (%%): %f" % devAccuracy 208 | 209 | # Test on test set 210 | # Note: always running on test is poor style. Typically, you should 211 | # do this only after validation. 212 | pred = clf.predict(testFeatures) 213 | testAccuracy = accuracy(testLabels, pred) 214 | print "Test accuracy (%%): %f" % testAccuracy 215 | 216 | results.append({ 217 | "reg": reg, 218 | "clf": clf, 219 | "train": trainAccuracy, 220 | "dev": devAccuracy, 221 | "test": testAccuracy}) 222 | 223 | # Print the accuracies 224 | print "" 225 | print "=== Recap ===" 226 | print "Reg\t\tTrain\tDev\tTest" 227 | for result in results: 228 | print "%.2E\t%.3f\t%.3f\t%.3f" % ( 229 | result["reg"], 230 | result["train"], 231 | result["dev"], 232 | result["test"]) 233 | print "" 234 | 235 | bestResult = chooseBestModel(results) 236 | print "Best regularization value: %0.2E" % bestResult["reg"] 237 | print "Test accuracy (%%): %f" % bestResult["test"] 238 | 239 | # do some error analysis 240 | if args.pretrained: 241 | plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png") 242 | outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"], 243 | "q4_dev_conf.png") 244 | outputPredictions(devset, devFeatures, devLabels, bestResult["clf"], 245 | "q4_dev_pred.txt") 246 | 247 | 248 | if __name__ == "__main__": 249 | main(getArguments()) 250 | -------------------------------------------------------------------------------- /Assignment1/WeiYang/solution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/WeiYang/solution.pdf -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/Makefile: -------------------------------------------------------------------------------- 1 | DATASETS_DIR=utils/datasets 2 | 3 | init: 4 | sh get_datasets.sh 5 | 6 | submit: 7 | sh collect_submission.sh 8 | 9 | clean: 10 | rm -f assignment1.zip 11 | rm -rf ${DATASETS_DIR} 12 | rm -f *.pyc *.png *.npy utils/*.pyc 13 | 14 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/collect_submission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip *.py *.png saved_params_40000.npy 3 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/get_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DATASETS_DIR="utils/datasets" 4 | mkdir -p $DATASETS_DIR 5 | 6 | cd $DATASETS_DIR 7 | 8 | # Get Stanford Sentiment Treebank 9 | if hash wget 2>/dev/null; then 10 | wget http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 11 | else 12 | curl -O 
http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip 13 | fi 14 | unzip stanfordSentimentTreebank.zip 15 | rm stanfordSentimentTreebank.zip 16 | 17 | # Get 50D GloVe vectors 18 | if hash wget 2>/dev/null; then 19 | wget http://web.stanford.edu/~jamesh93/tmp/glove.6B.50d.txt.zip 20 | else 21 | curl -O http://web.stanford.edu/~jamesh93/tmp/glove.6B.50d.txt.zip 22 | fi 23 | unzip glove.6B.50d.txt.zip 24 | rm glove.6B.50d.txt.zip 25 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x): 5 | """Compute the softmax function for each row of the input x. 6 | 7 | It is crucial that this function is optimized for speed because 8 | it will be used frequently in later code. You might find numpy 9 | functions np.exp, np.sum, np.reshape, np.max, and numpy 10 | broadcasting useful for this task. 11 | 12 | Numpy broadcasting documentation: 13 | http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html 14 | 15 | You should also make sure that your code works for a single 16 | N-dimensional vector (treat the vector as a single row) and 17 | for M x N matrices. This may be useful for testing later. Also, 18 | make sure that the dimensions of the output match the input. 19 | 20 | You must implement the optimization in problem 1(a) of the 21 | written assignment! 22 | 23 | Arguments: 24 | x -- A N dimensional vector or M x N dimensional numpy matrix. 25 | 26 | Return: 27 | x -- You are allowed to modify x in-place 28 | """ 29 | orig_shape = x.shape 30 | 31 | if len(x.shape) > 1: 32 | # Matrix 33 | ### YOUR CODE HERE 34 | x = x.T 35 | e_x = np.exp(x - np.max(x,axis=0)) 36 | x = (e_x / e_x.sum(axis=0)).T 37 | ### END YOUR CODE 38 | else: 39 | # Vector 40 | ### YOUR CODE HERE 41 | x = x.T 42 | e_x = np.exp(x - np.max(x,axis=0)) 43 | x = (e_x / e_x.sum(axis=0)).T 44 | ### END YOUR CODE 45 | assert x.shape == orig_shape 46 | return x 47 | 48 | 49 | def test_softmax_basic(): 50 | """ 51 | Some simple tests to get you started. 52 | Warning: these are not exhaustive. 53 | """ 54 | print "Running basic tests..." 55 | test1 = softmax(np.array([1,2])) 56 | print test1 57 | ans1 = np.array([0.26894142, 0.73105858]) 58 | assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06) 59 | 60 | test2 = softmax(np.array([[1001,1002],[3,4]])) 61 | print test2 62 | ans2 = np.array([ 63 | [0.26894142, 0.73105858], 64 | [0.26894142, 0.73105858]]) 65 | assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06) 66 | 67 | test3 = softmax(np.array([[-1001,-1002]])) 68 | print test3 69 | ans3 = np.array([0.73105858, 0.26894142]) 70 | assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06) 71 | 72 | print "You should be able to verify these results by hand!\n" 73 | 74 | 75 | def test_softmax(): 76 | """ 77 | Use this space to test your softmax implementation by running: 78 | python q1_softmax.py 79 | This function will not be called by the autograder, nor will 80 | your tests be graded. 81 | """ 82 | print "Running your tests..." 
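# --- Illustrative aside (not part of the original assignment file) ---
# A quick check that the transpose-based softmax above (subtract the
# column-wise max of x.T, then transpose back) matches the keepdims row-wise
# version used in the other solutions in this repository. Only numpy is
# assumed; the input reuses the values from the basic tests.
import numpy as np

x = np.array([[1001.0, 1002.0], [3.0, 4.0]])

xt = x.T
e_t = np.exp(xt - np.max(xt, axis=0))
via_transpose = (e_t / e_t.sum(axis=0)).T

shifted = x - np.max(x, axis=1, keepdims=True)
via_keepdims = np.exp(shifted) / np.sum(np.exp(shifted), axis=1, keepdims=True)

assert np.allclose(via_transpose, via_keepdims)
# --- end aside ---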
83 | ### YOUR CODE HERE 84 | raise NotImplementedError 85 | ### END YOUR CODE 86 | 87 | 88 | if __name__ == "__main__": 89 | test_softmax_basic() 90 | test_softmax() 91 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q2_gradcheck.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | 7 | # First implement a gradient checker by filling in the following functions 8 | def gradcheck_naive(f, x): 9 | """ Gradient check for a function f. 10 | 11 | Arguments: 12 | f -- a function that takes a single argument and outputs the 13 | cost and its gradients 14 | x -- the point (numpy array) to check the gradient at 15 | """ 16 | 17 | rndstate = random.getstate() 18 | random.setstate(rndstate) 19 | fx, grad = f(x) # Evaluate function value at original point 20 | h = 1e-4 # Do not change this! 21 | 22 | # Iterate over all indexes in x 23 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 24 | while not it.finished: 25 | ix = it.multi_index 26 | 27 | # Try modifying x[ix] with h defined above to compute 28 | # numerical gradients. Make sure you call random.setstate(rndstate) 29 | # before calling f(x) each time. This will make it possible 30 | # to test cost functions with built in randomness later. 31 | 32 | old_val = x[ix] 33 | x[ix] = old_val - h 34 | random.setstate(rndstate) 35 | (fxh1, _) = f(x) 36 | x[ix] = old_val + h 37 | random.setstate(rndstate) 38 | (fxh2, _) = f(x) 39 | 40 | numgrad = (fxh2 - fxh1)/(2*h) 41 | x[ix] = old_val 42 | 43 | # Compare gradients 44 | reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix])) 45 | if reldiff > 1e-5: 46 | print "Gradient check failed." 47 | print "First gradient error found at index %s" % str(ix) 48 | print "Your gradient: %f \t Numerical gradient: %f" % ( 49 | grad[ix], numgrad) 50 | return 51 | 52 | it.iternext() # Step to next dimension 53 | 54 | print "Gradient check passed!" 55 | 56 | 57 | def sanity_check(): 58 | """ 59 | Some basic sanity checks. 60 | """ 61 | quad = lambda x: (np.sum(x ** 2), x * 2) 62 | 63 | print "Running sanity checks..." 64 | gradcheck_naive(quad, np.array(123.456)) # scalar test 65 | gradcheck_naive(quad, np.random.randn(3,)) # 1-D test 66 | gradcheck_naive(quad, np.random.randn(4,5)) # 2-D test 67 | print "" 68 | 69 | 70 | def your_sanity_checks(): 71 | """ 72 | Use this space add any additional sanity checks by running: 73 | python q2_gradcheck.py 74 | This function will not be called by the autograder, nor will 75 | your additional tests be graded. 76 | """ 77 | print "Running your sanity checks..." 
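# --- Illustrative aside (not part of the original assignment file) ---
# A minimal sketch of why gradcheck_naive restores the random state before each
# call to f: if the cost has built-in randomness (e.g. negative sampling),
# f(x - h) and f(x + h) must see the same random draws, otherwise the finite
# difference compares two different functions. Only the standard library is
# assumed; noisy_cost is a made-up stand-in.
import random

def noisy_cost(x, rndstate):
    random.setstate(rndstate)        # replay the same randomness on every call
    noise = random.random()
    return (x + noise) ** 2

state = random.getstate()
h = 1e-4
lo = noisy_cost(1.0 - h, state)
hi = noisy_cost(1.0 + h, state)
numgrad = (hi - lo) / (2 * h)

random.setstate(state)
noise = random.random()
# with the state pinned, the noise cancels and the estimate matches 2*(x + noise)
assert abs(numgrad - 2 * (1.0 + noise)) < 1e-6
# --- end aside ---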
78 | ### YOUR CODE HERE 79 | raise NotImplementedError 80 | ### END YOUR CODE 81 | 82 | 83 | if __name__ == "__main__": 84 | sanity_check() 85 | your_sanity_checks() 86 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q2_neural.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import random 5 | 6 | from q1_softmax import softmax 7 | from q2_sigmoid import sigmoid, sigmoid_grad 8 | from q2_gradcheck import gradcheck_naive 9 | 10 | 11 | def forward_backward_prop(data, labels, params, dimensions): 12 | """ 13 | Forward and backward propagation for a two-layer sigmoidal network 14 | 15 | Compute the forward propagation and for the cross entropy cost, 16 | and backward propagation for the gradients for all parameters. 17 | 18 | Arguments: 19 | data -- M x Dx matrix, where each row is a training example. 20 | labels -- M x Dy matrix, where each row is a one-hot vector. 21 | params -- Model parameters, these are unpacked for you. 22 | dimensions -- A tuple of input dimension, number of hidden units 23 | and output dimension 24 | """ 25 | 26 | ### Unpack network parameters (do not modify) 27 | ofs = 0 28 | Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]) 29 | 30 | W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H)) 31 | ofs += Dx * H 32 | b1 = np.reshape(params[ofs:ofs + H], (1, H)) 33 | ofs += H 34 | W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy)) 35 | ofs += H * Dy 36 | b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy)) 37 | 38 | ### YOUR CODE HERE: forward propagation 39 | 40 | z2 = np.dot(data, W1) + b1 41 | h2 = sigmoid(z2) 42 | z3 = np.dot(h2, W2) + b2 43 | y = softmax(z3) 44 | ### END YOUR CODE 45 | 46 | M = dimensions[2] 47 | cost = np.sum((-1*labels*np.log(y))) / M 48 | #print cost.shape 49 | ### YOUR CODE HERE: backward propagation 50 | delta3 = (y - labels) / M 51 | #print delta3.shape 52 | 53 | gradW2 = np.dot(h2.T, delta3) 54 | #print gradW2.shape 55 | gradb2 = np.sum(delta3,axis = 0) 56 | #print gradb2.shape 57 | delta2 = sigmoid_grad(h2) * np.dot(delta3, W2.T) #hardmard product 58 | #print delta2.shape 59 | 60 | gradW1 = np.dot(data.T, delta2) 61 | #print gradW1.shape 62 | gradb1 = np.sum(delta2, axis = 0) 63 | #print gradb1.shape 64 | ### END YOUR CODE 65 | 66 | 67 | 68 | ### Stack gradients (do not modify) 69 | grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), 70 | gradW2.flatten(), gradb2.flatten())) 71 | 72 | return cost, grad 73 | 74 | 75 | def sanity_check(): 76 | """ 77 | Set up fake data and parameters for the neural network, and test using 78 | gradcheck. 79 | """ 80 | print "Running sanity check..." 81 | 82 | N = 20 83 | dimensions = [10, 5, 10] 84 | data = np.random.randn(N, dimensions[0]) # each row will be a datum 85 | labels = np.zeros((N, dimensions[2])) 86 | for i in xrange(N): 87 | labels[i, random.randint(0,dimensions[2]-1)] = 1 88 | 89 | params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( 90 | dimensions[1] + 1) * dimensions[2], ) 91 | 92 | gradcheck_naive(lambda params: 93 | forward_backward_prop(data, labels, params, dimensions), params) 94 | 95 | 96 | def your_sanity_checks(): 97 | """ 98 | Use this space add any additional sanity checks by running: 99 | python q2_neural.py 100 | This function will not be called by the autograder, nor will 101 | your additional tests be graded. 102 | """ 103 | print "Running your sanity checks..." 
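# --- Illustrative aside (not part of the original assignment file) ---
# A minimal check that the two cross-entropy formulations appearing in the
# different solutions above agree for one-hot labels: summing -labels*log(y)
# over all entries equals summing -log(y) only at the hot positions. Only
# numpy is assumed; the numbers are made up.
import numpy as np

y = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])      # predicted distributions (rows sum to 1)
labels = np.array([[1, 0, 0],
                   [0, 1, 0]])       # one-hot targets

ce_masked = -np.sum(np.log(y[labels == 1]))
ce_full = -np.sum(labels * np.log(y))
assert np.isclose(ce_masked, ce_full)
# --- end aside ---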
104 | ### YOUR CODE HERE 105 | raise NotImplementedError 106 | ### END YOUR CODE 107 | 108 | 109 | if __name__ == "__main__": 110 | sanity_check() 111 | your_sanity_checks() 112 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q2_sigmoid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | 5 | 6 | def sigmoid(x): 7 | """ 8 | Compute the sigmoid function for the input here. 9 | 10 | Arguments: 11 | x -- A scalar or numpy array. 12 | 13 | Return: 14 | s -- sigmoid(x) 15 | """ 16 | 17 | ### YOUR CODE HERE 18 | s = 1 / (1+np.exp(-x)) 19 | ### END YOUR CODE 20 | 21 | return s 22 | 23 | 24 | def sigmoid_grad(s): 25 | """ 26 | Compute the gradient for the sigmoid function here. Note that 27 | for this implementation, the input s should be the sigmoid 28 | function value of your original input x. 29 | 30 | Arguments: 31 | s -- A scalar or numpy array. 32 | 33 | Return: 34 | ds -- Your computed gradient. 35 | """ 36 | 37 | ### YOUR CODE HERE 38 | 39 | ds = s * (1-s) 40 | ### END YOUR CODE 41 | 42 | return ds 43 | 44 | 45 | def test_sigmoid_basic(): 46 | """ 47 | Some simple tests to get you started. 48 | Warning: these are not exhaustive. 49 | """ 50 | print "Running basic tests..." 51 | x = np.array([[1, 2], [-1, -2]]) 52 | f = sigmoid(x) 53 | g = sigmoid_grad(f) 54 | print f 55 | f_ans = np.array([ 56 | [0.73105858, 0.88079708], 57 | [0.26894142, 0.11920292]]) 58 | assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06) 59 | print g 60 | g_ans = np.array([ 61 | [0.19661193, 0.10499359], 62 | [0.19661193, 0.10499359]]) 63 | assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06) 64 | print "You should verify these results by hand!\n" 65 | 66 | 67 | def test_sigmoid(): 68 | """ 69 | Use this space to test your sigmoid implementation by running: 70 | python q2_sigmoid.py 71 | This function will not be called by the autograder, nor will 72 | your tests be graded. 73 | """ 74 | print "Running your tests..." 
75 | ### YOUR CODE HERE 76 | raise NotImplementedError 77 | ### END YOUR CODE 78 | 79 | 80 | if __name__ == "__main__": 81 | test_sigmoid_basic(); 82 | test_sigmoid() 83 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q3_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import random 4 | import numpy as np 5 | from utils.treebank import StanfordSentiment 6 | import matplotlib 7 | matplotlib.use('agg') 8 | import matplotlib.pyplot as plt 9 | import time 10 | 11 | from q3_word2vec import * 12 | from q3_sgd import * 13 | 14 | # Reset the random seed to make sure that everyone gets the same results 15 | random.seed(314) 16 | dataset = StanfordSentiment() 17 | tokens = dataset.tokens() 18 | nWords = len(tokens) 19 | 20 | # We are going to train 10-dimensional vectors for this assignment 21 | dimVectors = 10 22 | 23 | # Context size 24 | C = 5 25 | 26 | # Reset the random seed to make sure that everyone gets the same results 27 | random.seed(31415) 28 | np.random.seed(9265) 29 | 30 | startTime=time.time() 31 | wordVectors = np.concatenate( 32 | ((np.random.rand(nWords, dimVectors) - 0.5) / 33 | dimVectors, np.zeros((nWords, dimVectors))), 34 | axis=0) 35 | wordVectors = sgd( 36 | lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, 37 | negSamplingCostAndGradient), 38 | wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) 39 | # Note that normalization is not called here. This is not a bug, 40 | # normalizing during training loses the notion of length. 41 | 42 | print "sanity check: cost at convergence should be around or below 10" 43 | print "training took %d seconds" % (time.time() - startTime) 44 | 45 | # concatenate the input and output word vectors 46 | wordVectors = np.concatenate( 47 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 48 | axis=0) 49 | # wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:] 50 | 51 | visualizeWords = [ 52 | "the", "a", "an", ",", ".", "?", "!", "``", "''", "--", 53 | "good", "great", "cool", "brilliant", "wonderful", "well", "amazing", 54 | "worth", "sweet", "enjoyable", "boring", "bad", "waste", "dumb", 55 | "annoying"] 56 | 57 | visualizeIdx = [tokens[word] for word in visualizeWords] 58 | visualizeVecs = wordVectors[visualizeIdx, :] 59 | temp = (visualizeVecs - np.mean(visualizeVecs, axis=0)) 60 | covariance = 1.0 / len(visualizeIdx) * temp.T.dot(temp) 61 | U,S,V = np.linalg.svd(covariance) 62 | coord = temp.dot(U[:,0:2]) 63 | 64 | for i in xrange(len(visualizeWords)): 65 | plt.text(coord[i,0], coord[i,1], visualizeWords[i], 66 | bbox=dict(facecolor='green', alpha=0.1)) 67 | 68 | plt.xlim((np.min(coord[:,0]), np.max(coord[:,0]))) 69 | plt.ylim((np.min(coord[:,1]), np.max(coord[:,1]))) 70 | 71 | plt.savefig('q3_word_vectors.png') 72 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q3_sgd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Save parameters every a few SGD iterations as fail-safe 4 | SAVE_PARAMS_EVERY = 5000 5 | 6 | import glob 7 | import random 8 | import numpy as np 9 | import os.path as op 10 | import cPickle as pickle 11 | 12 | 13 | def load_saved_params(): 14 | """ 15 | A helper function that loads previously saved parameters and resets 16 | iteration start. 
17 | """ 18 | st = 0 19 | for f in glob.glob("saved_params_*.npy"): 20 | iter = int(op.splitext(op.basename(f))[0].split("_")[2]) 21 | if (iter > st): 22 | st = iter 23 | 24 | if st > 0: 25 | with open("saved_params_%d.npy" % st, "r") as f: 26 | params = pickle.load(f) 27 | state = pickle.load(f) 28 | return st, params, state 29 | else: 30 | return st, None, None 31 | 32 | 33 | def save_params(iter, params): 34 | with open("saved_params_%d.npy" % iter, "w") as f: 35 | pickle.dump(params, f) 36 | pickle.dump(random.getstate(), f) 37 | 38 | 39 | def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False, 40 | PRINT_EVERY=10): 41 | """ Stochastic Gradient Descent 42 | 43 | Implement the stochastic gradient descent method in this function. 44 | 45 | Arguments: 46 | f -- the function to optimize, it should take a single 47 | argument and yield two outputs, a cost and the gradient 48 | with respect to the arguments 49 | x0 -- the initial point to start SGD from 50 | step -- the step size for SGD 51 | iterations -- total iterations to run SGD for 52 | postprocessing -- postprocessing function for the parameters 53 | if necessary. In the case of word2vec we will need to 54 | normalize the word vectors to have unit length. 55 | PRINT_EVERY -- specifies how many iterations to output loss 56 | 57 | Return: 58 | x -- the parameter value after SGD finishes 59 | """ 60 | 61 | # Anneal learning rate every several iterations 62 | ANNEAL_EVERY = 20000 63 | 64 | if useSaved: 65 | start_iter, oldx, state = load_saved_params() 66 | if start_iter > 0: 67 | x0 = oldx 68 | step *= 0.5 ** (start_iter / ANNEAL_EVERY) 69 | 70 | if state: 71 | random.setstate(state) 72 | else: 73 | start_iter = 0 74 | 75 | x = x0 76 | 77 | if not postprocessing: 78 | postprocessing = lambda x: x 79 | 80 | expcost = None 81 | 82 | for iter in xrange(start_iter + 1, iterations + 1): 83 | # Don't forget to apply the postprocessing after every iteration! 84 | # You might want to print the progress every few iterations. 85 | 86 | cost = None 87 | ### YOUR CODE HERE 88 | cost, grad = f(x) 89 | x -= step * grad 90 | x = postprocessing(x) 91 | ### END YOUR CODE 92 | 93 | if iter % PRINT_EVERY == 0: 94 | if not expcost: 95 | expcost = cost 96 | else: 97 | expcost = .95 * expcost + .05 * cost 98 | print "iter %d: %f" % (iter, expcost) 99 | 100 | if iter % SAVE_PARAMS_EVERY == 0 and useSaved: 101 | save_params(iter, x) 102 | 103 | if iter % ANNEAL_EVERY == 0: 104 | step *= 0.5 105 | 106 | return x 107 | 108 | 109 | def sanity_check(): 110 | quad = lambda x: (np.sum(x ** 2), x * 2) 111 | 112 | print "Running sanity checks..." 113 | t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=100) 114 | print "test 1 result:", t1 115 | assert abs(t1) <= 1e-6 116 | 117 | t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=100) 118 | print "test 2 result:", t2 119 | assert abs(t2) <= 1e-6 120 | 121 | t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=100) 122 | print "test 3 result:", t3 123 | assert abs(t3) <= 1e-6 124 | 125 | print "" 126 | 127 | 128 | def your_sanity_checks(): 129 | """ 130 | Use this space add any additional sanity checks by running: 131 | python q3_sgd.py 132 | This function will not be called by the autograder, nor will 133 | your additional tests be graded. 134 | """ 135 | print "Running your sanity checks..." 
136 | ### YOUR CODE HERE 137 | raise NotImplementedError 138 | ### END YOUR CODE 139 | 140 | 141 | if __name__ == "__main__": 142 | sanity_check() 143 | your_sanity_checks() 144 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q3_word_vectors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q3_word_vectors.png -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q4_dev_conf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q4_dev_conf.png -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q4_reg_v_acc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/Assignment1/q4_reg_v_acc.png -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/Assignment1/q4_sentiment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import numpy as np 5 | import matplotlib 6 | matplotlib.use('agg') 7 | import matplotlib.pyplot as plt 8 | import itertools 9 | 10 | from utils.treebank import StanfordSentiment 11 | import utils.glove as glove 12 | 13 | from q3_sgd import load_saved_params, sgd 14 | 15 | # We will use sklearn here because it will run faster than implementing 16 | # ourselves. However, for other parts of this assignment you must implement 17 | # the functions yourself! 18 | from sklearn.linear_model import LogisticRegression 19 | from sklearn.metrics import confusion_matrix 20 | 21 | 22 | def getArguments(): 23 | parser = argparse.ArgumentParser() 24 | group = parser.add_mutually_exclusive_group(required=True) 25 | group.add_argument("--pretrained", dest="pretrained", action="store_true", 26 | help="Use pretrained GloVe vectors.") 27 | group.add_argument("--yourvectors", dest="yourvectors", action="store_true", 28 | help="Use your vectors from q3.") 29 | return parser.parse_args() 30 | 31 | 32 | def getSentenceFeatures(tokens, wordVectors, sentence): 33 | """ 34 | Obtain the sentence feature for sentiment analysis by averaging its 35 | word vectors 36 | """ 37 | 38 | # Implement computation for the sentence features given a sentence. 
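# --- Illustrative aside (not part of the original assignment file) ---
# A minimal check that the loop-and-accumulate version used in this solution
# and the vectorised np.mean version used in the other solution compute the
# same sentence feature: both average the word vectors of the sentence's
# tokens. Only numpy is assumed; the tokens and vectors are made up.
import numpy as np

wordVectors = np.array([[1.0, 2.0],
                        [3.0, 4.0],
                        [5.0, 6.0]])
tokens = {"the": 0, "movie": 1, "rocks": 2}
sentence = ["the", "movie", "rocks"]

loop_avg = np.zeros(wordVectors.shape[1])
for w in sentence:
    loop_avg += wordVectors[tokens[w], :]
loop_avg /= len(sentence)

vec_avg = np.mean(wordVectors[[tokens[w] for w in sentence]], axis=0)
assert np.allclose(loop_avg, vec_avg)    # both give [3., 4.]
# --- end aside ---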
39 | 40 | # Inputs: 41 | # tokens -- a dictionary that maps words to their indices in 42 | # the word vector list 43 | # wordVectors -- word vectors (each row) for all tokens 44 | # sentence -- a list of words in the sentence of interest 45 | 46 | # Output: 47 | # - sentVector: feature vector for the sentence 48 | 49 | sentVector = np.zeros((wordVectors.shape[1],)) 50 | 51 | ### YOUR CODE HERE 52 | for s in sentence: 53 | sentVector += wordVectors[tokens[s], :] 54 | 55 | sentVector *= 1.0 / len(sentence) 56 | ### END YOUR CODE 57 | 58 | assert sentVector.shape == (wordVectors.shape[1],) 59 | return sentVector 60 | 61 | 62 | def getRegularizationValues(): 63 | """Try different regularizations 64 | 65 | Return a sorted list of values to try. 66 | """ 67 | values = None # Assign a list of floats in the block below 68 | ### YOUR CODE HERE 69 | values = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 1.5, 2, 3, 4, 5, 10, 50, 100, 1000] 70 | ### END YOUR CODE 71 | return sorted(values) 72 | 73 | 74 | def chooseBestModel(results): 75 | """Choose the best model based on parameter tuning on the dev set 76 | 77 | Arguments: 78 | results -- A list of python dictionaries of the following format: 79 | { 80 | "reg": regularization, 81 | "clf": classifier, 82 | "train": trainAccuracy, 83 | "dev": devAccuracy, 84 | "test": testAccuracy 85 | } 86 | 87 | Returns: 88 | Your chosen result dictionary. 89 | """ 90 | bestResult = None 91 | 92 | ### YOUR CODE HERE 93 | bestResult = max(results, key=lambda x: x["dev"]) 94 | ### END YOUR CODE 95 | 96 | return bestResult 97 | 98 | 99 | def accuracy(y, yhat): 100 | """ Precision for classifier """ 101 | assert(y.shape == yhat.shape) 102 | return np.sum(y == yhat) * 100.0 / y.size 103 | 104 | 105 | def plotRegVsAccuracy(regValues, results, filename): 106 | """ Make a plot of regularization vs accuracy """ 107 | plt.plot(regValues, [x["train"] for x in results]) 108 | plt.plot(regValues, [x["dev"] for x in results]) 109 | plt.xscale('log') 110 | plt.xlabel("regularization") 111 | plt.ylabel("accuracy") 112 | plt.legend(['train', 'dev'], loc='upper left') 113 | plt.savefig(filename) 114 | 115 | 116 | def outputConfusionMatrix(features, labels, clf, filename): 117 | """ Generate a confusion matrix """ 118 | pred = clf.predict(features) 119 | cm = confusion_matrix(labels, pred, labels=range(5)) 120 | plt.figure() 121 | plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Reds) 122 | plt.colorbar() 123 | classes = ["- -", "-", "neut", "+", "+ +"] 124 | tick_marks = np.arange(len(classes)) 125 | plt.xticks(tick_marks, classes) 126 | plt.yticks(tick_marks, classes) 127 | thresh = cm.max() / 2. 
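# --- Illustrative aside (not part of the original assignment file) ---
# A minimal sketch of what the matrix being plotted in outputConfusionMatrix
# contains: cm[i, j] counts examples whose true label is labels[i] and whose
# predicted label is labels[j], so a perfect classifier puts all mass on the
# diagonal. Assumes scikit-learn is installed; the labels are made up.
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 2, 2])
y_pred = np.array([0, 1, 1, 2, 0])
cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2])
# row 0: one correct "0" and one "0" predicted as "1"; row 2: one "2" predicted as "0"
assert cm[0, 0] == 1 and cm[0, 1] == 1 and cm[2, 0] == 1 and cm[2, 2] == 1
# --- end aside ---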
128 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 129 | plt.text(j, i, cm[i, j], 130 | horizontalalignment="center", 131 | color="white" if cm[i, j] > thresh else "black") 132 | plt.tight_layout() 133 | plt.ylabel('True label') 134 | plt.xlabel('Predicted label') 135 | plt.savefig(filename) 136 | 137 | 138 | def outputPredictions(dataset, features, labels, clf, filename): 139 | """ Write the predictions to file """ 140 | pred = clf.predict(features) 141 | with open(filename, "w") as f: 142 | print >> f, "True\tPredicted\tText" 143 | for i in xrange(len(dataset)): 144 | print >> f, "%d\t%d\t%s" % ( 145 | labels[i], pred[i], " ".join(dataset[i][0])) 146 | 147 | 148 | def main(args): 149 | """ Train a model to do sentiment analyis""" 150 | 151 | # Load the dataset 152 | dataset = StanfordSentiment() 153 | tokens = dataset.tokens() 154 | nWords = len(tokens) 155 | 156 | if args.yourvectors: 157 | _, wordVectors, _ = load_saved_params() 158 | wordVectors = np.concatenate( 159 | (wordVectors[:nWords,:], wordVectors[nWords:,:]), 160 | axis=1) 161 | elif args.pretrained: 162 | wordVectors = glove.loadWordVectors(tokens) 163 | dimVectors = wordVectors.shape[1] 164 | 165 | # Load the train set 166 | trainset = dataset.getTrainSentences() 167 | nTrain = len(trainset) 168 | trainFeatures = np.zeros((nTrain, dimVectors)) 169 | trainLabels = np.zeros((nTrain,), dtype=np.int32) 170 | for i in xrange(nTrain): 171 | words, trainLabels[i] = trainset[i] 172 | trainFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 173 | 174 | # Prepare dev set features 175 | devset = dataset.getDevSentences() 176 | nDev = len(devset) 177 | devFeatures = np.zeros((nDev, dimVectors)) 178 | devLabels = np.zeros((nDev,), dtype=np.int32) 179 | for i in xrange(nDev): 180 | words, devLabels[i] = devset[i] 181 | devFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 182 | 183 | # Prepare test set features 184 | testset = dataset.getTestSentences() 185 | nTest = len(testset) 186 | testFeatures = np.zeros((nTest, dimVectors)) 187 | testLabels = np.zeros((nTest,), dtype=np.int32) 188 | for i in xrange(nTest): 189 | words, testLabels[i] = testset[i] 190 | testFeatures[i, :] = getSentenceFeatures(tokens, wordVectors, words) 191 | 192 | # We will save our results from each run 193 | results = [] 194 | regValues = getRegularizationValues() 195 | for reg in regValues: 196 | print "Training for reg=%f" % reg 197 | # Note: add a very small number to regularization to please the library 198 | clf = LogisticRegression(C=1.0/(reg + 1e-12)) 199 | clf.fit(trainFeatures, trainLabels) 200 | 201 | # Test on train set 202 | pred = clf.predict(trainFeatures) 203 | trainAccuracy = accuracy(trainLabels, pred) 204 | print "Train accuracy (%%): %f" % trainAccuracy 205 | 206 | # Test on dev set 207 | pred = clf.predict(devFeatures) 208 | devAccuracy = accuracy(devLabels, pred) 209 | print "Dev accuracy (%%): %f" % devAccuracy 210 | 211 | # Test on test set 212 | # Note: always running on test is poor style. Typically, you should 213 | # do this only after validation. 
214 | pred = clf.predict(testFeatures) 215 | testAccuracy = accuracy(testLabels, pred) 216 | print "Test accuracy (%%): %f" % testAccuracy 217 | 218 | results.append({ 219 | "reg": reg, 220 | "clf": clf, 221 | "train": trainAccuracy, 222 | "dev": devAccuracy, 223 | "test": testAccuracy}) 224 | 225 | # Print the accuracies 226 | print "" 227 | print "=== Recap ===" 228 | print "Reg\t\tTrain\tDev\tTest" 229 | for result in results: 230 | print "%.2E\t%.3f\t%.3f\t%.3f" % ( 231 | result["reg"], 232 | result["train"], 233 | result["dev"], 234 | result["test"]) 235 | print "" 236 | 237 | bestResult = chooseBestModel(results) 238 | print "Best regularization value: %0.2E" % bestResult["reg"] 239 | print "Test accuracy (%%): %f" % bestResult["test"] 240 | 241 | # do some error analysis 242 | if args.pretrained: 243 | plotRegVsAccuracy(regValues, results, "q4_reg_v_acc.png") 244 | outputConfusionMatrix(devFeatures, devLabels, bestResult["clf"], 245 | "q4_dev_conf.png") 246 | outputPredictions(devset, devFeatures, devLabels, bestResult["clf"], 247 | "q4_dev_pred.txt") 248 | 249 | 250 | if __name__ == "__main__": 251 | main(getArguments()) 252 | -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/assignment1_writen.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment1/ZiyinHuang/assignment1_writen.pdf -------------------------------------------------------------------------------- /Assignment1/ZiyinHuang/readme: -------------------------------------------------------------------------------- 1 | Assignment 1 By ZY.Huang 2 | -------------------------------------------------------------------------------- /Assignment2/README.md: -------------------------------------------------------------------------------- 1 | # Assignment 2 2 | 3 | Materials: [Assignment 2](http://web.stanford.edu/class/cs224n/assignment2/index.html) 4 | 5 | 6 | 7 | ``` 8 | ./Assignment2 9 | /TaoJi 10 | solution.md --解题报告 代码题实现+非代码题 11 | /assignment2 12 | ...code... --项目代码 (去除data) 13 | /ZiyinHuang 14 | ... 15 | /YupeiDu 16 | ... 17 | /MingZhong 18 | ... 19 | ... 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/model.py: -------------------------------------------------------------------------------- 1 | class Model(object): 2 | """Abstracts a Tensorflow graph for a learning task. 3 | 4 | We use various Model classes as usual abstractions to encapsulate tensorflow 5 | computational graphs. Each algorithm you will construct in this homework will 6 | inherit from a Model object. 7 | """ 8 | def add_placeholders(self): 9 | """Adds placeholder variables to tensorflow computational graph. 10 | 11 | Tensorflow uses placeholder variables to represent locations in a 12 | computational graph where data is inserted. These placeholders are used as 13 | inputs by the rest of the model building and will be fed data during 14 | training. 15 | 16 | See for more information: 17 | https://www.tensorflow.org/versions/r0.7/api_docs/python/io_ops.html#placeholders 18 | """ 19 | raise NotImplementedError("Each Model must re-implement this method.") 20 | 21 | def create_feed_dict(self, inputs_batch, labels_batch=None): 22 | """Creates the feed_dict for one step of training. 23 | 24 | A feed_dict takes the form of: 25 | feed_dict = { 26 | : , 27 | .... 
28 | } 29 | 30 | If labels_batch is None, then no labels are added to feed_dict. 31 | 32 | Hint: The keys for the feed_dict should be a subset of the placeholder 33 | tensors created in add_placeholders. 34 | 35 | Args: 36 | inputs_batch: A batch of input data. 37 | labels_batch: A batch of label data. 38 | Returns: 39 | feed_dict: The feed dictionary mapping from placeholders to values. 40 | """ 41 | raise NotImplementedError("Each Model must re-implement this method.") 42 | 43 | def add_prediction_op(self): 44 | """Implements the core of the model that transforms a batch of input data into predictions. 45 | 46 | Returns: 47 | pred: A tensor of shape (batch_size, n_classes) 48 | """ 49 | raise NotImplementedError("Each Model must re-implement this method.") 50 | 51 | def add_loss_op(self, pred): 52 | """Adds Ops for the loss function to the computational graph. 53 | 54 | Args: 55 | pred: A tensor of shape (batch_size, n_classes) 56 | Returns: 57 | loss: A 0-d tensor (scalar) output 58 | """ 59 | raise NotImplementedError("Each Model must re-implement this method.") 60 | 61 | def add_training_op(self, loss): 62 | """Sets up the training Ops. 63 | 64 | Creates an optimizer and applies the gradients to all trainable variables. 65 | The Op returned by this function is what must be passed to the 66 | sess.run() to train the model. See 67 | 68 | https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer 69 | 70 | for more information. 71 | 72 | Args: 73 | loss: Loss tensor (a scalar). 74 | Returns: 75 | train_op: The Op for training. 76 | """ 77 | 78 | raise NotImplementedError("Each Model must re-implement this method.") 79 | 80 | def train_on_batch(self, sess, inputs_batch, labels_batch): 81 | """Perform one step of gradient descent on the provided batch of data. 82 | 83 | Args: 84 | sess: tf.Session() 85 | input_batch: np.ndarray of shape (n_samples, n_features) 86 | labels_batch: np.ndarray of shape (n_samples, n_classes) 87 | Returns: 88 | loss: loss over the batch (a scalar) 89 | """ 90 | feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch) 91 | _, loss = sess.run([self.train_op, self.loss], feed_dict=feed) 92 | return loss 93 | 94 | def predict_on_batch(self, sess, inputs_batch): 95 | """Make predictions for the provided batch of data 96 | 97 | Args: 98 | sess: tf.Session() 99 | input_batch: np.ndarray of shape (n_samples, n_features) 100 | Returns: 101 | predictions: np.ndarray of shape (n_samples, n_classes) 102 | """ 103 | feed = self.create_feed_dict(inputs_batch) 104 | predictions = sess.run(self.pred, feed_dict=feed) 105 | return predictions 106 | 107 | def build(self): 108 | self.add_placeholders() 109 | self.pred = self.add_prediction_op() 110 | self.loss = self.add_loss_op(self.pred) 111 | self.train_op = self.add_training_op(self.loss) 112 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #import tensorflow as tf 3 | import dynet as dy 4 | from utils.general_utils import test_all_close 5 | 6 | 7 | def softmax(x): 8 | """ 9 | Compute the softmax function in tensorflow. 10 | 11 | You might find the tensorflow functions tf.exp, tf.reduce_max, 12 | tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may 13 | not need to use all of these functions). Recall also that many common 14 | tensorflow operations are sugared (e.g. 
x * y does a tensor multiplication 15 | if x and y are both tensors). Make sure to implement the numerical stability 16 | fixes as in the previous homework! 17 | 18 | Args: 19 | x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are 20 | represented by row-vectors. (For simplicity, no need to handle 1-d 21 | input as in the previous homework) 22 | Returns: 23 | out: tf.Tensor with shape (n_sample, n_features). You need to construct this 24 | tensor in this problem. 25 | """ 26 | 27 | ### YOUR CODE HERE 28 | fz = dy.exp(dy.colwise_add(x, -dy.max_dim(x, d=1))) 29 | fm = dy.sum_cols(fz) 30 | out = dy.cdiv(fz, fm) 31 | ### END YOUR CODE 32 | 33 | return out 34 | 35 | 36 | def cross_entropy_loss(y, yhat): 37 | """ 38 | Compute the cross entropy loss in tensorflow. 39 | The loss should be summed over the current minibatch. 40 | 41 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 42 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 43 | be of dtype tf.float32. 44 | 45 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 46 | solutions are possible, so you may not need to use all of these functions). 47 | 48 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 49 | functions. 50 | 51 | Args: 52 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 53 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 54 | probability distribution and should sum to 1. 55 | Returns: 56 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 57 | tensor in the problem. 58 | """ 59 | 60 | ### YOUR CODE HERE 61 | #out = (dy.sum_elems(out) / y.value().shape[0]).npvalue().reshape([]) 62 | out = dy.sum_elems(-dy.cmult(y, dy.log(yhat))) 63 | ### END YOUR CODE 64 | 65 | return out 66 | 67 | 68 | def test_softmax_basic(): 69 | """ 70 | Some simple tests of softmax to get you started. 71 | Warning: these are not exhaustive. 72 | """ 73 | 74 | #test1 = softmax(tf.constant(np.array([[1001, 1002], [3, 4]]), dtype=tf.float32)) 75 | dy.renew_cg() 76 | #test1 = softmax(dy.inputTensor(np.array([ 77 | # [1001, 1002], 78 | # [3, 4] 79 | #], dtype=np.float32))) 80 | 81 | #with tf.Session() as sess: 82 | # test1 = sess.run(test1) 83 | #test_all_close("Softmax test 1", test1, np.array([[0.26894142, 0.73105858], 84 | # [0.26894142, 0.73105858]])) 85 | 86 | #test2 = softmax(tf.constant(np.array([[-1001, -1002]]), dtype=tf.float32)) 87 | test2 = softmax(dy.inputTensor(np.array([[-1001, -1002]], dtype=np.float32))) 88 | #with tf.Session() as sess: 89 | # test2 = sess.run(test2) 90 | test_all_close("Softmax test 2", test2.value(), np.array([[0.73105858, 0.26894142]])) 91 | 92 | print "Basic (non-exhaustive) softmax tests pass\n" 93 | 94 | 95 | def test_cross_entropy_loss_basic(): 96 | """ 97 | Some simple tests of cross_entropy_loss to get you started. 98 | Warning: these are not exhaustive. 
99 | """ 100 | dy.renew_cg() 101 | #y = np.array([[0, 1], [1, 0], [1, 0]]) 102 | #yhat = np.array([[.5, .5], [.5, .5], [.5, .5]]) 103 | y = np.array([[0, 1], [1, 0], [1, 0]], dtype=np.float32) 104 | yhat = np.array([[.5, .5], [.5, .5], [.5, .5]], dtype=np.float32) 105 | 106 | test1 = cross_entropy_loss( 107 | dy.inputTensor(y), 108 | dy.inputTensor(yhat)) 109 | #tf.constant(y, dtype=tf.int32), 110 | #tf.constant(yhat, dtype=tf.float32)) 111 | #with tf.Session() as sess: 112 | # test1 = sess.run(test1) 113 | expected = -3 * np.log(.5) 114 | test_all_close("Cross-entropy test 1", test1.npvalue().reshape([]), expected) 115 | 116 | print "Basic (non-exhaustive) cross-entropy tests pass" 117 | 118 | if __name__ == "__main__": 119 | test_softmax_basic() 120 | test_cross_entropy_loss_basic() 121 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/q2_initialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #import tensorflow as tf 3 | import dynet as dy 4 | 5 | 6 | def xavier_weight_init(): 7 | """Returns function that creates random tensor. 8 | 9 | The specified function will take in a shape (tuple or 1-d array) and 10 | returns a random tensor of the specified shape drawn from the 11 | Xavier initialization distribution. 12 | 13 | Hint: You might find tf.random_uniform useful. 14 | """ 15 | def _xavier_initializer(shape, **kwargs): 16 | """Defines an initializer for the Xavier distribution. 17 | Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where 18 | epsilon = sqrt(6) / 19 | e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3)) 20 | 21 | This function will be used as a variable initializer. 22 | 23 | Args: 24 | shape: Tuple or 1-d array that species the dimensions of the requested tensor. 25 | Returns: 26 | out: tf.Tensor of specified shape sampled from the Xavier distribution. 27 | """ 28 | ### YOUR CODE HERE 29 | m = dy.ParameterCollection() 30 | out = m.add_parameters(shape).as_array() 31 | ### END YOUR CODE 32 | return out 33 | # Returns defined initializer function. 34 | return _xavier_initializer 35 | 36 | 37 | def test_initialization_basic(): 38 | """Some simple tests for the initialization. 39 | """ 40 | print "Running basic tests..." 41 | xavier_initializer = xavier_weight_init() 42 | shape = (1,) 43 | xavier_mat = xavier_initializer(shape) 44 | assert xavier_mat.shape == shape 45 | #assert xavier_mat.get_shape() == shape 46 | 47 | shape = (1, 2, 3) 48 | xavier_mat = xavier_initializer(shape) 49 | assert xavier_mat.shape == shape 50 | #assert xavier_mat.get_shape() == shape 51 | print "Basic (non-exhaustive) Xavier initialization tests pass" 52 | 53 | 54 | if __name__ == "__main__": 55 | test_initialization_basic() 56 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/q2_parser_transitions.py: -------------------------------------------------------------------------------- 1 | class PartialParse(object): 2 | def __init__(self, sentence): 3 | """Initializes this partial parse. 4 | 5 | Your code should initialize the following fields: 6 | self.stack: The current stack represented as a list with the top of the stack as the 7 | last element of the list. 8 | self.buffer: The current buffer represented as a list with the first item on the 9 | buffer as the first item of the list 10 | self.dependencies: The list of dependencies produced so far. 
Represented as a list of 11 | tuples where each tuple is of the form (head, dependent). 12 | Order for this list doesn't matter. 13 | 14 | The root token should be represented with the string "ROOT" 15 | 16 | Args: 17 | sentence: The sentence to be parsed as a list of words. 18 | Your code should not modify the sentence. 19 | """ 20 | # The sentence being parsed is kept for bookkeeping purposes. Do not use it in your code. 21 | self.sentence = sentence 22 | 23 | ### YOUR CODE HERE 24 | self.stack = ["ROOT"] 25 | self.buffer = sentence[:] 26 | self.dependencies = [] 27 | ### END YOUR CODE 28 | 29 | def parse_step(self, transition): 30 | """Performs a single parse step by applying the given transition to this partial parse 31 | 32 | Args: 33 | transition: A string that equals "S", "LA", or "RA" representing the shift, left-arc, 34 | and right-arc transitions. 35 | """ 36 | ### YOUR CODE HERE 37 | if transition == "S": 38 | if self.buffer: 39 | self.stack.append(self.buffer[0]) 40 | self.buffer.pop(0) 41 | elif transition == "LA": 42 | if len(self.stack) >= 2: 43 | self.dependencies.append((self.stack[-1], self.stack[-2])) 44 | self.stack.pop(-2) 45 | else: 46 | if len(self.stack) >= 2: 47 | self.dependencies.append((self.stack[-2], self.stack[-1])) 48 | self.stack.pop(-1) 49 | ### END YOUR CODE 50 | 51 | def parse(self, transitions): 52 | """Applies the provided transitions to this PartialParse 53 | 54 | Args: 55 | transitions: The list of transitions in the order they should be applied 56 | Returns: 57 | dependencies: The list of dependencies produced when parsing the sentence. Represented 58 | as a list of tuples where each tuple is of the form (head, dependent) 59 | """ 60 | for transition in transitions: 61 | self.parse_step(transition) 62 | return self.dependencies 63 | 64 | 65 | def minibatch_parse(sentences, model, batch_size): 66 | """Parses a list of sentences in minibatches using a model. 67 | 68 | Args: 69 | sentences: A list of sentences to be parsed (each sentence is a list of words) 70 | model: The model that makes parsing decisions. It is assumed to have a function 71 | model.predict(partial_parses) that takes in a list of PartialParses as input and 72 | returns a list of transitions predicted for each parse. That is, after calling 73 | transitions = model.predict(partial_parses) 74 | transitions[i] will be the next transition to apply to partial_parses[i]. 75 | batch_size: The number of PartialParses to include in each minibatch 76 | Returns: 77 | dependencies: A list where each element is the dependencies list for a parsed sentence. 78 | Ordering should be the same as in sentences (i.e., dependencies[i] should 79 | contain the parse for sentences[i]). 
80 | """ 81 | 82 | ### YOUR CODE HERE 83 | dependencies = [] 84 | for sentence in sentences: 85 | pp = PartialParse(sentence) 86 | for i in xrange(2*len(sentence)): 87 | action = model.predict([pp]) 88 | pp.parse(action) 89 | dependencies.append(pp.dependencies) 90 | ### END YOUR CODE 91 | 92 | return dependencies 93 | 94 | 95 | def test_step(name, transition, stack, buf, deps, 96 | ex_stack, ex_buf, ex_deps): 97 | """Tests that a single parse step returns the expected output""" 98 | pp = PartialParse([]) 99 | pp.stack, pp.buffer, pp.dependencies = stack, buf, deps 100 | 101 | pp.parse_step(transition) 102 | stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies))) 103 | assert stack == ex_stack, \ 104 | "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack) 105 | assert buf == ex_buf, \ 106 | "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf) 107 | assert deps == ex_deps, \ 108 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 109 | print "{:} test passed!".format(name) 110 | 111 | 112 | def test_parse_step(): 113 | """Simple tests for the PartialParse.parse_step function 114 | Warning: these are not exhaustive 115 | """ 116 | test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [], 117 | ("ROOT", "the", "cat"), ("sat",), ()) 118 | test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [], 119 | ("ROOT", "cat",), ("sat",), (("cat", "the"),)) 120 | test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [], 121 | ("ROOT", "run",), (), (("run", "fast"),)) 122 | 123 | 124 | def test_parse(): 125 | """Simple tests for the PartialParse.parse function 126 | Warning: these are not exhaustive 127 | """ 128 | sentence = ["parse", "this", "sentence"] 129 | dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"]) 130 | dependencies = tuple(sorted(dependencies)) 131 | expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')) 132 | assert dependencies == expected, \ 133 | "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected) 134 | assert tuple(sentence) == ("parse", "this", "sentence"), \ 135 | "parse test failed: the input sentence should not be modified" 136 | print "parse test passed!" 137 | 138 | 139 | class DummyModel: 140 | """Dummy model for testing the minibatch_parse function 141 | First shifts everything onto the stack and then does exclusively right arcs if the first word of 142 | the sentence is "right", "left" if otherwise. 
143 | """ 144 | def predict(self, partial_parses): 145 | return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S" 146 | for pp in partial_parses] 147 | 148 | 149 | def test_dependencies(name, deps, ex_deps): 150 | """Tests the provided dependencies match the expected dependencies""" 151 | deps = tuple(sorted(deps)) 152 | assert deps == ex_deps, \ 153 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 154 | 155 | 156 | def test_minibatch_parse(): 157 | """Simple tests for the minibatch_parse function 158 | Warning: these are not exhaustive 159 | """ 160 | sentences = [["right", "arcs", "only"], 161 | ["right", "arcs", "only", "again"], 162 | ["left", "arcs", "only"], 163 | ["left", "arcs", "only", "again"]] 164 | deps = minibatch_parse(sentences, DummyModel(), 2) 165 | test_dependencies("minibatch_parse", deps[0], 166 | (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs'))) 167 | test_dependencies("minibatch_parse", deps[1], 168 | (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs'))) 169 | test_dependencies("minibatch_parse", deps[2], 170 | (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left'))) 171 | test_dependencies("minibatch_parse", deps[3], 172 | (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only'))) 173 | print "minibatch_parse test passed!" 174 | 175 | if __name__ == '__main__': 176 | test_parse_step() 177 | test_parse() 178 | test_minibatch_parse() 179 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/__init__.py -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/__init__.pyc -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/utils/general_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | 5 | 6 | def get_minibatches(data, minibatch_size, shuffle=True): 7 | """ 8 | Iterates through the provided data one minibatch at at time. You can use this function to 9 | iterate through data in minibatches as follows: 10 | 11 | for inputs_minibatch in get_minibatches(inputs, minibatch_size): 12 | ... 13 | 14 | Or with multiple data sources: 15 | 16 | for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size): 17 | ... 18 | 19 | Args: 20 | data: there are two possible values: 21 | - a list or numpy array 22 | - a list where each element is either a list or numpy array 23 | minibatch_size: the maximum number of items in a minibatch 24 | shuffle: whether to randomize the order of returned data 25 | Returns: 26 | minibatches: the return value depends on data: 27 | - If data is a list/array it yields the next minibatch of data. 28 | - If data a list of lists/arrays it returns the next minibatch of each element in the 29 | list. 
This can be used to iterate through multiple data sources 30 | (e.g., features and labels) at the same time. 31 | 32 | """ 33 | list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray) 34 | data_size = len(data[0]) if list_data else len(data) 35 | indices = np.arange(data_size) 36 | if shuffle: 37 | np.random.shuffle(indices) 38 | for minibatch_start in np.arange(0, data_size, minibatch_size): 39 | minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size] 40 | yield [minibatch(d, minibatch_indices) for d in data] if list_data \ 41 | else minibatch(data, minibatch_indices) 42 | 43 | 44 | def minibatch(data, minibatch_idx): 45 | return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx] 46 | 47 | 48 | def test_all_close(name, actual, expected): 49 | if actual.shape != expected.shape: 50 | raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}" 51 | .format(name, expected.shape, actual.shape)) 52 | if np.amax(np.fabs(actual - expected)) > 1e-6: 53 | raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual)) 54 | else: 55 | print name, "passed!" 56 | 57 | 58 | def logged_loop(iterable, n=None): 59 | if n is None: 60 | n = len(iterable) 61 | step = max(1, n / 1000) 62 | prog = Progbar(n) 63 | for i, elem in enumerate(iterable): 64 | if i % step == 0 or i == n - 1: 65 | prog.update(i + 1) 66 | yield elem 67 | 68 | 69 | class Progbar(object): 70 | """ 71 | Progbar class copied from keras (https://github.com/fchollet/keras/) 72 | Displays a progress bar. 73 | # Arguments 74 | target: Total number of steps expected. 75 | interval: Minimum visual progress update interval (in seconds). 76 | """ 77 | 78 | def __init__(self, target, width=30, verbose=1): 79 | self.width = width 80 | self.target = target 81 | self.sum_values = {} 82 | self.unique_values = [] 83 | self.start = time.time() 84 | self.total_width = 0 85 | self.seen_so_far = 0 86 | self.verbose = verbose 87 | 88 | def update(self, current, values=[], exact=[]): 89 | """ 90 | Updates the progress bar. 91 | # Arguments 92 | current: Index of current step. 93 | values: List of tuples (name, value_for_last_step). 94 | The progress bar will display averages for these values. 95 | exact: List of tuples (name, value_for_last_step). 96 | The progress bar will display these values directly. 
97 | """ 98 | 99 | for k, v in values: 100 | if k not in self.sum_values: 101 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 102 | self.unique_values.append(k) 103 | else: 104 | self.sum_values[k][0] += v * (current - self.seen_so_far) 105 | self.sum_values[k][1] += (current - self.seen_so_far) 106 | for k, v in exact: 107 | if k not in self.sum_values: 108 | self.unique_values.append(k) 109 | self.sum_values[k] = [v, 1] 110 | self.seen_so_far = current 111 | 112 | now = time.time() 113 | if self.verbose == 1: 114 | prev_total_width = self.total_width 115 | sys.stdout.write("\b" * prev_total_width) 116 | sys.stdout.write("\r") 117 | 118 | numdigits = int(np.floor(np.log10(self.target))) + 1 119 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 120 | bar = barstr % (current, self.target) 121 | prog = float(current)/self.target 122 | prog_width = int(self.width*prog) 123 | if prog_width > 0: 124 | bar += ('='*(prog_width-1)) 125 | if current < self.target: 126 | bar += '>' 127 | else: 128 | bar += '=' 129 | bar += ('.'*(self.width-prog_width)) 130 | bar += ']' 131 | sys.stdout.write(bar) 132 | self.total_width = len(bar) 133 | 134 | if current: 135 | time_per_unit = (now - self.start) / current 136 | else: 137 | time_per_unit = 0 138 | eta = time_per_unit*(self.target - current) 139 | info = '' 140 | if current < self.target: 141 | info += ' - ETA: %ds' % eta 142 | else: 143 | info += ' - %ds' % (now - self.start) 144 | for k in self.unique_values: 145 | if type(self.sum_values[k]) is list: 146 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 147 | else: 148 | info += ' - %s: %s' % (k, self.sum_values[k]) 149 | 150 | self.total_width += len(info) 151 | if prev_total_width > self.total_width: 152 | info += ((prev_total_width-self.total_width) * " ") 153 | 154 | sys.stdout.write(info) 155 | sys.stdout.flush() 156 | 157 | if current >= self.target: 158 | sys.stdout.write("\n") 159 | 160 | if self.verbose == 2: 161 | if current >= self.target: 162 | info = '%ds' % (now - self.start) 163 | for k in self.unique_values: 164 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 165 | sys.stdout.write(info + "\n") 166 | 167 | def add(self, n, values=[]): 168 | self.update(self.seen_so_far+n, values) 169 | -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/utils/general_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/general_utils.pyc -------------------------------------------------------------------------------- /Assignment2/TaoJi/assignment2/utils/parser_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/assignment2/utils/parser_utils.pyc -------------------------------------------------------------------------------- /Assignment2/TaoJi/solution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/TaoJi/solution.pdf -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/.idea/.name: 
-------------------------------------------------------------------------------- 1 | assignment2 -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/.idea/assignment2.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/model.py: -------------------------------------------------------------------------------- 1 | import dynet as dy 2 | 3 | class Model(object): 4 | def create_feed_dict(self, inputs_batch, labels_batch=None): 5 | raise NotImplementedError("Each Model must re-implement this method.") 6 | 7 | def init_parameters(self): 8 | raise NotImplementedError("Each Model must re-implement this method.") 9 | 10 | def init_trainer(self): 11 | raise NotImplementedError("Each Model must re-implement this method.") 12 | 13 | def train_on_batch(self, inputs_batch, labels_batch): 14 | self.create_feed_dict(inputs_batch, labels_batch=labels_batch) 15 | pred = self.prediction() 16 | loss = self.compute_loss(pred) 17 | return loss 18 | 19 | def predict_on_batch(self, inputs_batch): 20 | self.create_feed_dict(inputs_batch) 21 | pred = self.prediction() 22 | return pred 23 | 24 | def build(self): 25 | self.init_trainer() 26 | self.init_parameters() 27 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/q1_classifier.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import dynet as dy 5 | 6 | from q1_softmax import softmax 7 | from q1_softmax import cross_entropy_loss 8 | from model import Model 9 | from utils.general_utils import get_minibatches 10 | 11 | 12 | class Config(object): 13 | n_samples = 1024 14 | n_features = 100 15 | n_classes = 5 16 | batch_size = 64 17 | n_epochs = 50 18 | lr = 1e-4 19 | 20 | 21 | class SoftmaxModel(Model): 22 | def init_trainer(self): 23 | ### YOUR CODE HERE 24 | self.sModel = dy.ParameterCollection() 25 | self.trainer = dy.SimpleSGDTrainer(self.sModel) 26 | self.trainer.learning_rate = self.config.lr 27 | ### END YOUR CODE 28 | 29 | def init_parameters(self): 30 | ### YOUR CODE HERE 31 | self._pW = self.sModel.add_parameters((self.config.n_features, self.config.n_classes)) 32 | self._pb = self.sModel.add_parameters((self.config.n_classes)) 33 | ### END YOUR CODE 34 | 35 | def create_feed_dict(self, inputs_batch, labels_batch=None): 36 | ### YOUR CODE HERE 37 | self.input = inputs_batch 38 | self.labels = labels_batch 39 | ### END YOUR CODE 40 | 41 | def prediction(self): 42 | W = dy.parameter(self._pW) 43 | b = dy.parameter(self._pb) 44 | x = 
dy.inputTensor(self.input) 45 | z_m = x * W 46 | z_T = dy.concatenate_cols([z_m[i]+b for i in range(self.config.batch_size)]) 47 | z = dy.transpose(z_T) 48 | pred = softmax(z) 49 | return pred 50 | 51 | def compute_loss(self, pred): 52 | y = dy.inputTensor(self.labels) 53 | loss = cross_entropy_loss(y, pred) 54 | return loss 55 | 56 | def run_epoch(self, inputs, labels): 57 | config = self.config 58 | n_minibatches, total_loss = 0, 0 59 | for input_batch, labels_batch in get_minibatches([inputs, labels], config.batch_size): 60 | n_minibatches += 1 61 | dy.renew_cg() 62 | loss = self.train_on_batch(input_batch, labels_batch) / config.batch_size 63 | 64 | loss.forward() 65 | loss.backward() 66 | self.trainer.update() 67 | 68 | total_loss += loss.value() 69 | return total_loss / n_minibatches 70 | 71 | def fit(self, inputs, labels): 72 | losses = [] 73 | for epoch in range(self.config.n_epochs): 74 | start_time = time.time() 75 | average_loss = self.run_epoch(inputs, labels) 76 | duration = time.time() - start_time 77 | print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, average_loss, duration) 78 | losses.append(average_loss) 79 | return losses 80 | 81 | def __init__(self, config): 82 | self.config = config 83 | self.build() 84 | 85 | 86 | def test_softmax_model(): 87 | config = Config() 88 | np.random.seed(1234) 89 | inputs = np.random.rand(config.n_samples, config.n_features) 90 | labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32) 91 | labels[:, 1] = 1 92 | # for i in xrange(config.n_samples): 93 | # labels[i, i%config.n_classes] = 1 94 | 95 | model = SoftmaxModel(config) 96 | losses = model.fit(inputs, labels) 97 | assert losses[-1] < .5 98 | print "Basic (non-exhaustive) classifier tests pass" 99 | 100 | if __name__ == "__main__": 101 | test_softmax_model() 102 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/q1_softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import dynet as dy 3 | from utils.general_utils import test_all_close 4 | 5 | 6 | def softmax(x): 7 | ### YOUR CODE HERE 8 | x_max = dy.max_dim(x, 1) 9 | x_sub = dy.colwise_add(x, -x_max) 10 | x_exp = dy.exp(x_sub) 11 | x_sum = dy.sum_cols(x_exp) 12 | x_tmp = dy.zeroes(x.dim()[0]) 13 | x_tmp = dy.colwise_add(x_tmp, x_sum) 14 | out = dy.cdiv(x_exp, x_tmp) 15 | ### END YOUR CODE 16 | return out 17 | 18 | 19 | def cross_entropy_loss(y, yhat): 20 | ### YOUR CODE HERE 21 | out = dy.sum_elems(-dy.cmult(y, dy.log(yhat))) 22 | ### END YOUR CODE 23 | return out 24 | 25 | 26 | def test_softmax_basic(): 27 | """ 28 | Some simple tests of softmax to get you started. 29 | Warning: these are not exhaustive. 30 | """ 31 | 32 | # test1 = softmax(torch.Tensor([[1001, 1002], [3, 4]])) 33 | # test1 = test1.numpy() 34 | test1 = softmax(dy.inputTensor([[1001, 1002], [3, 4]])) 35 | test1 = test1.npvalue(); 36 | test_all_close("Softmax test 1", test1, np.array([[0.26894142, 0.73105858], 37 | [0.26894142, 0.73105858]])) 38 | 39 | # test2 = softmax(torch.Tensor([[-1001, -1002]])) 40 | # test2 = test2.numpy() 41 | test2 = softmax(dy.inputTensor([[-1001, -1002]])) 42 | test2 = test2.npvalue(); 43 | test_all_close("Softmax test 2", test2, np.array([[0.73105858, 0.26894142]])) 44 | 45 | print "Basic (non-exhaustive) softmax tests pass\n" 46 | 47 | 48 | def test_cross_entropy_loss_basic(): 49 | """ 50 | Some simple tests of cross_entropy_loss to get you started. 
51 | Warning: these are not exhaustive. 52 | """ 53 | y = np.array([[0, 1], [1, 0], [1, 0]]) 54 | yhat = np.array([[.5, .5], [.5, .5], [.5, .5]]) 55 | 56 | # test1 = cross_entropy_loss( 57 | # torch.Tensor([[0, 1], [1, 0], [1, 0]]), 58 | # torch.Tensor([[.5, .5], [.5, .5], [.5, .5]])) 59 | # test1 = np.array(test1) 60 | test1 = cross_entropy_loss( 61 | dy.inputTensor([[0, 1], [1, 0], [1, 0]]), 62 | dy.inputTensor([[.5, .5], [.5, .5], [.5, .5]])) 63 | test1 = np.array(test1.value()) 64 | expected = -3 * np.log(.5) 65 | test_all_close("Cross-entropy test 1", test1, expected) 66 | 67 | print "Basic (non-exhaustive) cross-entropy tests pass" 68 | 69 | if __name__ == "__main__": 70 | test_softmax_basic() 71 | test_cross_entropy_loss_basic() 72 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/q2_initialization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import dynet as dy 3 | 4 | 5 | def xavier_weight_init(): 6 | def _xavier_initializer(shape, **kwargs): 7 | ### YOUR CODE HERE 8 | epsilon = np.sqrt(6 / np.sum(shape)) 9 | out = dy.random_uniform(dim=shape, left=-epsilon, right=epsilon) 10 | ### END YOUR CODE 11 | return out 12 | return _xavier_initializer 13 | 14 | 15 | def test_initialization_basic(): 16 | print "Running basic tests..." 17 | xavier_initializer = xavier_weight_init() 18 | shape = (1,) 19 | xavier_mat = xavier_initializer(shape) 20 | assert xavier_mat.dim()[0] == shape 21 | 22 | shape = (1, 2, 3) 23 | xavier_mat = xavier_initializer(shape) 24 | assert xavier_mat.dim()[0] == shape 25 | print "Basic (non-exhaustive) Xavier initialization tests pass" 26 | 27 | 28 | if __name__ == "__main__": 29 | test_initialization_basic() 30 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/q2_parser_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import dynet as dy 4 | import numpy as np 5 | import cPickle 6 | 7 | from model import Model 8 | from q1_softmax import cross_entropy_loss 9 | from q2_initialization import xavier_weight_init 10 | from utils.general_utils import Progbar 11 | from utils.parser_utils import minibatches, load_and_preprocess_data 12 | 13 | 14 | class Config(object): 15 | n_features = 36 16 | n_classes = 3 17 | dropout = 0.5 18 | embed_size = 50 19 | hidden_size = 200 20 | batch_size = 2048 21 | n_epochs = 10 22 | lr = 0.001 23 | 24 | 25 | class ParserModel(Model): 26 | def init_trainer(self): 27 | self.m = dy.ParameterCollection() 28 | self.trainer = dy.AdamTrainer(self.m) 29 | self.trainer.learning_rate = self.config.lr 30 | 31 | def init_parameters(self): 32 | zeroInit = dy.ConstInitializer(0.0) 33 | # xavier = xavier_weight_init() 34 | 35 | self._pW = self.m.add_parameters((self.config.n_features * self.config.embed_size, self.config.hidden_size)) 36 | self._pB1 = self.m.add_parameters((1, self.config.hidden_size), init=zeroInit) 37 | self._pU = self.m.add_parameters((self.config.hidden_size, self.config.n_classes)) 38 | self._pB2 = self.m.add_parameters((1, self.config.n_classes), init=zeroInit) 39 | 40 | self.word_dict = self.m.lookup_parameters_from_numpy(self.pretrained_embeddings) 41 | 42 | def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1): 43 | self.input = inputs_batch 44 | # 2048*36 45 | self.labels = labels_batch 46 | self.dropout = dropout 47 | 48 | def 
add_embedding(self): 49 | embeddings = dy.concatenate([self.word_dict.batch(x) for x in np.transpose(self.input)]) 50 | embeddings = dy.transpose(embeddings) 51 | # ((1, 50*36), 2048) 52 | return embeddings 53 | 54 | def prediction(self, dropout=False): 55 | x = self.add_embedding() 56 | W = dy.parameter(self._pW) 57 | U = dy.parameter(self._pU) 58 | b1 = dy.parameter(self._pB1) 59 | b2 = dy.parameter(self._pB2) 60 | 61 | z1 = x * W + b1 62 | h = dy.rectify(z1) 63 | h_drop = dy.dropout(h, self.dropout) if dropout else h 64 | 65 | z2 = h_drop * U + b2 66 | # print "z2: ", z2.dim() 67 | 68 | pred = dy.softmax(dy.reshape(z2, (self.config.n_classes,))) 69 | return pred 70 | 71 | def compute_loss(self, pred): 72 | y = dy.inputTensor(np.transpose(self.labels), batched=True) 73 | losses = cross_entropy_loss(y, pred) 74 | loss = dy.sum_batches(losses) / self.config.batch_size 75 | return loss 76 | 77 | def train_on_batch(self, inputs_batch, labels_batch): 78 | self.create_feed_dict(inputs_batch, labels_batch=labels_batch, 79 | dropout=self.config.dropout) 80 | pred = self.prediction(dropout=True) 81 | loss = self.compute_loss(pred) 82 | return loss 83 | 84 | def predict_on_batch(self, inputs_batch): 85 | self.create_feed_dict(inputs_batch) 86 | pred_dy = self.prediction() 87 | pred = np.transpose(pred_dy.npvalue()) 88 | return pred 89 | 90 | def run_epoch(self, parser, train_examples, dev_set): 91 | for i, (train_x, train_y) in enumerate(minibatches(train_examples, self.config.batch_size)): 92 | dy.renew_cg() 93 | loss = self.train_on_batch(train_x, train_y) 94 | loss.forward() 95 | loss.backward() 96 | self.trainer.update() 97 | print "Training Loss: ", loss.value() 98 | print "Evaluating on dev set", 99 | dev_UAS, _ = parser.parse(dev_set) 100 | print "- dev UAS: {:.2f}".format(dev_UAS * 100.0) 101 | return dev_UAS 102 | 103 | def fit(self, saver, parser, train_examples, dev_set): 104 | best_dev_UAS = 0 105 | for epoch in range(self.config.n_epochs): 106 | print "Epoch {:} out of {:}".format(epoch + 1, self.config.n_epochs) 107 | dev_UAS = self.run_epoch(parser, train_examples, dev_set) 108 | if dev_UAS > best_dev_UAS: 109 | best_dev_UAS = dev_UAS 110 | if saver: 111 | print "New best dev UAS! 
Saving model in ./data/weights/parser.weights" 112 | dy.save('./data/weights/parser.weights') 113 | print 114 | 115 | def __init__(self, config, pretrained_embeddings): 116 | self.pretrained_embeddings = pretrained_embeddings 117 | self.config = config 118 | self.build() 119 | 120 | 121 | def main(debug=False): 122 | print 80 * "=" 123 | print "INITIALIZING" 124 | print 80 * "=" 125 | config = Config() 126 | parser, embeddings, train_examples, dev_set, test_set = load_and_preprocess_data(debug) 127 | if not os.path.exists('./data/weights/'): 128 | os.makedirs('./data/weights/') 129 | 130 | print "Building model...", 131 | start = time.time() 132 | model = ParserModel(config, embeddings) 133 | parser.model = model 134 | print "took {:.2f} seconds\n".format(time.time() - start) 135 | 136 | saver = None if debug else True 137 | 138 | print 80 * "=" 139 | print "TRAINING" 140 | print 80 * "=" 141 | model.fit(saver, parser, train_examples, dev_set) 142 | 143 | if not debug: 144 | print 80 * "=" 145 | print "TESTING" 146 | print 80 * "=" 147 | print "Restoring the best model weights found on the dev set" 148 | saver.restore('./data/weights/parser.weights') 149 | print "Final evaluation on test set", 150 | UAS, dependencies = parser.parse(test_set) 151 | print "- test UAS: {:.2f}".format(UAS * 100.0) 152 | print "Writing predictions" 153 | with open('q2_test.predicted.pkl', 'w') as f: 154 | cPickle.dump(dependencies, f, -1) 155 | print "Done!" 156 | 157 | if __name__ == '__main__': 158 | main() 159 | 160 | 161 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/q2_parser_transitions.py: -------------------------------------------------------------------------------- 1 | class PartialParse(object): 2 | def __init__(self, sentence): 3 | self.sentence = sentence 4 | ### YOUR CODE HERE 5 | self.stack = ["ROOT"] 6 | self.buffer = sentence[:] 7 | self.dependencies = [] 8 | ### END YOUR CODE 9 | 10 | def parse_step(self, transition): 11 | ### YOUR CODE HERE 12 | if transition == "S": 13 | self.stack.append(self.buffer[0]) 14 | self.buffer.pop(0) 15 | elif transition == "LA": 16 | self.dependencies.append((self.stack[-1], self.stack[-2])) 17 | self.stack.pop(-2) 18 | else: 19 | self.dependencies.append((self.stack[-2], self.stack[-1])) 20 | self.stack.pop(-1) 21 | ### END YOUR CODE 22 | 23 | def parse(self, transitions): 24 | for transition in transitions: 25 | self.parse_step(transition) 26 | return self.dependencies 27 | 28 | 29 | def minibatch_parse(sentences, model, batch_size): 30 | ### YOUR CODE HERE 31 | partial_parses = [PartialParse(s) for s in sentences] 32 | unfinished_parse = partial_parses 33 | while len(unfinished_parse) > 0: 34 | minibatch = unfinished_parse[0:batch_size] 35 | while len(minibatch) > 0: 36 | transitions = model.predict(minibatch) 37 | for index, action in enumerate(transitions): 38 | minibatch[index].parse_step(action) 39 | minibatch = [parse for parse in minibatch if len(parse.stack) > 1 or len(parse.buffer) > 0] 40 | unfinished_parse = unfinished_parse[batch_size:] 41 | dependencies = [] 42 | for n in range(len(sentences)): 43 | dependencies.append(partial_parses[n].dependencies) 44 | ### END YOUR CODE 45 | 46 | return dependencies 47 | 48 | 49 | def test_step(name, transition, stack, buf, deps, 50 | ex_stack, ex_buf, ex_deps): 51 | """Tests that a single parse step returns the expected output""" 52 | pp = PartialParse([]) 53 | pp.stack, pp.buffer, pp.dependencies = stack, buf, deps 54 | 55 | 
pp.parse_step(transition) 56 | stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies))) 57 | assert stack == ex_stack, \ 58 | "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack) 59 | assert buf == ex_buf, \ 60 | "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf) 61 | assert deps == ex_deps, \ 62 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 63 | print "{:} test passed!".format(name) 64 | 65 | 66 | def test_parse_step(): 67 | """Simple tests for the PartialParse.parse_step function 68 | Warning: these are not exhaustive 69 | """ 70 | test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [], 71 | ("ROOT", "the", "cat"), ("sat",), ()) 72 | test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [], 73 | ("ROOT", "cat",), ("sat",), (("cat", "the"),)) 74 | test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [], 75 | ("ROOT", "run",), (), (("run", "fast"),)) 76 | 77 | 78 | def test_parse(): 79 | """Simple tests for the PartialParse.parse function 80 | Warning: these are not exhaustive 81 | """ 82 | sentence = ["parse", "this", "sentence"] 83 | dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"]) 84 | dependencies = tuple(sorted(dependencies)) 85 | expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')) 86 | assert dependencies == expected, \ 87 | "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected) 88 | assert tuple(sentence) == ("parse", "this", "sentence"), \ 89 | "parse test failed: the input sentence should not be modified" 90 | print "parse test passed!" 91 | 92 | 93 | class DummyModel: 94 | """Dummy model for testing the minibatch_parse function 95 | First shifts everything onto the stack and then does exclusively right arcs if the first word of 96 | the sentence is "right", "left" if otherwise. 97 | """ 98 | def predict(self, partial_parses): 99 | return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S" 100 | for pp in partial_parses] 101 | 102 | 103 | def test_dependencies(name, deps, ex_deps): 104 | """Tests the provided dependencies match the expected dependencies""" 105 | deps = tuple(sorted(deps)) 106 | assert deps == ex_deps, \ 107 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 108 | 109 | 110 | def test_minibatch_parse(): 111 | """Simple tests for the minibatch_parse function 112 | Warning: these are not exhaustive 113 | """ 114 | sentences = [["right", "arcs", "only"], 115 | ["right", "arcs", "only", "again"], 116 | ["left", "arcs", "only"], 117 | ["left", "arcs", "only", "again"]] 118 | deps = minibatch_parse(sentences, DummyModel(), 2) 119 | test_dependencies("minibatch_parse", deps[0], 120 | (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs'))) 121 | test_dependencies("minibatch_parse", deps[1], 122 | (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs'))) 123 | test_dependencies("minibatch_parse", deps[2], 124 | (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left'))) 125 | test_dependencies("minibatch_parse", deps[3], 126 | (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only'))) 127 | print "minibatch_parse test passed!" 
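# --- Editorial sketch (added for illustration, not part of the original assignment file) ---
# A worked trace of the arc-standard transitions implemented above, using only the
# PartialParse class defined in this file. "S" shifts the next buffer word onto the stack,
# "LA" records (top, second) and pops the second item, "RA" records (second, top) and
# pops the top item.
#
#   transition  stack                           buffer                    new dependency
#   (init)      [ROOT]                          [parse, this, sentence]   -
#   S           [ROOT, parse]                   [this, sentence]          -
#   S           [ROOT, parse, this]             [sentence]                -
#   S           [ROOT, parse, this, sentence]   []                        -
#   LA          [ROOT, parse, sentence]         []                        (sentence, this)
#   RA          [ROOT, parse]                   []                        (parse, sentence)
#   RA          [ROOT]                          []                        (ROOT, parse)
def _demo_parse_trace():
    # Helper name is illustrative only; it simply replays the trace shown above.
    pp = PartialParse(["parse", "this", "sentence"])
    deps = pp.parse(["S", "S", "S", "LA", "RA", "RA"])
    print sorted(deps)  # [('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')]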
128 | 129 | if __name__ == '__main__': 130 | test_parse_step() 131 | test_parse() 132 | test_minibatch_parse() 133 | -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/__init__.py -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/__init__.pyc -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/utils/general_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | 5 | 6 | def get_minibatches(data, minibatch_size, shuffle=True): 7 | """ 8 | Iterates through the provided data one minibatch at at time. You can use this function to 9 | iterate through data in minibatches as follows: 10 | 11 | for inputs_minibatch in get_minibatches(inputs, minibatch_size): 12 | ... 13 | 14 | Or with multiple data sources: 15 | 16 | for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size): 17 | ... 18 | 19 | Args: 20 | data: there are two possible values: 21 | - a list or numpy array 22 | - a list where each element is either a list or numpy array 23 | minibatch_size: the maximum number of items in a minibatch 24 | shuffle: whether to randomize the order of returned data 25 | Returns: 26 | minibatches: the return value depends on data: 27 | - If data is a list/array it yields the next minibatch of data. 28 | - If data a list of lists/arrays it returns the next minibatch of each element in the 29 | list. This can be used to iterate through multiple data sources 30 | (e.g., features and labels) at the same time. 31 | 32 | """ 33 | list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray) 34 | data_size = len(data[0]) if list_data else len(data) 35 | indices = np.arange(data_size) 36 | if shuffle: 37 | np.random.shuffle(indices) 38 | for minibatch_start in np.arange(0, data_size, minibatch_size): 39 | minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size] 40 | yield [minibatch(d, minibatch_indices) for d in data] if list_data \ 41 | else minibatch(data, minibatch_indices) 42 | 43 | 44 | def minibatch(data, minibatch_idx): 45 | return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx] 46 | 47 | 48 | def test_all_close(name, actual, expected): 49 | if actual.shape != expected.shape: 50 | raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}" 51 | .format(name, expected.shape, actual.shape)) 52 | if np.amax(np.fabs(actual - expected)) > 1e-6: 53 | raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual)) 54 | else: 55 | print name, "passed!" 
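# --- Editorial sketch (added for illustration, not part of the original file) ---
# Minimal usage example for get_minibatches defined above; the array shapes and the
# helper name are illustrative assumptions, not part of the assignment.
def _demo_get_minibatches():
    inputs = np.arange(10).reshape(5, 2)   # 5 samples, 2 features
    labels = np.arange(5)                  # 5 labels, aligned with the rows of inputs
    # Passing [inputs, labels] keeps both sources aligned within every minibatch.
    for x_mb, y_mb in get_minibatches([inputs, labels], 2, shuffle=False):
        print x_mb.shape, y_mb             # (2, 2) [0 1] / (2, 2) [2 3] / (1, 2) [4]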
56 | 57 | 58 | def logged_loop(iterable, n=None): 59 | if n is None: 60 | n = len(iterable) 61 | step = max(1, n / 1000) 62 | prog = Progbar(n) 63 | for i, elem in enumerate(iterable): 64 | if i % step == 0 or i == n - 1: 65 | prog.update(i + 1) 66 | yield elem 67 | 68 | 69 | class Progbar(object): 70 | """ 71 | Progbar class copied from keras (https://github.com/fchollet/keras/) 72 | Displays a progress bar. 73 | # Arguments 74 | target: Total number of steps expected. 75 | interval: Minimum visual progress update interval (in seconds). 76 | """ 77 | 78 | def __init__(self, target, width=30, verbose=1): 79 | self.width = width 80 | self.target = target 81 | self.sum_values = {} 82 | self.unique_values = [] 83 | self.start = time.time() 84 | self.total_width = 0 85 | self.seen_so_far = 0 86 | self.verbose = verbose 87 | 88 | def update(self, current, values=[], exact=[]): 89 | """ 90 | Updates the progress bar. 91 | # Arguments 92 | current: Index of current step. 93 | values: List of tuples (name, value_for_last_step). 94 | The progress bar will display averages for these values. 95 | exact: List of tuples (name, value_for_last_step). 96 | The progress bar will display these values directly. 97 | """ 98 | 99 | for k, v in values: 100 | if k not in self.sum_values: 101 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 102 | self.unique_values.append(k) 103 | else: 104 | self.sum_values[k][0] += v * (current - self.seen_so_far) 105 | self.sum_values[k][1] += (current - self.seen_so_far) 106 | for k, v in exact: 107 | if k not in self.sum_values: 108 | self.unique_values.append(k) 109 | self.sum_values[k] = [v, 1] 110 | self.seen_so_far = current 111 | 112 | now = time.time() 113 | if self.verbose == 1: 114 | prev_total_width = self.total_width 115 | sys.stdout.write("\b" * prev_total_width) 116 | sys.stdout.write("\r") 117 | 118 | numdigits = int(np.floor(np.log10(self.target))) + 1 119 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 120 | bar = barstr % (current, self.target) 121 | prog = float(current)/self.target 122 | prog_width = int(self.width*prog) 123 | if prog_width > 0: 124 | bar += ('='*(prog_width-1)) 125 | if current < self.target: 126 | bar += '>' 127 | else: 128 | bar += '=' 129 | bar += ('.'*(self.width-prog_width)) 130 | bar += ']' 131 | sys.stdout.write(bar) 132 | self.total_width = len(bar) 133 | 134 | if current: 135 | time_per_unit = (now - self.start) / current 136 | else: 137 | time_per_unit = 0 138 | eta = time_per_unit*(self.target - current) 139 | info = '' 140 | if current < self.target: 141 | info += ' - ETA: %ds' % eta 142 | else: 143 | info += ' - %ds' % (now - self.start) 144 | for k in self.unique_values: 145 | if type(self.sum_values[k]) is list: 146 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 147 | else: 148 | info += ' - %s: %s' % (k, self.sum_values[k]) 149 | 150 | self.total_width += len(info) 151 | if prev_total_width > self.total_width: 152 | info += ((prev_total_width-self.total_width) * " ") 153 | 154 | sys.stdout.write(info) 155 | sys.stdout.flush() 156 | 157 | if current >= self.target: 158 | sys.stdout.write("\n") 159 | 160 | if self.verbose == 2: 161 | if current >= self.target: 162 | info = '%ds' % (now - self.start) 163 | for k in self.unique_values: 164 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 165 | sys.stdout.write(info + "\n") 166 | 167 | def add(self, n, values=[]): 168 | self.update(self.seen_so_far+n, values) 169 | 
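# --- Editorial sketch (added for illustration, not part of the original file) ---
# Minimal usage example for the Progbar class above; the loop body and the helper name
# are illustrative assumptions. update() receives (name, value) pairs whose running
# averages are appended to the displayed bar.
def _demo_progbar():
    prog = Progbar(target=20)
    for step in range(20):
        time.sleep(0.05)  # stand-in for one training step
        prog.update(step + 1, values=[("loss", 1.0 / (step + 1))])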
-------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/utils/general_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/general_utils.pyc -------------------------------------------------------------------------------- /Assignment2/WeiYang/assignment2/utils/parser_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/assignment2/utils/parser_utils.pyc -------------------------------------------------------------------------------- /Assignment2/WeiYang/solution.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/WeiYang/solution.pdf -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/a.txt -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | 6 | import dynet as dy 7 | 8 | class Model(object): 9 | """Abstracts a Tensorflow graph for a learning task. 10 | 11 | We use various Model classes as usual abstractions to encapsulate tensorflow 12 | computational graphs. Each algorithm you will construct in this homework will 13 | inherit from a Model object. 14 | """ 15 | 16 | def create_feed_dict(self, inputs_batch, labels_batch=None): 17 | """Creates the feed_dict for one step of training. 18 | 19 | If labels_batch is None, then no labels are added to feed_dict. 20 | 21 | Hint: The keys for the feed_dict should be a subset of the placeholder 22 | tensors created in add_placeholders. 23 | 24 | Args: 25 | inputs_batch: A batch of input data. 26 | labels_batch: A batch of label data. 27 | Returns: 28 | feed_dict: The feed dictionary mapping from placeholders to values. 29 | """ 30 | raise NotImplementedError("Each Model must re-implement this method.") 31 | 32 | def init_parameters(self): 33 | """Initialize parameters for the Dynet model 34 | 35 | """ 36 | raise NotImplementedError("Each Model must re-implement this method.") 37 | 38 | def init_trainer(self): 39 | """Sets up the trainer. 40 | """ 41 | 42 | raise NotImplementedError("Each Model must re-implement this method.") 43 | 44 | def train_on_batch(self, inputs_batch, labels_batch): 45 | """Perform one step of gradient descent on the provided batch of data. 
46 | 47 | Args: 48 | input_batch: np.ndarray of shape (n_samples, n_features) 49 | labels_batch: np.ndarray of shape (n_samples, n_classes) 50 | Returns: 51 | loss: loss over the batch (a scalar) 52 | """ 53 | 54 | self.create_feed_dict(inputs_batch, labels_batch=labels_batch) 55 | 56 | pred = self.prediction() 57 | 58 | loss = self.compute_loss(pred) 59 | 60 | return loss 61 | 62 | def predict_on_batch(self, inputs_batch): 63 | """Make predictions for the provided batch of data 64 | 65 | Args: 66 | input_batch: np.ndarray of shape (n_samples, n_features) 67 | Returns: 68 | predictions: np.ndarray of shape (n_samples, n_classes) 69 | """ 70 | self.create_feed_dict(inputs_batch) 71 | 72 | pred = self.prediction() 73 | 74 | return pred 75 | 76 | def build(self): 77 | self.init_trainer() 78 | self.init_parameters() 79 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/q1_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | 6 | import time 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | import dynet as dy 11 | 12 | from q1_softmax import softmax 13 | from q1_softmax import cross_entropy_loss 14 | from model import Model 15 | from utils.general_utils import get_minibatches 16 | 17 | 18 | class Config(object): 19 | """Holds model hyperparams and data information. 20 | 21 | The config class is used to store various hyperparameters and dataset 22 | information parameters. Model objects are passed a Config() object at 23 | instantiation. 24 | """ 25 | n_samples = 1024 26 | n_features = 100 27 | n_classes = 5 28 | batch_size = 64 29 | n_epochs = 50 30 | lr = 1e-4 31 | 32 | 33 | class SoftmaxModel(Model): 34 | """ a Softmax classifier with cross-entropy loss.""" 35 | 36 | def init_trainer(self): 37 | """Sets up the trainer. 38 | 39 | """ 40 | ### YOUR CODE HERE 41 | self.sModel = dy.ParameterCollection() 42 | self.trainer = dy.SimpleSGDTrainer(self.sModel) 43 | self.trainer.learning_rate = self.config.lr 44 | ### END YOUR CODE 45 | 46 | def init_parameters(self): 47 | """Set up parameters 48 | 49 | """ 50 | ### YOUR CODE HERE 51 | self._pW = self.sModel.add_parameters((self.config.n_features, self.config.n_classes)) 52 | self._pb = self.sModel.add_parameters((self.config.n_classes)) 53 | # associate the parameters with cg Expressions 54 | 55 | ### END YOUR CODE 56 | 57 | def create_feed_dict(self, inputs_batch, labels_batch=None): 58 | """Creates the feed_dict for training the given step. 59 | 60 | If label_batch is None, then no labels are added to feed_dict. 61 | 62 | Hint: The keys for the feed_dict should be the placeholder 63 | tensors created in add_placeholders. 64 | 65 | Args: 66 | inputs_batch: A batch of input data. 67 | labels_batch: A batch of label data. 68 | Returns: 69 | feed_dict: The feed dictionary mapping from placeholders to values. 70 | """ 71 | ### YOUR CODE HERE 72 | self.input = inputs_batch 73 | self.labels = labels_batch 74 | ### END YOUR CODE 75 | 76 | def prediction(self): 77 | """Adds the core transformation for this model which transforms a batch of input 78 | data into a batch of predictions. In this case, the transformation is a linear layer plus a 79 | softmax transformation: 80 | 81 | y = softmax(xW + b) 82 | 83 | Args: 84 | input_data: A tensor of shape (batch_size, n_features). 
85 | Returns: 86 | pred: A tensor of shape (batch_size, n_classes) 87 | """ 88 | W = dy.parameter(self._pW) 89 | b = dy.parameter(self._pb) 90 | x = dy.inputTensor(self.input) 91 | 92 | z_m = x * W 93 | z_T = dy.concatenate_cols([z_m[i]+b for i in range(self.config.batch_size)]) 94 | z = dy.transpose(z_T) 95 | # z = x * W + b 96 | 97 | pred = softmax(z) 98 | return pred 99 | 100 | def compute_loss(self, pred): 101 | """Adds cross_entropy_loss ops to the computational graph. 102 | 103 | Args: 104 | pred: A tensor of shape (batch_size, n_classes) 105 | Returns: 106 | loss: A 0-d tensor (scalar) 107 | """ 108 | y = dy.inputTensor(self.labels) 109 | loss = cross_entropy_loss(y, pred) 110 | return loss 111 | 112 | def run_epoch(self, inputs, labels): 113 | """Runs an epoch of training. 114 | 115 | Args: 116 | inputs: np.ndarray of shape (n_samples, n_features) 117 | labels: np.ndarray of shape (n_samples, n_classes) 118 | Returns: 119 | average_loss: scalar. Average minibatch loss of model on epoch. 120 | """ 121 | config = self.config 122 | n_minibatches, total_loss = 0, 0 123 | for input_batch, labels_batch in get_minibatches([inputs, labels], config.batch_size): 124 | n_minibatches += 1 125 | dy.renew_cg() 126 | '''Compute the loss of a batch''' 127 | # loss = [] 128 | # for i in xrange(config.batch_size): 129 | # input_t, labels_t = input_batch[i].reshape(1, config.n_features), labels_batch[i].reshape(1, config.n_classes) 130 | # loss_t = self.train_on_batch(input_t, labels_t) 131 | # loss.append(loss_t) 132 | # loss = dy.esum(loss) / config.batch_size 133 | loss = self.train_on_batch(input_batch, labels_batch) / config.batch_size 134 | 135 | loss.forward() 136 | loss.backward() 137 | self.trainer.update() 138 | 139 | total_loss += loss.value() 140 | return total_loss / n_minibatches 141 | 142 | def fit(self, inputs, labels): 143 | """Fit model on provided data. 144 | 145 | Args: 146 | inputs: np.ndarray of shape (n_samples, n_features) 147 | labels: np.ndarray of shape (n_samples, n_classes) 148 | Returns: 149 | losses: list of loss per epoch 150 | """ 151 | losses = [] 152 | for epoch in range(self.config.n_epochs): 153 | start_time = time.time() 154 | average_loss = self.run_epoch(inputs, labels) 155 | duration = time.time() - start_time 156 | print 'Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, average_loss, duration) 157 | losses.append(average_loss) 158 | return losses 159 | 160 | def __init__(self, config): 161 | """Initializes the model. 162 | 163 | Args: 164 | config: A model configuration object of type Config 165 | """ 166 | self.config = config 167 | self.build() 168 | 169 | 170 | def test_softmax_model(): 171 | """Train softmax model for a number of steps.""" 172 | config = Config() 173 | 174 | # Generate random data to train the model on 175 | np.random.seed(1234) 176 | inputs = np.random.rand(config.n_samples, config.n_features) 177 | labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32) 178 | labels[:, 1] = 1 179 | # for i in xrange(config.n_samples): 180 | # labels[i, i%config.n_classes] = 1 181 | 182 | model = SoftmaxModel(config) 183 | losses = model.fit(inputs, labels) 184 | 185 | # If Ops are implemented correctly, the average loss should fall close to zero 186 | # rapidly. 
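    # Editorial note (added, not from the original file): with every label fixed to class 1
    # the task is trivially separable, so the model only has to push probability mass onto
    # one column. At random initialization the per-sample cross-entropy is roughly
    # ln(5) ~= 1.61 (near-uniform over 5 classes); the assertion below checks that training
    # has brought it below 0.5.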
187 | assert losses[-1] < .5 188 | print "Basic (non-exhaustive) classifier tests pass" 189 | 190 | if __name__ == "__main__": 191 | test_softmax_model() 192 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/q1_softmax.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | 6 | import numpy as np 7 | #import tensorflow as tf 8 | import dynet as dy 9 | from utils.general_utils import test_all_close 10 | 11 | def softmax(x): 12 | """ 13 | Compute the softmax function in tensorflow. 14 | 15 | You might find the tensorflow functions tf.exp, tf.reduce_max, 16 | tf.reduce_sum, tf.expand_dims useful. (Many solutions are possible, so you may 17 | not need to use all of these functions). Recall also that many common 18 | tensorflow operations are sugared (e.g. x * y does a tensor multiplication 19 | if x and y are both tensors). Make sure to implement the numerical stability 20 | fixes as in the previous homework! 21 | 22 | Args: 23 | x: tf.Tensor with shape (n_samples, n_features). Note feature vectors are 24 | represented by row-vectors. (For simplicity, no need to handle 1-d 25 | input as in the previous homework) 26 | Returns: 27 | out: tf.Tensor with shape (n_sample, n_features). You need to construct this 28 | tensor in this problem. 29 | """ 30 | 31 | ### YOUR CODE HERE 32 | x_max = dy.max_dim(x, 1) 33 | x_sub = dy.colwise_add(x, -x_max) 34 | x_exp = dy.exp(x_sub) 35 | sum_exp = dy.colwise_add(dy.zeroes(x.dim()[0]), dy.sum_cols(x_exp)) 36 | 37 | out = dy.cdiv(x_exp, sum_exp) 38 | ### END YOUR CODE 39 | 40 | return out 41 | 42 | 43 | def cross_entropy_loss(y, yhat): 44 | """ 45 | Compute the cross entropy loss in tensorflow. 46 | The loss should be summed over the current minibatch. 47 | 48 | y is a one-hot tensor of shape (n_samples, n_classes) and yhat is a tensor 49 | of shape (n_samples, n_classes). y should be of dtype tf.int32, and yhat should 50 | be of dtype tf.float32. 51 | 52 | The functions tf.to_float, tf.reduce_sum, and tf.log might prove useful. (Many 53 | solutions are possible, so you may not need to use all of these functions). 54 | 55 | Note: You are NOT allowed to use the tensorflow built-in cross-entropy 56 | functions. 57 | 58 | Args: 59 | y: tf.Tensor with shape (n_samples, n_classes). One-hot encoded. 60 | yhat: tf.Tensorwith shape (n_sample, n_classes). Each row encodes a 61 | probability distribution and should sum to 1. 62 | Returns: 63 | out: tf.Tensor with shape (1,) (Scalar output). You need to construct this 64 | tensor in the problem. 65 | """ 66 | 67 | ### YOUR CODE HERE 68 | l_yhat = dy.log(yhat) 69 | product = dy.cmult(y, l_yhat) 70 | out = (-dy.sum_elems(product)) 71 | ### END YOUR CODE 72 | 73 | return out 74 | 75 | 76 | def test_softmax_basic(): 77 | """ 78 | Some simple tests of softmax to get you started. 79 | Warning: these are not exhaustive. 
80 | """ 81 | dy.renew_cg() 82 | test1 = softmax(dy.inputTensor(np.array([[1001, 1002], [3, 4]]))) 83 | test_all_close("Softmax test 1", test1.value(), np.array([[0.26894142, 0.73105858], 84 | [0.26894142, 0.73105858]])) 85 | dy.renew_cg() 86 | test2 = softmax(dy.inputTensor(np.array([[-1001, -1002]]))) 87 | test_all_close("Softmax test 2", test2.value(), np.array([[0.73105858, 0.26894142]])) 88 | 89 | print "Basic (non-exhaustive) softmax tests pass\n" 90 | 91 | 92 | def test_cross_entropy_loss_basic(): 93 | """ 94 | Some simple tests of cross_entropy_loss to get you started. 95 | Warning: these are not exhaustive. 96 | """ 97 | y = np.array([[0, 1], [1, 0], [1, 0]]) 98 | yhat = np.array([[.5, .5], [.5, .5], [.5, .5]]) 99 | 100 | test1 = cross_entropy_loss(dy.inputTensor(y), dy.inputTensor(yhat)) 101 | 102 | expected = -3 * np.log(.5) 103 | test_all_close("Cross-entropy test 1", np.array(test1.value()), expected) 104 | 105 | print "Basic (non-exhaustive) cross-entropy tests pass" 106 | 107 | if __name__ == "__main__": 108 | test_softmax_basic() 109 | test_cross_entropy_loss_basic() 110 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/q2_initialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | import dynet as dy 9 | 10 | def xavier_weight_init(): 11 | """Returns function that creates random tensor. 12 | 13 | The specified function will take in a shape (tuple or 1-d array) and 14 | returns a random tensor of the specified shape drawn from the 15 | Xavier initialization distribution. 16 | 17 | Hint: You might find tf.random_uniform useful. 18 | """ 19 | def _xavier_initializer(shape, **kwargs): 20 | """Defines an initializer for the Xavier distribution. 21 | Specifically, the output should be sampled uniformly from [-epsilon, epsilon] where 22 | epsilon = sqrt(6) / 23 | e.g., if shape = (2, 3), epsilon = sqrt(6 / (2 + 3)) 24 | 25 | This function will be used as a variable initializer. 26 | 27 | Args: 28 | shape: Tuple or 1-d array that species the dimensions of the requested tensor. 29 | Returns: 30 | out: tf.Tensor of specified shape sampled from the Xavier distribution. 31 | """ 32 | ### YOUR CODE HERE 33 | epsilon = np.sqrt(6 / np.sum(shape)) 34 | out = dy.random_uniform(dim=shape, left=-epsilon, right=epsilon) 35 | ### END YOUR CODE 36 | return out.npvalue() 37 | # Returns defined initializer function. 38 | return _xavier_initializer 39 | 40 | 41 | def test_initialization_basic(): 42 | """Some simple tests for the initialization. 43 | """ 44 | print "Running basic tests..." 45 | xavier_initializer = xavier_weight_init() 46 | shape = (1,) 47 | xavier_mat = xavier_initializer(shape) 48 | assert xavier_mat.shape == shape 49 | 50 | shape = (1, 2, 3) 51 | xavier_mat = xavier_initializer(shape) 52 | assert xavier_mat.shape == shape 53 | print "Basic (non-exhaustive) Xavier initialization tests pass" 54 | 55 | 56 | if __name__ == "__main__": 57 | test_initialization_basic() 58 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/q2_parser_transitions.py: -------------------------------------------------------------------------------- 1 | class PartialParse(object): 2 | def __init__(self, sentence): 3 | """Initializes this partial parse. 
4 | 5 | Your code should initialize the following fields: 6 | self.stack: The current stack represented as a list with the top of the stack as the 7 | last element of the list. 8 | self.buffer: The current buffer represented as a list with the first item on the 9 | buffer as the first item of the list 10 | self.dependencies: The list of dependencies produced so far. Represented as a list of 11 | tuples where each tuple is of the form (head, dependent). 12 | Order for this list doesn't matter. 13 | 14 | The root token should be represented with the string "ROOT" 15 | 16 | Args: 17 | sentence: The sentence to be parsed as a list of words. 18 | Your code should not modify the sentence. 19 | """ 20 | # The sentence being parsed is kept for bookkeeping purposes. Do not use it in your code. 21 | self.sentence = sentence 22 | 23 | ### YOUR CODE HERE 24 | self.stack = ['ROOT'] 25 | self.buffer = sentence[:] 26 | self.dependencies = [] 27 | ### END YOUR CODE 28 | 29 | def parse_step(self, transition): 30 | """Performs a single parse step by applying the given transition to this partial parse 31 | 32 | Args: 33 | transition: A string that equals "S", "LA", or "RA" representing the shift, left-arc, 34 | and right-arc transitions. 35 | """ 36 | ### YOUR CODE HERE 37 | if transition == 'S': 38 | if self.buffer: 39 | self.stack.append(self.buffer[0]) 40 | self.buffer.pop(0) 41 | elif transition == 'LA': 42 | if len(self.stack) >= 2: 43 | self.dependencies.append((self.stack[-1], self.stack[-2])) 44 | self.stack.pop(-2) 45 | else: 46 | if len(self.stack) >= 2: 47 | self.dependencies.append((self.stack[-2], self.stack[-1])) 48 | self.stack.pop(-1) 49 | ### END YOUR CODE 50 | 51 | def parse(self, transitions): 52 | """Applies the provided transitions to this PartialParse 53 | 54 | Args: 55 | transitions: The list of transitions in the order they should be applied 56 | Returns: 57 | dependencies: The list of dependencies produced when parsing the sentence. Represented 58 | as a list of tuples where each tuple is of the form (head, dependent) 59 | """ 60 | for transition in transitions: 61 | self.parse_step(transition) 62 | return self.dependencies 63 | 64 | 65 | def minibatch_parse(sentences, model, batch_size): 66 | """Parses a list of sentences in minibatches using a model. 67 | 68 | Args: 69 | sentences: A list of sentences to be parsed (each sentence is a list of words) 70 | model: The model that makes parsing decisions. It is assumed to have a function 71 | model.predict(partial_parses) that takes in a list of PartialParses as input and 72 | returns a list of transitions predicted for each parse. That is, after calling 73 | transitions = model.predict(partial_parses) 74 | transitions[i] will be the next transition to apply to partial_parses[i]. 75 | batch_size: The number of PartialParses to include in each minibatch 76 | Returns: 77 | dependencies: A list where each element is the dependencies list for a parsed sentence. 78 | Ordering should be the same as in sentences (i.e., dependencies[i] should 79 | contain the parse for sentences[i]). 
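    Note: the implementation below parses each sentence independently, calling
    model.predict on a single-element list for 2 * len(sentence) steps, so the
    batch_size argument is not actually used.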
80 | """ 81 | 82 | ### YOUR CODE HERE 83 | dependencies = [] 84 | for sentence in sentences: 85 | pp = PartialParse(sentence) 86 | for i in xrange(2*len(sentence)): 87 | action = model.predict([pp]) 88 | pp.parse(action) 89 | dependencies.append(pp.dependencies) 90 | ### END YOUR CODE 91 | 92 | return dependencies 93 | 94 | 95 | def test_step(name, transition, stack, buf, deps, 96 | ex_stack, ex_buf, ex_deps): 97 | """Tests that a single parse step returns the expected output""" 98 | pp = PartialParse([]) 99 | pp.stack, pp.buffer, pp.dependencies = stack, buf, deps 100 | 101 | pp.parse_step(transition) 102 | stack, buf, deps = (tuple(pp.stack), tuple(pp.buffer), tuple(sorted(pp.dependencies))) 103 | assert stack == ex_stack, \ 104 | "{:} test resulted in stack {:}, expected {:}".format(name, stack, ex_stack) 105 | assert buf == ex_buf, \ 106 | "{:} test resulted in buffer {:}, expected {:}".format(name, buf, ex_buf) 107 | assert deps == ex_deps, \ 108 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 109 | print "{:} test passed!".format(name) 110 | 111 | 112 | def test_parse_step(): 113 | """Simple tests for the PartialParse.parse_step function 114 | Warning: these are not exhaustive 115 | """ 116 | test_step("SHIFT", "S", ["ROOT", "the"], ["cat", "sat"], [], 117 | ("ROOT", "the", "cat"), ("sat",), ()) 118 | test_step("LEFT-ARC", "LA", ["ROOT", "the", "cat"], ["sat"], [], 119 | ("ROOT", "cat",), ("sat",), (("cat", "the"),)) 120 | test_step("RIGHT-ARC", "RA", ["ROOT", "run", "fast"], [], [], 121 | ("ROOT", "run",), (), (("run", "fast"),)) 122 | 123 | 124 | def test_parse(): 125 | """Simple tests for the PartialParse.parse function 126 | Warning: these are not exhaustive 127 | """ 128 | sentence = ["parse", "this", "sentence"] 129 | dependencies = PartialParse(sentence).parse(["S", "S", "S", "LA", "RA", "RA"]) 130 | dependencies = tuple(sorted(dependencies)) 131 | expected = (('ROOT', 'parse'), ('parse', 'sentence'), ('sentence', 'this')) 132 | assert dependencies == expected, \ 133 | "parse test resulted in dependencies {:}, expected {:}".format(dependencies, expected) 134 | assert tuple(sentence) == ("parse", "this", "sentence"), \ 135 | "parse test failed: the input sentence should not be modified" 136 | print "parse test passed!" 137 | 138 | 139 | class DummyModel: 140 | """Dummy model for testing the minibatch_parse function 141 | First shifts everything onto the stack and then does exclusively right arcs if the first word of 142 | the sentence is "right", "left" if otherwise. 
143 | """ 144 | def predict(self, partial_parses): 145 | return [("RA" if pp.stack[1] is "right" else "LA") if len(pp.buffer) == 0 else "S" 146 | for pp in partial_parses] 147 | 148 | 149 | def test_dependencies(name, deps, ex_deps): 150 | """Tests the provided dependencies match the expected dependencies""" 151 | deps = tuple(sorted(deps)) 152 | assert deps == ex_deps, \ 153 | "{:} test resulted in dependency list {:}, expected {:}".format(name, deps, ex_deps) 154 | 155 | 156 | def test_minibatch_parse(): 157 | """Simple tests for the minibatch_parse function 158 | Warning: these are not exhaustive 159 | """ 160 | sentences = [["right", "arcs", "only"], 161 | ["right", "arcs", "only", "again"], 162 | ["left", "arcs", "only"], 163 | ["left", "arcs", "only", "again"]] 164 | deps = minibatch_parse(sentences, DummyModel(), 2) 165 | test_dependencies("minibatch_parse", deps[0], 166 | (('ROOT', 'right'), ('arcs', 'only'), ('right', 'arcs'))) 167 | test_dependencies("minibatch_parse", deps[1], 168 | (('ROOT', 'right'), ('arcs', 'only'), ('only', 'again'), ('right', 'arcs'))) 169 | test_dependencies("minibatch_parse", deps[2], 170 | (('only', 'ROOT'), ('only', 'arcs'), ('only', 'left'))) 171 | test_dependencies("minibatch_parse", deps[3], 172 | (('again', 'ROOT'), ('again', 'arcs'), ('again', 'left'), ('again', 'only'))) 173 | print "minibatch_parse test passed!" 174 | 175 | if __name__ == '__main__': 176 | test_parse_step() 177 | test_parse() 178 | test_minibatch_parse() 179 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/__init__.py -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/__init__.pyc -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/utils/general_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | 5 | 6 | def get_minibatches(data, minibatch_size, shuffle=True): 7 | """ 8 | Iterates through the provided data one minibatch at at time. You can use this function to 9 | iterate through data in minibatches as follows: 10 | 11 | for inputs_minibatch in get_minibatches(inputs, minibatch_size): 12 | ... 13 | 14 | Or with multiple data sources: 15 | 16 | for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size): 17 | ... 18 | 19 | Args: 20 | data: there are two possible values: 21 | - a list or numpy array 22 | - a list where each element is either a list or numpy array 23 | minibatch_size: the maximum number of items in a minibatch 24 | shuffle: whether to randomize the order of returned data 25 | Returns: 26 | minibatches: the return value depends on data: 27 | - If data is a list/array it yields the next minibatch of data. 28 | - If data a list of lists/arrays it returns the next minibatch of each element in the 29 | list. 
This can be used to iterate through multiple data sources 30 | (e.g., features and labels) at the same time. 31 | 32 | """ 33 | list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray) 34 | data_size = len(data[0]) if list_data else len(data) 35 | indices = np.arange(data_size) 36 | if shuffle: 37 | np.random.shuffle(indices) 38 | for minibatch_start in np.arange(0, data_size, minibatch_size): 39 | minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size] 40 | yield [minibatch(d, minibatch_indices) for d in data] if list_data \ 41 | else minibatch(data, minibatch_indices) 42 | 43 | 44 | def minibatch(data, minibatch_idx): 45 | return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx] 46 | 47 | 48 | def test_all_close(name, actual, expected): 49 | if actual.shape != expected.shape: 50 | raise ValueError("{:} failed, expected output to have shape {:} but has shape {:}" 51 | .format(name, expected.shape, actual.shape)) 52 | if np.amax(np.fabs(actual - expected)) > 1e-6: 53 | raise ValueError("{:} failed, expected {:} but value is {:}".format(name, expected, actual)) 54 | else: 55 | print name, "passed!" 56 | 57 | 58 | def logged_loop(iterable, n=None): 59 | if n is None: 60 | n = len(iterable) 61 | step = max(1, n / 1000) 62 | prog = Progbar(n) 63 | for i, elem in enumerate(iterable): 64 | if i % step == 0 or i == n - 1: 65 | prog.update(i + 1) 66 | yield elem 67 | 68 | 69 | class Progbar(object): 70 | """ 71 | Progbar class copied from keras (https://github.com/fchollet/keras/) 72 | Displays a progress bar. 73 | # Arguments 74 | target: Total number of steps expected. 75 | interval: Minimum visual progress update interval (in seconds). 76 | """ 77 | 78 | def __init__(self, target, width=30, verbose=1): 79 | self.width = width 80 | self.target = target 81 | self.sum_values = {} 82 | self.unique_values = [] 83 | self.start = time.time() 84 | self.total_width = 0 85 | self.seen_so_far = 0 86 | self.verbose = verbose 87 | 88 | def update(self, current, values=[], exact=[]): 89 | """ 90 | Updates the progress bar. 91 | # Arguments 92 | current: Index of current step. 93 | values: List of tuples (name, value_for_last_step). 94 | The progress bar will display averages for these values. 95 | exact: List of tuples (name, value_for_last_step). 96 | The progress bar will display these values directly. 
97 | """ 98 | 99 | for k, v in values: 100 | if k not in self.sum_values: 101 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 102 | self.unique_values.append(k) 103 | else: 104 | self.sum_values[k][0] += v * (current - self.seen_so_far) 105 | self.sum_values[k][1] += (current - self.seen_so_far) 106 | for k, v in exact: 107 | if k not in self.sum_values: 108 | self.unique_values.append(k) 109 | self.sum_values[k] = [v, 1] 110 | self.seen_so_far = current 111 | 112 | now = time.time() 113 | if self.verbose == 1: 114 | prev_total_width = self.total_width 115 | sys.stdout.write("\b" * prev_total_width) 116 | sys.stdout.write("\r") 117 | 118 | numdigits = int(np.floor(np.log10(self.target))) + 1 119 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 120 | bar = barstr % (current, self.target) 121 | prog = float(current)/self.target 122 | prog_width = int(self.width*prog) 123 | if prog_width > 0: 124 | bar += ('='*(prog_width-1)) 125 | if current < self.target: 126 | bar += '>' 127 | else: 128 | bar += '=' 129 | bar += ('.'*(self.width-prog_width)) 130 | bar += ']' 131 | sys.stdout.write(bar) 132 | self.total_width = len(bar) 133 | 134 | if current: 135 | time_per_unit = (now - self.start) / current 136 | else: 137 | time_per_unit = 0 138 | eta = time_per_unit*(self.target - current) 139 | info = '' 140 | if current < self.target: 141 | info += ' - ETA: %ds' % eta 142 | else: 143 | info += ' - %ds' % (now - self.start) 144 | for k in self.unique_values: 145 | if type(self.sum_values[k]) is list: 146 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 147 | else: 148 | info += ' - %s: %s' % (k, self.sum_values[k]) 149 | 150 | self.total_width += len(info) 151 | if prev_total_width > self.total_width: 152 | info += ((prev_total_width-self.total_width) * " ") 153 | 154 | sys.stdout.write(info) 155 | sys.stdout.flush() 156 | 157 | if current >= self.target: 158 | sys.stdout.write("\n") 159 | 160 | if self.verbose == 2: 161 | if current >= self.target: 162 | info = '%ds' % (now - self.start) 163 | for k in self.unique_values: 164 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 165 | sys.stdout.write(info + "\n") 166 | 167 | def add(self, n, values=[]): 168 | self.update(self.seen_so_far+n, values) 169 | -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/utils/general_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/general_utils.pyc -------------------------------------------------------------------------------- /Assignment2/ZhichaoFu/assignment2/utils/parser_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Assignment2/ZhichaoFu/assignment2/utils/parser_utils.pyc -------------------------------------------------------------------------------- /Lecture11/1611.04558.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/1611.04558.pdf -------------------------------------------------------------------------------- /Lecture11/Lecture11.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/Lecture11.pdf -------------------------------------------------------------------------------- /Lecture11/Lecture11_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture11/Lecture11_highlight.pdf -------------------------------------------------------------------------------- /Lecture11/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture11 2 | 3 | + [Lecture11 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/Lecture11.pdf) 4 | + [Lecture11 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/Lecture11_highlight.pdf) 5 | + Highlight Paper: [Google’s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture11/1611.04558.pdf) 6 | 7 | 8 | -------------------------------------------------------------------------------- /Lecture12/1611.05358.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/1611.05358.pdf -------------------------------------------------------------------------------- /Lecture12/Lecture12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/Lecture12.pdf -------------------------------------------------------------------------------- /Lecture12/Lecture12_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture12/Lecture12_highlight.pdf -------------------------------------------------------------------------------- /Lecture12/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture12 2 | 3 | + [Lecture12 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/Lecture12.pdf) 4 | + [Lecture12 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/Lecture12_highlight.pdf) 5 | + Highlight Paper: [Lip Reading Sentences in the Wild](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture12/1611.05358.pdf) 6 | 7 | 8 | -------------------------------------------------------------------------------- /Lecture13/Lecture13.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture13/Lecture13.pdf -------------------------------------------------------------------------------- /Lecture13/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture13 2 | 3 | + [Lecture13 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture13/Lecture13.pdf) 4 | 5 | -------------------------------------------------------------------------------- /Lecture14/1508.06615.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/1508.06615.pdf -------------------------------------------------------------------------------- /Lecture14/D14-1181.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/D14-1181.pdf -------------------------------------------------------------------------------- /Lecture14/P14-1062.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/P14-1062.pdf -------------------------------------------------------------------------------- /Lecture14/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/README.md -------------------------------------------------------------------------------- /Lecture14/cs224n-2017-lecture13-CNNs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/cs224n-2017-lecture13-CNNs.pdf -------------------------------------------------------------------------------- /Lecture14/cs224n-2017-lecture13-highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture14/cs224n-2017-lecture13-highlight.pdf -------------------------------------------------------------------------------- /Lecture2/2016 Arora.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/2016 Arora.pdf -------------------------------------------------------------------------------- /Lecture2/CBOW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/CBOW.png -------------------------------------------------------------------------------- /Lecture2/HS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/HS.png -------------------------------------------------------------------------------- /Lecture2/Lecture2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/Lecture2.pdf -------------------------------------------------------------------------------- /Lecture2/Lecture2_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/Lecture2_highlight.pdf -------------------------------------------------------------------------------- 
/Lecture2/Lecture2_supplement.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Distributed Representations of Words and Phrases and their Compositionality\n", 8 | "
\n", 9 | "#### Efficient Estimation of Word Representations in Vector Space\n", 10 | "
\n", 11 | "### (一)CBOW(Continuous Bag-of-Words)\n", 12 | "
\n", 13 | "\n", 14 | "\n", 15 | "\n", 16 | "\\begin{eqnarray}\n", 17 | "\\boldsymbol v_{t+j}=V\\boldsymbol x_{t+j}\n", 18 | "\\tag{1}\\end{eqnarray}\n", 19 | "\n", 20 | "\\begin{eqnarray}\n", 21 | "\\hat{\\boldsymbol v}_t=\\frac{1}{2m}\\sum_j\\boldsymbol v_{t+j}\n", 22 | "\\tag{2}\\end{eqnarray}\n", 23 | "\n", 24 | "\\begin{eqnarray}\n", 25 | "\\boldsymbol z=U\\hat{\\boldsymbol v}_t\n", 26 | "\\tag{3}\\end{eqnarray}\n", 27 | "\n", 28 | "\\begin{eqnarray}\n", 29 | "\\hat y_{\\underline i}=P(w_{\\underline i}|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})=\\text{softmax}(z_{\\underline i})=\\text{softmax}(\\boldsymbol u_{\\underline i}^\\top \\hat{\\boldsymbol v}_t),\\quad w_{\\underline i}\\in \\mathbb V\n", 30 | "\\tag{4}\\end{eqnarray}\n", 31 | "\n", 32 | "
\n", 33 | "损失函数的推导:\n", 34 | "\\begin{aligned}\\mathcal L&=-\\log \\hat y_t\\\\&=-\\log P(w_t|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})\\\\&=-\\log \\text{softmax}(z_t)\\\\&=-\\log \\frac{\\exp (\\boldsymbol u_t^\\top \\hat{\\boldsymbol v}_t)}{\\sum_{k=1}^{|\\mathbb V|}\\exp (\\boldsymbol u_{\\underline k}^\\top \\hat{\\boldsymbol v}_t)}\\\\&=-\\boldsymbol u_t^\\top \\hat{\\boldsymbol v}_t+\\log \\sum_{k=1}^{|\\mathbb V|}\\exp (\\boldsymbol u_{\\underline k}^\\top \\hat{\\boldsymbol v}_t)\\\\&=-z_t+\\log \\sum_{k=1}^{|\\mathbb V|}\\exp z_{\\underline k} \\end{aligned}\n", 35 | "\n", 36 | "
\n", 37 | "
\n", 38 | "### (二)Hierarchical Softmax\n", 39 | "![](HS.png)\n", 40 | "\\begin{eqnarray}\n", 41 | "&\\sigma(\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)\\\\\n", 42 | "&1-\\sigma(\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)=\\sigma(-\\boldsymbol u_{n(w,j)}^\\top \\hat{\\boldsymbol v}_t)\n", 43 | "\\end{eqnarray}\n", 44 | "\n", 45 | "
\n", 46 | "
\n", 47 | "### (三)Negative Sampling" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [] 58 | } 59 | ], 60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 2", 63 | "language": "python", 64 | "name": "python2" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 2 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython2", 76 | "version": "2.7.12" 77 | } 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 2 81 | } 82 | -------------------------------------------------------------------------------- /Lecture2/README.md: -------------------------------------------------------------------------------- 1 | ## CS224n Lecture2 2 | 3 | + [Lecture2 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/Lecture2.pdf) 4 | 5 | + [Lecture2 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/Lecture2_highlight.pdf) 6 | 7 | + Highlight Paper: [A SIMPLE BUT TOUGH-TO-BEAT BASELINE FOR SENTENCE EMBEDDINGS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/2016%20Arora.pdf) 8 | 9 | + [word2vec supplement](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/word2vec.md) 10 | 11 | + Paper: [Skip-gram](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202013%20Mikolov.pdf) 12 | 13 | + Paper: [CBOW](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202013%20Mikolov-1.pdf) 14 | 15 | + Paper: [word2vec Explained](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture2/arXiv%202014%20Goldberg.pdf) 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /Lecture2/arXiv 2013 Mikolov-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2013 Mikolov-1.pdf -------------------------------------------------------------------------------- /Lecture2/arXiv 2013 Mikolov.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2013 Mikolov.pdf -------------------------------------------------------------------------------- /Lecture2/arXiv 2014 Goldberg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture2/arXiv 2014 Goldberg.pdf -------------------------------------------------------------------------------- /Lecture2/word2vec.md: -------------------------------------------------------------------------------- 1 | > Distributed Representations of Words and Phrases and their Compositionality 2 | > 3 | > Efficient Estimation of Word Representations in Vector Space 4 | 5 | 6 | 7 | ## CBOW(Continuous Bag-of-Words) 8 | 9 | $$ 10 | \boldsymbol v_{t+j}=V\boldsymbol x_{t+j} \\ 11 | \hat{\boldsymbol v}_t=\frac{1}{2m}\sum_j\boldsymbol v_{t+j} \\ 12 | \boldsymbol z=U\hat{\boldsymbol v}_t \\ 13 | \hat y_{i}=P(w_{ i}|w_{t-m},\cdots,w_{t-1},w_{t+1},\cdots,w_{t+m})=\text{softmax}(z_{ i})=\text{softmax}(\boldsymbol 
u_{ i}^\top \hat{\boldsymbol v}_t) 14 | $$ 15 | 16 | 损失函数的推导: 17 | $$ 18 | \begin{align*} 19 | \mathcal L &=-\log \hat y_t\\ 20 | &=-\log P(w_t|w_{t-m},...,w_{t-1},w_{t+1},...,w_{t+m})\\ 21 | &=-\log \text{softmax}(z_t)\\ 22 | &=-\log \frac{\exp (\boldsymbol u_t^\top \hat{\boldsymbol v}_t)}{\sum_{k=1}^{|\mathbb V|}\exp (\boldsymbol u_{\underline k}^\top \hat{\boldsymbol v}_t)}\\ 23 | &=-\boldsymbol u_t^\top \hat{\boldsymbol v}_t+\log \sum_{k=1}^{|\mathbb V|}\exp (\boldsymbol u_{\underline k}^\top \hat{\boldsymbol v}_t)\\ 24 | &=-z_t+\log \sum_{k=1}^{|\mathbb V|}\exp z_{\underline k} 25 | \end{align*} 26 | $$ 27 | 28 | ## Hierarchical Softmax 29 | 30 | ![](HS.png) 31 | 32 | 33 | 34 | 35 | $$ 36 | \begin{eqnarray} 37 | 38 | &\sigma(\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t)\\ 39 | 40 | &1-\sigma(\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t)=\sigma(-\boldsymbol u_{n(w,j)}^\top \hat{\boldsymbol v}_t) 41 | 42 | \end{eqnarray} 43 | $$ 44 | 45 | ## Negative Sampling 46 | 47 | ==构造语料中不存在的上下文词对作为负样本,最大化正样本的同时最小化负样本的概率== 48 | 49 | 正样本的概率: 50 | $$ 51 | P(\mathbb D=1|w,c)=\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c}) 52 | $$ 53 | 全部正样本的似然: 54 | $$ 55 | \prod_{(w,c)\in\mathbb D}P(\mathbb D=1|w,c) 56 | $$ 57 | 全部负样本的似然: 58 | $$ 59 | \prod_{(w,c)\notin\mathbb D}P(\mathbb D=1|w,c) 60 | $$ 61 | 最大化下式: 62 | $$ 63 | \begin{align*} 64 | &\arg\max_{\theta}\prod_{(w,c)\in\mathbb D}P(\mathbb D=1|w,c)\prod_{(w,c)\notin\mathbb D}(1-P(\mathbb D=1|w,c)) \\ 65 | =&\arg\max_{\theta}\sum_{(w,c)\in\mathbb D}\log\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c}) + \sum_{(w,c)\notin\mathbb D}\log\sigma(-\boldsymbol u_{w}^\top \boldsymbol v_{c}) 66 | \end{align*}\\ 67 | 68 | \log\sigma(\boldsymbol u_{w}^\top \boldsymbol v_{c}) + \sum_{w_j\in W_{negative}}\log\sigma(-\boldsymbol u_{w_j}^\top \boldsymbol v_{c}) 69 | $$ 70 | 71 | 72 | ## Sub-Sampling 73 | 74 | $$ 75 | p(w_i)=1-\sqrt{\frac{sample}{freq(w_i)}} 76 | $$ 77 | 78 | $$ 79 | p(w_i)=1-\Bigg(\sqrt{\frac{sample}{freq(w_i)}} + \frac{sample}{freq(w_i)}\Bigg) 80 | $$ 81 | 82 | $sample$为超参数,建议设置为$1e^{-3}$到$1e^{-5}$。$freq(w_i)$ 表示词频。 83 | 84 | ​ 85 | ​ 86 | 87 | ​ 88 | ​ 89 | ​ 90 | 91 | 92 | ​ 93 | ​ 94 | ​ 95 | 96 | -------------------------------------------------------------------------------- /Lecture3/2014 Pennington.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/2014 Pennington.pdf -------------------------------------------------------------------------------- /Lecture3/Lecture3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/Lecture3.pdf -------------------------------------------------------------------------------- /Lecture3/Lecture3_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture3/Lecture3_highlight.pdf -------------------------------------------------------------------------------- /Lecture3/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture3 2 | 3 | + [Lecture3 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/Lecture3.pdf) 4 | + [Lecture3 highlight 
PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/Lecture3_highlight.pdf) 5 | + Highlight Paper: [GloVe: Global Vectors for Word Representation](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture3/2014%20Pennington.pdf) 6 | 7 | -------------------------------------------------------------------------------- /Lecture4/Lecture4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture4/Lecture4.pdf -------------------------------------------------------------------------------- /Lecture4/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture4 2 | 3 | + [Lecture4 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture4/Lecture4.pdf) 4 | 5 | -------------------------------------------------------------------------------- /Lecture5/A Primer on Neural Network Models.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture5/A Primer on Neural Network Models.pdf -------------------------------------------------------------------------------- /Lecture5/README.md: -------------------------------------------------------------------------------- 1 | Lecture5 ([Slides & Highlight Slides](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture5/lecture5.pdf) & [Paper](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture5/A%20Primer%20on%20Neural%20Network%20Models.pdf)) 2 | -------------------------------------------------------------------------------- /Lecture5/lecture5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture5/lecture5.pdf -------------------------------------------------------------------------------- /Lecture6/Lecture6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/Lecture6.pdf -------------------------------------------------------------------------------- /Lecture6/Lecture6_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/Lecture6_highlight.pdf -------------------------------------------------------------------------------- /Lecture6/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture6 2 | 3 | + [Lecture6 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/Lecture6.pdf) 4 | + [Lecture6 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/Lecture6_highlight.pdf) 5 | + Highlight Paper: [Improving-distributional-similarity](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture6/improving-distributional-similarity-tacl-2015.pdf) 6 | 7 | -------------------------------------------------------------------------------- /Lecture6/improving-distributional-similarity-tacl-2015.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture6/improving-distributional-similarity-tacl-2015.pdf -------------------------------------------------------------------------------- /Lecture8/Lecture8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/Lecture8.pdf -------------------------------------------------------------------------------- /Lecture8/Lecture8_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/Lecture8_highlight.pdf -------------------------------------------------------------------------------- /Lecture8/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture8 2 | 3 | + [Lecture8 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/Lecture8.pdf) 4 | + [Lecture8 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/Lecture8_highlight.pdf) 5 | + Highlight Paper: [Structured Training for Neural Network Transition-Based Parsing](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture8/acl15.pdf) 6 | 7 | 8 | -------------------------------------------------------------------------------- /Lecture8/acl15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture8/acl15.pdf -------------------------------------------------------------------------------- /Lecture9/1602.02410.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/1602.02410.pdf -------------------------------------------------------------------------------- /Lecture9/1703.02573.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/1703.02573.pdf -------------------------------------------------------------------------------- /Lecture9/Lecture9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/Lecture9.pdf -------------------------------------------------------------------------------- /Lecture9/Lecture9_highlight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/Lecture9_highlight.pdf -------------------------------------------------------------------------------- /Lecture9/README.md: -------------------------------------------------------------------------------- 1 | # CS224n Lecture9 2 | 3 | + [Lecture9 PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/Lecture9.pdf) 4 | 5 | + [Lecture9 highlight PPT](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/Lecture9_highlight.pdf) 6 | 7 | + Highlight Paper: 8 | 9 | + [SUBWORD LANGUAGE MODELING WITH NEURAL 
NETWORKS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/char.pdf) 10 | + [DATA NOISING AS SMOOTHING IN NEURAL NETWORK LANGUAGE MODELS](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/1703.02573.pdf) 11 | + [Exploring the Limits of Language Modeling](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/blob/master/Lecture9/1602.02410.pdf) 12 | 13 | 14 | ​ 15 | 16 | -------------------------------------------------------------------------------- /Lecture9/char.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JT-Ushio/ECNU17_Summer_Seminar/8353413f0e5302b90694d864a9213e9b7c62668f/Lecture9/char.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ECNU17_Summer_Seminar 2 | ECNU NLP group learns CS224n in the form of seminars in the 2017 summer. 3 | 4 | 5 | 6 | ## Seminar Participants 7 | 8 | 纪焘、黄子寅、杜雨沛 9 | 10 | *钟鸣(7月4日~7月16日回家)* 11 | 12 | *姚岳坤(7月10日之后)* 13 | 14 | *郑淇(7月15日之后、因为实习所以尽量少讲)* 15 | 16 | *[韦阳](https://github.com/godweiyang)、付志超(远程参与)* 17 | 18 | *焦乙竹、王江舟(旁听)* 19 | 20 | 21 | 22 | ## Introduce CS224n 23 | 24 | #### Lecture video Collection 25 | 26 | + [YouTube](https://www.youtube.com/playlist?list=PL3FW7Lu3i5Jsnh1rnUwq_TcylNr7EkRe6) 27 | + [Bilibili](http://space.bilibili.com/23852932#!/channel/detail?cid=11177) 28 | 29 | 30 | 31 | #### Lecture Materials 32 | 33 | [CS224n: Natural Language Processing with Deep Learning](http://web.stanford.edu/class/cs224n/syllabus.html) 34 | 35 | 36 | 37 | #### Lecture List 38 | 39 | | Event | Date | Description | 描述 | Speaker | 40 | | :---------------: | :----: | :--------------------------------------- | :--------------- | :--------------------------------------: | 41 | | Lecture1 | 7.3 一 | Introduction to NLP and Deep Learning | 介绍自然语言和深度学习 | 王江舟 | 42 | | Lecture2 | 7.5 三 | Word Vector Representations:word2vec | Word2Vec词向量表示 | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture2) | 43 | | Lecture3 | 7.8 六 | Advanced Word Vector Representations | 高级词向量表示 | [杜雨沛](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture3) | 44 | | Lecture4 | 7.10 一 | Word Window Classification and Neural Networks | 词窗分类与神经网络 | [杜雨沛](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture4) | 45 | | Lecture5 | 7.12 三 | Backpropagation | 反向传播 | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture5) | 46 | | Lecture6 | 7.15 六 | Dependency Parsing | 依存句法分析 | [姚岳坤](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture6) | 47 | | **Assignment #1** | 7.15 六 | | | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment1) | 48 | | ~~Lecture7~~ | | ~~Introduction to TensorFlow~~ | ~~介绍TensorFlow~~ | | 49 | | Lecture8 | 7. 
18二 | Recurrent Neural Networks and Language Models | RNN与语言模型 | [钟鸣](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture8) | 50 | | Lecture9 | 7.20四 | Machine translation and advanced recurrent LSTMs and GRUs | 机器翻译与高级RNN | [钟鸣](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture9) | 51 | | ~~Lecture10~~ | | ~~Midterm Review~~ | ~~中期回顾~~ | | 52 | | Lecture11 | 7.22 六 | Neural Machine Translation and Models with Attention | NMT与注意力模型 | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture11) | 53 | | Lecture12 | 7.24 一 | Gated recurrent units and further topics in NMT | GRU与NMT进阶 | [黄子寅](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture12) | 54 | | Lecture13 | 7.26 三 | End-to-end models for Speech Processing | 端到端语音处理 | [姚岳坤](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture13) | 55 | | Lecture14 | 7.29 六 | Convolutional Neural Networks | CNN | [郑淇](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Lecture14) | 56 | | **Assignment #2** | 7.29 六 | | | [纪焘](https://github.com/JT-Ushio/ECNU17_Summer_Seminar/tree/master/Assignment2) | 57 | | Lecture15 | 7.31 一 | Tree Recursive Neural Networks and Constituency Parsing | 树RNN与短语句法分析 | 钟鸣 | 58 | | Lecture16 | 8.2 三 | Coreference Resolution | 共指消解 | 杜雨沛 | 59 | | Lecture17 | 8.6 日 | Dynamic Neural Networks for Question Answering | 动态神经网络QA | 钟鸣 | 60 | | Lecture18 | 8.7 一 | Issues in NLP and Possible Architectures for NLP | NLP中的问题与可能的解决框架 | 韦阳 | 61 | | Lecture19 | 8.9 三 | Tackling the Limits of Deep Learning for NLP | 聚焦深度学习在NLP上的局限性 | 郑淇 | 62 | | **Assignment #3** | 8.9 三 | | | 纪焘 | 63 | | **Assignment #4** | 9.10 六 | | | 纪焘 | 64 | 65 | 66 | 67 | --------------------------------------------------------------------------------