├── README.md ├── assignment1 ├── .gitignore ├── .ipynb_checkpoints │ ├── features-checkpoint.ipynb │ ├── knn-checkpoint.ipynb │ ├── softmax-checkpoint.ipynb │ ├── svm-checkpoint.ipynb │ └── two_layer_net-checkpoint.ipynb ├── README.md ├── collectSubmission.sh ├── cs231n │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── k_nearest_neighbor.py │ │ ├── linear_classifier.py │ │ ├── linear_svm.py │ │ ├── neural_net.py │ │ └── softmax.py │ ├── data_utils.py │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── features.py │ ├── gradient_check.py │ └── vis_utils.py ├── features.ipynb ├── frameworkpython ├── knn.ipynb ├── requirements.txt ├── softmax.ipynb ├── start_ipython_osx.sh ├── svm.ipynb └── two_layer_net.ipynb ├── assignment2 ├── .gitignore ├── .ipynb_checkpoints │ ├── BatchNormalization-checkpoint.ipynb │ ├── ConvolutionalNetworks-checkpoint.ipynb │ ├── Dropout-checkpoint.ipynb │ └── FullyConnectedNets-checkpoint.ipynb ├── BatchNormalization.ipynb ├── ConvolutionalNetworks.ipynb ├── Dropout.ipynb ├── FullyConnectedNets.ipynb ├── README.md ├── collectSubmission.sh ├── cs231n │ ├── .gitignore │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── cnn.py │ │ └── fc_net.py │ ├── data_utils.py │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── fast_layers.py │ ├── gradient_check.py │ ├── im2col.py │ ├── im2col_cython.pyx │ ├── layer_utils.py │ ├── layers.py │ ├── optim.py │ ├── setup.py │ ├── solver.py │ └── vis_utils.py ├── frameworkpython ├── kitten.jpg ├── puppy.jpg ├── requirements.txt └── start_ipython_osx.sh └── assignment3 ├── .gitignore ├── ImageGeneration.ipynb ├── ImageGradients.ipynb ├── LSTM_Captioning.ipynb ├── RNN_Captioning.ipynb ├── collectSubmission.sh ├── cs231n ├── .gitignore ├── __init__.py ├── captioning_solver.py ├── classifiers │ ├── __init__.py │ ├── pretrained_cnn.py │ └── rnn.py ├── coco_utils.py ├── data_utils.py ├── datasets │ ├── get_coco_captioning.sh │ ├── get_pretrained_model.sh │ └── 
get_tiny_imagenet_a.sh ├── fast_layers.py ├── gradient_check.py ├── im2col.py ├── im2col_cython.pyx ├── image_utils.py ├── layer_utils.py ├── layers.py ├── optim.py ├── rnn_layers.py └── setup.py ├── frameworkpython ├── kitten.jpg ├── requirements.txt ├── sky.jpg └── start_ipython_osx.sh /README.md: -------------------------------------------------------------------------------- 1 | ## CS231n Convolutional Neural Networks for Visual Recognition 2 | 3 | 斯坦福 cs231n 作业代码实践,代码实现主要参考了 [lightaime/cs231n](https://github.com/lightaime/cs231n) 4 | 5 | - 教程笔记 [cs231n.github.io](http://cs231n.github.io/) 6 | - 课程主页 [stanford cs231n](http://cs231n.stanford.edu/index.html) 7 | - 进度安排 [course syllabus](http://cs231n.stanford.edu/syllabus.html) 8 | 9 | 上面的网页有时候不稳定,可能要 FQ 才能访问。 10 | 11 | 下载课程作业源代码:  [Assignment #1](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment1.zip) & [Assignment #2](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment2.zip) & [Assignment #3](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment3.zip) 12 | 13 | 网上有很多的[资料](http://blog.csdn.net/zhangxb35/article/details/55223825),包括中文翻译,课程视频等,但是个人觉得都不如写完作业代码的收获大。 14 | 15 | 我的 CSDN 博客笔记: [cs231n 课程作业](http://blog.csdn.net/zhangxb35/article/category/6727687) 16 | 17 | --- 18 | 19 | 注:我的这个是 2016 年做的作业,内容略旧,仅供参考。 20 | -------------------------------------------------------------------------------- /assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Winter 2016. 
class KNearestNeighbor(object):
    """A k-nearest-neighbor classifier using L2 (Euclidean) distance."""

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (num_train,) containing the training labels,
          where y[i] is the label for X[i]. Labels must be non-negative
          integers because predict_labels counts votes with np.bincount.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
          of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points (0, 1 or 2).

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for
          the test data, where y[i] is the predicted label for the test point
          X[i].

        Raises:
        - ValueError: if num_loops is not 0, 1 or 2.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a nested loop over both the training data
        and the test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth
          training point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        # range (not xrange) so the code runs on both Python 2 and Python 3.
        for i in range(num_test):
            for j in range(num_train):
                diff = X[i] - self.X_train[j]
                dists[i, j] = np.sqrt(np.sum(diff ** 2))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts X[i] from every training row at once.
            dists[i] = np.sqrt(np.sum((self.X_train - X[i]) ** 2, axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, i.e. one
        matrix multiplication and two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        dists += np.sum(self.X_train ** 2, axis=1).reshape(1, num_train)
        dists += np.sum(X ** 2, axis=1).reshape(num_test, 1)
        dists -= 2 * np.dot(X, self.X_train.T)
        # Floating-point cancellation can leave a tiny negative value where the
        # true squared distance is zero; clamp so that sqrt never yields NaN.
        np.maximum(dists, 0, out=dists)
        return np.sqrt(dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance between the ith test point and the jth training
          point.
        - k: The number of nearest neighbors that vote for each label.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for
          the test data, where y[i] is the predicted label for the test point
          X[i].
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Labels of the k training points closest to the ith test point.
            nearest = np.argsort(dists[i])[:k]
            closest_y = self.y_train[nearest]
            # bincount tallies the votes per label; argmax returns the first
            # maximum, so ties are broken in favour of the smaller label.
            y_pred[i] = np.bincount(closest_y).argmax()

        return y_pred
22 | - num_iters: (integer) number of steps to take when optimizing 23 | - batch_size: (integer) number of training examples to use at each step. 24 | - verbose: (boolean) If true, print progress during optimization. 25 | 26 | Outputs: 27 | A list containing the value of the loss function at each training iteration. 28 | """ 29 | num_train, dim = X.shape 30 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 31 | if self.W is None: 32 | # lazily initialize W 33 | self.W = 0.001 * np.random.randn(dim, num_classes) 34 | 35 | # Run stochastic gradient descent to optimize W 36 | loss_history = [] 37 | for it in xrange(num_iters): 38 | X_batch = None 39 | y_batch = None 40 | 41 | ######################################################################### 42 | # TODO: # 43 | # Sample batch_size elements from the training data and their # 44 | # corresponding labels to use in this round of gradient descent. # 45 | # Store the data in X_batch and their corresponding labels in # 46 | # y_batch; after sampling X_batch should have shape (dim, batch_size) # 47 | # and y_batch should have shape (batch_size,) # 48 | # # 49 | # Hint: Use np.random.choice to generate indices. Sampling with # 50 | # replacement is faster than sampling without replacement. # 51 | ######################################################################### 52 | pass 53 | indices = np.random.choice(num_train, batch_size) 54 | X_batch = X[indices] 55 | y_batch = y[indices] 56 | ######################################################################### 57 | # END OF YOUR CODE # 58 | ######################################################################### 59 | 60 | # evaluate loss and gradient 61 | loss, grad = self.loss(X_batch, y_batch, reg) 62 | loss_history.append(loss) 63 | 64 | # perform parameter update 65 | ######################################################################### 66 | # TODO: # 67 | # Update the weights using the gradient and the learning rate. 
def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: A numpy array of shape (N, D); each row is a D-dimensional point.
      The scores are computed as X.dot(self.W), so the second dimension of
      X must match the first dimension of self.W.

    Returns:
    - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
      array of length N, and each element is an integer giving the index of
      the highest-scoring column of X.dot(self.W).
    """
    # One row of class scores per input point; the winning column is the label.
    scores = np.dot(X, self.W)
    y_pred = np.argmax(scores, axis=1)
    return y_pred
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # range (not xrange) so the code runs on both Python 2 and Python 3.
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Each violated margin pushes the offending class column
                # towards X[i] and the correct class column away from it.
                dW[:, j] += X[i]
                dW[:, y[i]] -= X[i]

    # The loop accumulated sums over the minibatch; convert them to averages.
    loss /= num_train
    dW /= num_train

    # L2 regularization: 0.5 * reg * ||W||^2 and its gradient reg * W.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W

    return loss, dW
67 | """ 68 | loss = 0.0 69 | dW = np.zeros(W.shape) # initialize the gradient as zero 70 | 71 | ############################################################################# 72 | # TODO: # 73 | # Implement a vectorized version of the structured SVM loss, storing the # 74 | # result in loss. # 75 | ############################################################################# 76 | pass 77 | N = X.shape[0] 78 | #scores = np.dot(X, W) 79 | #margin = scores - scores[range(0, N), y].reshape(N, 1) + 1 80 | #margin[range(0, N), y] = 0 81 | #margin = margin * (margin > 0) # max(0, s_j - s_yi + delta) 82 | #loss += np.sum(margin) / N + 0.5 * reg * np.sum(W * W) 83 | scores = X.dot(W) # N x C 84 | margin = scores - scores[range(0,N), y].reshape(-1, 1) + 1 # N x C 85 | margin[range(N), y] = 0 86 | margin = (margin > 0) * margin 87 | loss += margin.sum() / N 88 | loss += 0.5 * reg * np.sum(W * W) 89 | ############################################################################# 90 | # END OF YOUR CODE # 91 | ############################################################################# 92 | 93 | 94 | ############################################################################# 95 | # TODO: # 96 | # Implement a vectorized version of the gradient for the structured SVM # 97 | # loss, storing the result in dW. # 98 | # # 99 | # Hint: Instead of computing the gradient from scratch, it may be easier # 100 | # to reuse some of the intermediate values that you used to compute the # 101 | # loss. 
# 102 | ############################################################################# 103 | pass 104 | counts = (margin > 0).astype(int) 105 | counts[range(N), y] = - np.sum(counts, axis = 1) 106 | dW += np.dot(X.T, counts) / N + reg * W 107 | ############################################################################# 108 | # END OF YOUR CODE # 109 | ############################################################################# 110 | 111 | return loss, dW 112 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/neural_net.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #import matplotlib.pyplot as plt 3 | 4 | 5 | class TwoLayerNet(object): 6 | """ 7 | A two-layer fully-connected neural network. The net has an input dimension of 8 | N, a hidden layer dimension of H, and performs classification over C classes. 9 | We train the network with a softmax loss function and L2 regularization on the 10 | weight matrices. The network uses a ReLU nonlinearity after the first fully 11 | connected layer. 12 | 13 | In other words, the network has the following architecture: 14 | 15 | input - fully connected layer - ReLU - fully connected layer - softmax 16 | 17 | The outputs of the second fully-connected layer are the scores for each class. 18 | """ 19 | 20 | def __init__(self, input_size, hidden_size, output_size, std=1e-4): 21 | """ 22 | Initialize the model. Weights are initialized to small random values and 23 | biases are initialized to zero. Weights and biases are stored in the 24 | variable self.params, which is a dictionary with the following keys: 25 | 26 | W1: First layer weights; has shape (D, H) 27 | b1: First layer biases; has shape (H,) 28 | W2: Second layer weights; has shape (H, C) 29 | b2: Second layer biases; has shape (C,) 30 | 31 | Inputs: 32 | - input_size: The dimension D of the input data. 
33 | - hidden_size: The number of neurons H in the hidden layer. 34 | - output_size: The number of classes C. 35 | """ 36 | self.params = {} 37 | self.params['W1'] = std * np.random.randn(input_size, hidden_size) 38 | self.params['b1'] = np.zeros(hidden_size) 39 | self.params['W2'] = std * np.random.randn(hidden_size, output_size) 40 | self.params['b2'] = np.zeros(output_size) 41 | 42 | def loss(self, X, y=None, reg=0.0): 43 | """ 44 | Compute the loss and gradients for a two layer fully connected neural 45 | network. 46 | 47 | Inputs: 48 | - X: Input data of shape (N, D). Each X[i] is a training sample. 49 | - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is 50 | an integer in the range 0 <= y[i] < C. This parameter is optional; if it 51 | is not passed then we only return scores, and if it is passed then we 52 | instead return the loss and gradients. 53 | - reg: Regularization strength. 54 | 55 | Returns: 56 | If y is None, return a matrix scores of shape (N, C) where scores[i, c] is 57 | the score for class c on input X[i]. 58 | 59 | If y is not None, instead return a tuple of: 60 | - loss: Loss (data loss and regularization loss) for this batch of training 61 | samples. 62 | - grads: Dictionary mapping parameter names to gradients of those parameters 63 | with respect to the loss function; has the same keys as self.params. 64 | """ 65 | # Unpack variables from the params dictionary 66 | W1, b1 = self.params['W1'], self.params['b1'] 67 | W2, b2 = self.params['W2'], self.params['b2'] 68 | N, D = X.shape 69 | 70 | # Compute the forward pass 71 | scores = None 72 | ############################################################################# 73 | # TODO: Perform the forward pass, computing the class scores for the input. # 74 | # Store the result in the scores variable, which should be an array of # 75 | # shape (N, C). 
# 76 | ############################################################################# 77 | pass 78 | hidden_layer = np.maximum(0, np.dot(X, W1) + b1) 79 | scores = np.dot(hidden_layer, W2) + b2 80 | ############################################################################# 81 | # END OF YOUR CODE # 82 | ############################################################################# 83 | 84 | # If the targets are not given then jump out, we're done 85 | if y is None: 86 | return scores 87 | 88 | # Compute the loss 89 | loss = None 90 | ############################################################################# 91 | # TODO: Finish the forward pass, and compute the loss. This should include # 92 | # both the data loss and L2 regularization for W1 and W2. Store the result # 93 | # in the variable loss, which should be a scalar. Use the Softmax # 94 | # classifier loss. So that your results match ours, multiply the # 95 | # regularization loss by 0.5 # 96 | ############################################################################# 97 | pass 98 | f = scores - np.max(scores, axis = 1, keepdims = True) 99 | loss = -f[range(N), y].sum() + np.log(np.exp(f).sum(axis = 1)).sum() 100 | loss = loss / N + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2)) 101 | ############################################################################# 102 | # END OF YOUR CODE # 103 | ############################################################################# 104 | 105 | # Backward pass: compute gradients 106 | grads = {} 107 | ############################################################################# 108 | # TODO: Compute the backward pass, computing the derivatives of the weights # 109 | # and biases. Store the results in the grads dictionary. 
For example, # 110 | # grads['W1'] should store the gradient on W1, and be a matrix of same size # 111 | ############################################################################# 112 | pass 113 | dscore = np.exp(f) / np.exp(f).sum(axis = 1, keepdims = True) 114 | dscore[range(N), y] -= 1 115 | dscore /= N 116 | grads['W2'] = np.dot(hidden_layer.T, dscore) + reg * W2 117 | grads['b2'] = np.sum(dscore, axis = 0) 118 | 119 | dhidden = np.dot(dscore, W2.T) 120 | dhidden[hidden_layer <= 0.00001] = 0 121 | 122 | grads['W1'] = np.dot(X.T, dhidden) + reg * W1 123 | grads['b1'] = np.sum(dhidden, axis = 0) 124 | ############################################################################# 125 | # END OF YOUR CODE # 126 | ############################################################################# 127 | 128 | return loss, grads 129 | 130 | def train(self, X, y, X_val, y_val, 131 | learning_rate=1e-3, learning_rate_decay=0.95, 132 | reg=1e-5, num_iters=100, 133 | batch_size=200, verbose=False): 134 | """ 135 | Train this neural network using stochastic gradient descent. 136 | 137 | Inputs: 138 | - X: A numpy array of shape (N, D) giving training data. 139 | - y: A numpy array f shape (N,) giving training labels; y[i] = c means that 140 | X[i] has label c, where 0 <= c < C. 141 | - X_val: A numpy array of shape (N_val, D) giving validation data. 142 | - y_val: A numpy array of shape (N_val,) giving validation labels. 143 | - learning_rate: Scalar giving learning rate for optimization. 144 | - learning_rate_decay: Scalar giving factor used to decay the learning rate 145 | after each epoch. 146 | - reg: Scalar giving regularization strength. 147 | - num_iters: Number of steps to take when optimizing. 148 | - batch_size: Number of training examples to use per step. 149 | - verbose: boolean; if true print progress during optimization. 
150 | """ 151 | num_train = X.shape[0] 152 | iterations_per_epoch = max(num_train / batch_size, 1) 153 | 154 | # Use SGD to optimize the parameters in self.model 155 | loss_history = [] 156 | train_acc_history = [] 157 | val_acc_history = [] 158 | 159 | for it in xrange(num_iters): 160 | X_batch = None 161 | y_batch = None 162 | 163 | ######################################################################### 164 | # TODO: Create a random minibatch of training data and labels, storing # 165 | # them in X_batch and y_batch respectively. # 166 | ######################################################################### 167 | pass 168 | indices = np.random.choice(num_train, batch_size, replace=True) 169 | X_batch = X[indices] 170 | y_batch = y[indices] 171 | ######################################################################### 172 | # END OF YOUR CODE # 173 | ######################################################################### 174 | 175 | # Compute loss and gradients using the current minibatch 176 | loss, grads = self.loss(X_batch, y=y_batch, reg=reg) 177 | loss_history.append(loss) 178 | 179 | ######################################################################### 180 | # TODO: Use the gradients in the grads dictionary to update the # 181 | # parameters of the network (stored in the dictionary self.params) # 182 | # using stochastic gradient descent. You'll need to use the gradients # 183 | # stored in the grads dictionary defined above. 
# 184 | ######################################################################### 185 | pass 186 | self.params['W1'] -= learning_rate * grads['W1'] 187 | self.params['b1'] -= learning_rate * grads['b1'] 188 | self.params['W2'] -= learning_rate * grads['W2'] 189 | self.params['b2'] -= learning_rate * grads['b2'] 190 | ######################################################################### 191 | # END OF YOUR CODE # 192 | ######################################################################### 193 | 194 | if verbose and it % 100 == 0: 195 | print 'iteration %d / %d: loss %f' % (it, num_iters, loss) 196 | 197 | # Every epoch, check train and val accuracy and decay learning rate. 198 | if it % iterations_per_epoch == 0: 199 | # Check accuracy 200 | train_acc = (self.predict(X_batch) == y_batch).mean() 201 | val_acc = (self.predict(X_val) == y_val).mean() 202 | train_acc_history.append(train_acc) 203 | val_acc_history.append(val_acc) 204 | 205 | # Decay learning rate 206 | learning_rate *= learning_rate_decay 207 | 208 | return { 209 | 'loss_history': loss_history, 210 | 'train_acc_history': train_acc_history, 211 | 'val_acc_history': val_acc_history, 212 | } 213 | 214 | def predict(self, X): 215 | """ 216 | Use the trained weights of this two-layer network to predict labels for 217 | data points. For each data point we predict scores for each of the C 218 | classes, and assign each data point to the class with the highest score. 219 | 220 | Inputs: 221 | - X: A numpy array of shape (N, D) giving N D-dimensional data points to 222 | classify. 223 | 224 | Returns: 225 | - y_pred: A numpy array of shape (N,) giving predicted labels for each of 226 | the elements of X. For all i, y_pred[i] = c means that X[i] is predicted 227 | to have class c, where 0 <= c < C. 228 | """ 229 | y_pred = None 230 | 231 | ########################################################################### 232 | # TODO: Implement this function; it should be VERY simple! 
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax (cross-entropy) loss and gradient, written with explicit loops.

    Inputs:
    - W: numpy array of shape (D, C) holding the weights.
    - X: numpy array of shape (N, D) holding a minibatch of data.
    - y: numpy array of shape (N,) holding labels; y[i] = c means that X[i]
      has label c, where 0 <= c < C.
    - reg: (float) L2 regularization strength.

    Returns a tuple of:
    - loss: the data loss averaged over the N examples plus
      0.5 * reg * sum(W * W).
    - dW: gradient of that loss with respect to W; same shape as W.
    """
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    num_classes = W.shape[1]

    for i in range(num_train):
        scores = np.dot(X[i], W)
        # Shift so the largest score is 0; exp() then cannot overflow and the
        # softmax value is unchanged.
        scores -= np.max(scores)
        exp_scores = np.exp(scores)
        normalizer = exp_scores.sum()

        # Per-example loss: log-sum-exp minus the correct-class score.
        loss = loss + np.log(normalizer) - scores[y[i]]

        # Gradient: (softmax probability - indicator of the true class) * X[i].
        dW[:, y[i]] -= X[i]
        for j in range(num_classes):
            dW[:, j] += exp_scores[j] / normalizer * X[i]

    # Average over the batch and add the L2 penalty and its gradient.
    loss = loss / num_train + 0.5 * reg * np.sum(W * W)
    dW = dW / num_train + reg * W

    return loss, dW
def load_CIFAR_batch(filename):
    """
    Load a single pickled CIFAR batch file.

    Inputs:
    - filename: path to a pickled dict with a 'data' entry (one flattened
      3*32*32 image per row, channel-major) and a 'labels' entry.

    Returns a tuple of:
    - X: float array of shape (N, 32, 32, 3), channels last.
    - Y: array of shape (N,) with the labels.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this on trusted dataset files.
    with open(filename, 'rb') as f:
        datadict = pickle.load(f)
    X = datadict['data']
    Y = datadict['labels']
    # Infer the number of images (-1) instead of hard-coding 10000 so that
    # batches of any size load; rows are (channel, height, width) and are
    # transposed to (height, width, channel).
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y


def load_CIFAR10(ROOT):
    """
    Load the full CIFAR-10 dataset from a directory.

    Inputs:
    - ROOT: directory containing the files data_batch_1 ... data_batch_5 and
      test_batch.

    Returns a tuple (Xtr, Ytr, Xte, Yte): the five training batches
    concatenated in order, followed by the test batch.
    """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    # Drop the references to the last per-batch arrays; the concatenated
    # copies are what gets returned.
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
36 | 37 | Inputs: 38 | - path: String giving path to the directory to load. 39 | - dtype: numpy datatype used to load the data. 40 | 41 | Returns: A tuple of 42 | - class_names: A list where class_names[i] is a list of strings giving the 43 | WordNet names for class i in the loaded dataset. 44 | - X_train: (N_tr, 3, 64, 64) array of training images 45 | - y_train: (N_tr,) array of training labels 46 | - X_val: (N_val, 3, 64, 64) array of validation images 47 | - y_val: (N_val,) array of validation labels 48 | - X_test: (N_test, 3, 64, 64) array of testing images. 49 | - y_test: (N_test,) array of test labels; if test labels are not available 50 | (such as in student code) then y_test will be None. 51 | """ 52 | # First load wnids 53 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 54 | wnids = [x.strip() for x in f] 55 | 56 | # Map wnids to integer labels 57 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 58 | 59 | # Use words.txt to get names for each class 60 | with open(os.path.join(path, 'words.txt'), 'r') as f: 61 | wnid_to_words = dict(line.split('\t') for line in f) 62 | for wnid, words in wnid_to_words.iteritems(): 63 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 64 | class_names = [wnid_to_words[wnid] for wnid in wnids] 65 | 66 | # Next load training data. 
67 | X_train = [] 68 | y_train = [] 69 | for i, wnid in enumerate(wnids): 70 | if (i + 1) % 20 == 0: 71 | print 'loading training data for synset %d / %d' % (i + 1, len(wnids)) 72 | # To figure out the filenames we need to open the boxes file 73 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 74 | with open(boxes_file, 'r') as f: 75 | filenames = [x.split('\t')[0] for x in f] 76 | num_images = len(filenames) 77 | 78 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 79 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 80 | for j, img_file in enumerate(filenames): 81 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 82 | img = imread(img_file) 83 | if img.ndim == 2: 84 | ## grayscale file 85 | img.shape = (64, 64, 1) 86 | X_train_block[j] = img.transpose(2, 0, 1) 87 | X_train.append(X_train_block) 88 | y_train.append(y_train_block) 89 | 90 | # We need to concatenate all training data 91 | X_train = np.concatenate(X_train, axis=0) 92 | y_train = np.concatenate(y_train, axis=0) 93 | 94 | # Next load validation data 95 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 96 | img_files = [] 97 | val_wnids = [] 98 | for line in f: 99 | img_file, wnid = line.split('\t')[:2] 100 | img_files.append(img_file) 101 | val_wnids.append(wnid) 102 | num_val = len(img_files) 103 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 104 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 105 | for i, img_file in enumerate(img_files): 106 | img_file = os.path.join(path, 'val', 'images', img_file) 107 | img = imread(img_file) 108 | if img.ndim == 2: 109 | img.shape = (64, 64, 1) 110 | X_val[i] = img.transpose(2, 0, 1) 111 | 112 | # Next load test images 113 | # Students won't have test labels, so we need to iterate over files in the 114 | # images directory. 
def load_models(models_dir):
    """
    Load saved models from disk. This attempts to unpickle every regular file
    in a directory; anything that cannot be read as a pickled dictionary with
    a 'model' field (such as README.txt or an empty file) is skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    # Unpickling arbitrary bytes can fail in many ways besides
    # UnpicklingError; the pickle documentation lists AttributeError,
    # EOFError, ImportError and IndexError among others. KeyError and
    # TypeError also cover a payload that has no 'model' entry.
    skippable = (pickle.UnpicklingError, AttributeError, EOFError,
                 ImportError, IndexError, KeyError, TypeError, ValueError)

    models = {}
    for model_file in os.listdir(models_dir):
        model_path = os.path.join(models_dir, model_file)
        # Sub-directories cannot be opened as files; skip them up front.
        if not os.path.isfile(model_path):
            continue
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at directories with trusted contents.
        with open(model_path, 'rb') as f:
            try:
                models[model_file] = pickle.load(f)['model']
            except skippable:
                continue
    return models
+ F_k) where each column is the concatenation 22 | of all features for a single image. 23 | """ 24 | num_images = imgs.shape[0] 25 | if num_images == 0: 26 | return np.array([]) 27 | 28 | # Use the first image to determine feature dimensions 29 | feature_dims = [] 30 | first_image_features = [] 31 | for feature_fn in feature_fns: 32 | feats = feature_fn(imgs[0].squeeze()) 33 | assert len(feats.shape) == 1, 'Feature functions must be one-dimensional' 34 | feature_dims.append(feats.size) 35 | first_image_features.append(feats) 36 | 37 | # Now that we know the dimensions of the features, we can allocate a single 38 | # big array to store all features as columns. 39 | total_feature_dim = sum(feature_dims) 40 | imgs_features = np.zeros((num_images, total_feature_dim)) 41 | imgs_features[0] = np.hstack(first_image_features).T 42 | 43 | # Extract features for the rest of the images. 44 | for i in xrange(1, num_images): 45 | idx = 0 46 | for feature_fn, feature_dim in zip(feature_fns, feature_dims): 47 | next_idx = idx + feature_dim 48 | imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze()) 49 | idx = next_idx 50 | if verbose and i % 1000 == 0: 51 | print 'Done extracting features for %d / %d images' % (i, num_images) 52 | 53 | return imgs_features 54 | 55 | 56 | def rgb2gray(rgb): 57 | """Convert RGB image to grayscale 58 | 59 | Parameters: 60 | rgb : RGB image 61 | 62 | Returns: 63 | gray : grayscale image 64 | 65 | """ 66 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.144]) 67 | 68 | 69 | def hog_feature(im): 70 | """Compute Histogram of Gradient (HOG) feature for an image 71 | 72 | Modified from skimage.feature.hog 73 | http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog 74 | 75 | Reference: 76 | Histograms of Oriented Gradients for Human Detection 77 | Navneet Dalal and Bill Triggs, CVPR 2005 78 | 79 | Parameters: 80 | im : an input grayscale or rgb image 81 | 82 | Returns: 83 | feat: Histogram of Gradient (HOG) feature 84 | 85 | """ 86 | 87 | # 
convert rgb to grayscale if needed 88 | if im.ndim == 3: 89 | image = rgb2gray(im) 90 | else: 91 | image = np.at_least_2d(im) 92 | 93 | sx, sy = image.shape # image size 94 | orientations = 9 # number of gradient bins 95 | cx, cy = (8, 8) # pixels per cell 96 | 97 | gx = np.zeros(image.shape) 98 | gy = np.zeros(image.shape) 99 | gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction 100 | gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction 101 | grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude 102 | grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation 103 | 104 | n_cellsx = int(np.floor(sx / cx)) # number of cells in x 105 | n_cellsy = int(np.floor(sy / cy)) # number of cells in y 106 | # compute orientations integral images 107 | orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations)) 108 | for i in range(orientations): 109 | # create new integral image for this orientation 110 | # isolate orientations in this range 111 | temp_ori = np.where(grad_ori < 180 / orientations * (i + 1), 112 | grad_ori, 0) 113 | temp_ori = np.where(grad_ori >= 180 / orientations * i, 114 | temp_ori, 0) 115 | # select magnitudes for those orientations 116 | cond2 = temp_ori > 0 117 | temp_mag = np.where(cond2, grad_mag, 0) 118 | orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[cx/2::cx, cy/2::cy].T 119 | 120 | return orientation_histogram.ravel() 121 | 122 | 123 | def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True): 124 | """ 125 | Compute color histogram for an image using hue. 126 | 127 | Inputs: 128 | - im: H x W x C array of pixel data for an RGB image. 129 | - nbin: Number of histogram bins. 
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    A naive implementation of the numerical gradient of f at x.

    Inputs:
    - f: function taking a single argument (the array x) and returning a
      scalar.
    - x: numpy array giving the point to evaluate the gradient at. It is
      perturbed in place one element at a time and restored afterwards.
    - verbose: if true, print each index and its partial derivative.
    - h: step size of the centered difference.

    Returns:
    - grad: array shaped like x holding (f(x + h) - f(x - h)) / (2 * h) for
      each element.
    """
    fx = f(x)  # evaluate function value at original point (result unused)
    grad = np.zeros_like(x)

    # Visit every index of x in C order.
    for ix in np.ndindex(*x.shape):
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)      # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)      # f(x - h)
        x[ix] = oldval   # restore the original value

        # Centered-difference estimate of the partial derivative.
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            # Single-argument form prints the same text on Python 2 and 3.
            print('%s %s' % (ix, grad[ix]))

    return grad
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Spot-check an analytic gradient against a numerical one.

    Samples num_checks random indices of x, estimates the partial derivative
    of f at each with a centered difference, and prints the numerical value,
    the analytic value and their relative error. Nothing is returned.

    Inputs:
    - f: function taking the array x and returning a scalar.
    - x: numpy array; perturbed in place one element at a time and restored.
    - analytic_grad: array shaped like x holding the gradient to verify.
    - num_checks: number of random indices to test.
    - h: step size of the centered difference.
    """
    for _ in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)         # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)         # evaluate f(x - h)
        x[ix] = oldval      # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denom = abs(grad_numerical) + abs(grad_analytic)
        if denom == 0:
            # Both gradients are exactly zero: they agree, so report 0
            # instead of the nan that 0 / 0 would give.
            rel_error = 0.0
        else:
            rel_error = abs(grad_numerical - grad_analytic) / denom
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))
def _normalize_unit_range(G):
    """Rescale G linearly so that its minimum maps to 0 and its maximum to 1."""
    ming = G.min()
    span = G.max() - ming
    if span == 0:
        # Constant input: avoid 0 / 0 and return all zeros.
        span = 1
    # true_divide gives a floating-point result for integer images as well,
    # whereas `/` floor-divides integer arrays under Python 2.
    return np.true_divide(G - ming, span)


def vis_grid(Xs):
    """
    Visualize a grid of images.

    Inputs:
    - Xs: array of shape (N, H, W, C).

    Returns:
    - G: array of shape (A*H + A, A*W + A, C) with A = ceil(sqrt(N)); the
      images are laid out row by row with one spare row/column after each
      tile, unused cells hold the minimum of Xs, and all values are rescaled
      to [0, 1].
    """
    (N, H, W, C) = Xs.shape
    A = int(ceil(sqrt(N)))
    G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
    # Background takes the smallest pixel value so it maps to 0.
    G *= np.min(Xs)
    n = 0
    for y in range(A):
        for x in range(A):
            if n < N:
                G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n, :, :, :]
                n += 1
    # normalize to [0,1]
    return _normalize_unit_range(G)


def vis_nn(rows):
    """
    Visualize an array of arrays of images.

    Inputs:
    - rows: sequence of N sequences, each holding D images of shape (H, W, C);
      sizes and dtype are taken from rows[0] and rows[0][0].

    Returns:
    - G: array of shape (N*H + N, D*W + D, C) with rows[y][x] placed at grid
      cell (y, x), rescaled to [0, 1]. Cells between tiles start at 1 before
      the rescaling.
    """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
    # normalize to [0,1]
    return _normalize_unit_range(G)
/assignment1/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | PYVER=2.7 5 | PATHTOPYTHON=/usr/local/bin/ 6 | PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"` 10 | 11 | # now run Python with the virtualenv set as Python's HOME 12 | export PYTHONHOME=$ENV 13 | exec $PYTHON "$@" 14 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.8 2 | MarkupSafe==0.23 3 | Pillow==3.0.0 4 | Pygments==2.0.2 5 | appnope==0.1.0 6 | backports-abc==0.4 7 | backports.ssl-match-hostname==3.5.0.1 8 | certifi==2015.11.20.1 9 | cycler==0.9.0 10 | decorator==4.0.6 11 | functools32==3.2.3-2 12 | gnureadline==6.3.3 13 | ipykernel==4.2.2 14 | ipython==4.0.1 15 | ipython-genutils==0.1.0 16 | ipywidgets==4.1.1 17 | jsonschema==2.5.1 18 | jupyter==1.0.0 19 | jupyter-client==4.1.1 20 | jupyter-console==4.0.3 21 | jupyter-core==4.0.6 22 | matplotlib==1.5.0 23 | mistune==0.7.1 24 | nbconvert==4.1.0 25 | nbformat==4.0.1 26 | notebook==4.0.6 27 | numpy==1.10.4 28 | path.py==8.1.2 29 | pexpect==4.0.1 30 | pickleshare==0.5 31 | ptyprocess==0.5 32 | pyparsing==2.0.7 33 | python-dateutil==2.4.2 34 | pytz==2015.7 35 | pyzmq==15.1.0 36 | qtconsole==4.1.1 37 | scipy==0.16.1 38 | simplegeneric==0.8.1 39 | singledispatch==3.4.0.3 40 | six==1.10.0 41 | terminado==0.5 42 | tornado==4.3 43 | traitlets==4.0.0 44 | wsgiref==0.1.2 45 | jupyter==1.0.0 46 | pillow==3.1.0 47 | -------------------------------------------------------------------------------- /assignment1/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | 
# Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment2/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment2/README.md: -------------------------------------------------------------------------------- 1 | In this assignment you will practice writing backpropagation code, and training 2 | Neural Networks and Convolutional Neural Networks. The goals of this assignment 3 | are as follows: 4 | 5 | - understand **Neural Networks** and how they are arranged in layered 6 | architectures 7 | - understand and be able to implement (vectorized) **backpropagation** 8 | - implement various **update rules** used to optimize Neural Networks 9 | - implement **batch normalization** for training deep networks 10 | - implement **dropout** to regularize networks 11 | - effectively **cross-validate** and find the best hyperparameters for Neural 12 | Network architecture 13 | - understand the architecture of **Convolutional Neural Networks** and 14 | gain experience with training these models on data 15 | 16 | ## Setup 17 | You can work on the assignment in one of two ways: locally on your own machine, 18 | or on a virtual machine through Terminal.com. 19 | 20 | ### Working in the cloud on Terminal 21 | 22 | Terminal has created a separate subdomain to serve our class, 23 | [www.stanfordterminalcloud.com](https://www.stanfordterminalcloud.com). Register 24 | your account there. The Assignment 2 snapshot can then be found HERE. If you are 25 | registered in the class you can contact the TA (see Piazza for more information) 26 | to request Terminal credits for use on the assignment.
Once you boot up the 27 | snapshot everything will be installed for you, and you will be ready to start on 28 | your assignment right away. We have written a small tutorial on Terminal 29 | [here](http://cs231n.github.io/terminal-tutorial/). 30 | 31 | ### Working locally 32 | Get the code as a zip file 33 | [here](http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip). 34 | As for the dependencies: 35 | 36 | **[Option 1] Use Anaconda:** 37 | The preferred approach for installing all the assignment dependencies is to use 38 | [Anaconda](https://www.continuum.io/downloads), which is a Python distribution 39 | that includes many of the most popular Python packages for science, math, 40 | engineering and data analysis. Once you install it you can skip all mentions of 41 | requirements and you are ready to go directly to working on the assignment. 42 | 43 | **[Option 2] Manual install, virtual environment:** 44 | If you do not want to use Anaconda and want to go with a more manual and risky 45 | installation route you will likely want to create a 46 | [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) 47 | for the project. If you choose not to use a virtual environment, it is up to you 48 | to make sure that all dependencies for the code are installed globally on your 49 | machine. To set up a virtual environment, run the following: 50 | 51 | ```bash 52 | cd assignment2 53 | sudo pip install virtualenv # This may already be installed 54 | virtualenv .env # Create a virtual environment 55 | source .env/bin/activate # Activate the virtual environment 56 | pip install -r requirements.txt # Install dependencies 57 | # Work on the assignment for a while ... 58 | deactivate # Exit the virtual environment 59 | ``` 60 | 61 | **Download data:** 62 | Once you have the starter code, you will need to download the CIFAR-10 dataset. 
63 | Run the following from the `assignment2` directory: 64 | 65 | ```bash 66 | cd cs231n/datasets 67 | ./get_datasets.sh 68 | ``` 69 | 70 | **Compile the Cython extension:** Convolutional Neural Networks require a very 71 | efficient implementation. We have implemented some of the functionality using 72 | [Cython](http://cython.org/); you will need to compile the Cython extension 73 | before you can run the code. From the `cs231n` directory, run the following 74 | command: 75 | 76 | ```bash 77 | python setup.py build_ext --inplace 78 | ``` 79 | 80 | **Start IPython:** 81 | After you have the CIFAR-10 data, you should start the IPython notebook server 82 | from the `assignment2` directory. If you are unfamiliar with IPython, you should 83 | read our [IPython tutorial](http://cs231n.github.io/ipython-tutorial/). 84 | 85 | **NOTE:** If you are working in a virtual environment on OSX, you may encounter 86 | errors with matplotlib due to the 87 | [issues described here](http://matplotlib.org/faq/virtualenv_faq.html). 88 | You can work around this issue by starting the IPython server using the 89 | `start_ipython_osx.sh` script from the `assignment2` directory; the script 90 | assumes that your virtual environment is named `.env`. 91 | 92 | 93 | ### Submitting your work: 94 | Whether you work on the assignment locally or using Terminal, once you are done 95 | working run the `collectSubmission.sh` script; this will produce a file called 96 | `assignment2.zip`. Upload this file to your dropbox on 97 | [the coursework](https://coursework.stanford.edu/portal/site/W15-CS-231N-01/) 98 | page for the course. 99 | 100 | 101 | ### Q1: Fully-connected Neural Network (30 points) 102 | The IPython notebook `FullyConnectedNets.ipynb` will introduce you to our 103 | modular layer design, and then use those layers to implement fully-connected 104 | networks of arbitrary depth. To optimize these models you will implement several 105 | popular update rules.
106 | 107 | ### Q2: Batch Normalization (30 points) 108 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch 109 | normalization, and use it to train deep fully-connected networks. 110 | 111 | ### Q3: Dropout (10 points) 112 | The IPython notebook `Dropout.ipynb` will help you implement Dropout and explore 113 | its effects on model generalization. 114 | 115 | ### Q4: ConvNet on CIFAR-10 (30 points) 116 | In the IPython Notebook `ConvolutionalNetworks.ipynb` you will implement several 117 | new layers that are commonly used in convolutional networks. You will train a 118 | (shallow) convolutional network on CIFAR-10, and it will then be up to you to 119 | train the best network that you can. 120 | 121 | ### Q5: Do something extra! (up to +10 points) 122 | In the process of training your network, you should feel free to implement 123 | anything that you want to get better performance. You can modify the solver, 124 | implement additional layers, use different types of regularization, use an 125 | ensemble of models, or anything else that comes to mind. If you implement these 126 | or other ideas not covered in the assignment then you will be awarded some bonus 127 | points. 128 | 129 | -------------------------------------------------------------------------------- /assignment2/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment2.zip 2 | zip -r assignment2.zip . 
class ThreeLayerConvNet(object):
    """
    A three-layer convolutional network with the following architecture:

    conv - relu - 2x2 max pool - affine - relu - affine - softmax

    The network operates on minibatches of data that have shape (N, C, H, W)
    consisting of N images, each with height H and width W and with C input
    channels.
    """

    def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
                 hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0,
                 dtype=np.float32):
        """
        Initialize a new network.

        Inputs:
        - input_dim: Tuple (C, H, W) giving size of input data
        - num_filters: Number of filters to use in the convolutional layer
        - filter_size: Size of filters to use in the convolutional layer
        - hidden_dim: Number of units to use in the fully-connected hidden layer
        - num_classes: Number of scores to produce from the final affine layer.
        - weight_scale: Scalar giving standard deviation for random
          initialization of weights.
        - reg: Scalar giving L2 regularization strength
        - dtype: numpy datatype to use for computation.

        Weights are drawn from a Gaussian scaled by weight_scale and biases
        start at zero. Everything is stored in self.params under the keys
        'W1'/'b1' (conv layer), 'W2'/'b2' (hidden affine layer) and
        'W3'/'b3' (output affine layer), then cast to dtype.
        """
        self.params = {}
        self.reg = reg
        self.dtype = dtype

        C, H, W = input_dim
        F, HH, WW = num_filters, filter_size, filter_size

        # loss() convolves with stride 1 and pad (filter_size - 1) // 2, which
        # keeps the H x W extent for odd filter sizes, and then applies a 2x2
        # stride-2 pool. Floor division keeps the row count an integer on
        # Python 3 as well as Python 2 (np.random.randn rejects floats).
        pooled_size = F * (H // 2) * (W // 2)

        self.params['W1'] = weight_scale * np.random.randn(F, C, HH, WW)
        self.params['W2'] = weight_scale * np.random.randn(pooled_size, hidden_dim)
        self.params['W3'] = weight_scale * np.random.randn(hidden_dim, num_classes)
        self.params['b1'] = np.zeros(F)
        self.params['b2'] = np.zeros(hidden_dim)
        self.params['b3'] = np.zeros(num_classes)

        # items() exists on both Python 2 and 3 (iteritems() does not); the
        # list() snapshot makes the in-loop reassignment unambiguous.
        for k, v in list(self.params.items()):
            self.params[k] = v.astype(dtype)

    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the three-layer convolutional network.

        Input / output: Same API as TwoLayerNet in fc_net.py.

        Inputs:
        - X: Minibatch of input data.
        - y: Labels for X, or None.

        Returns:
        If y is None, the class scores produced by the forward pass.
        Otherwise a tuple (loss, grads) where loss is the softmax data loss
        plus L2 regularization on W1, W2 and W3, and grads maps every key of
        self.params to the gradient of the loss with respect to it.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer;
        # the pad must be an integer, hence floor division.
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # Forward pass: conv - relu - pool, affine - relu, affine.
        pool_out, pool_cache = conv_relu_pool_forward(X, W1, b1, conv_param, pool_param)
        affine_out, affine_cache = affine_relu_forward(pool_out, W2, b2)
        scores, cache = affine_forward(affine_out, W3, b3)

        if y is None:
            return scores

        # Backward pass: walk the same three stages in reverse order.
        grads = {}
        loss, dscore = softmax_loss(scores, y)
        daffine, grads['W3'], grads['b3'] = affine_backward(dscore, cache)
        dpool, grads['W2'], grads['b2'] = affine_relu_backward(daffine, affine_cache)
        dx, grads['W1'], grads['b1'] = conv_relu_pool_backward(dpool, pool_cache)

        # L2 regularization applies to the weights only, not the biases.
        loss += 0.5 * self.reg * (np.sum(W1 ** 2) + np.sum(W2 ** 2) + np.sum(W3 ** 2))
        grads['W1'] += self.reg * W1
        grads['W2'] += self.reg * W2
        grads['W3'] += self.reg * W3

        return loss, grads
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Read CIFAR-10 from disk and return mean-centred, channels-first splits.

    The first num_training rows of the loaded training set become the
    training split, the next num_validation rows the validation split, and
    the first num_test rows of the loaded test set the test split. The mean
    image of the training split is subtracted from all three splits.

    Returns a dictionary with the keys 'X_train', 'y_train', 'X_val',
    'y_val', 'X_test' and 'y_test'.
    """
    all_train_x, all_train_y, all_test_x, all_test_y = load_CIFAR10(
        'cs231n/datasets/cifar-10-batches-py')

    # Row selections for each split; validation rows follow the training rows.
    val_rows = range(num_training, num_training + num_validation)
    train_rows = range(num_training)
    test_rows = range(num_test)

    X_val, y_val = all_train_x[val_rows], all_train_y[val_rows]
    X_train, y_train = all_train_x[train_rows], all_train_y[train_rows]
    X_test, y_test = all_test_x[test_rows], all_test_y[test_rows]

    # Centre every split on the training mean (in place).
    mean_image = np.mean(X_train, axis=0)
    for split in (X_train, X_val, X_test):
        split -= mean_image

    def channels_first(images):
        # Move the last axis to position 1 and make the result contiguous.
        return images.transpose(0, 3, 1, 2).copy()

    return {
        'X_train': channels_first(X_train), 'y_train': y_train,
        'X_val': channels_first(X_val), 'y_val': y_val,
        'X_test': channels_first(X_test), 'y_test': y_test,
    }
85 | - X_train: (N_tr, 3, 64, 64) array of training images 86 | - y_train: (N_tr,) array of training labels 87 | - X_val: (N_val, 3, 64, 64) array of validation images 88 | - y_val: (N_val,) array of validation labels 89 | - X_test: (N_test, 3, 64, 64) array of testing images. 90 | - y_test: (N_test,) array of test labels; if test labels are not available 91 | (such as in student code) then y_test will be None. 92 | """ 93 | # First load wnids 94 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 95 | wnids = [x.strip() for x in f] 96 | 97 | # Map wnids to integer labels 98 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 99 | 100 | # Use words.txt to get names for each class 101 | with open(os.path.join(path, 'words.txt'), 'r') as f: 102 | wnid_to_words = dict(line.split('\t') for line in f) 103 | for wnid, words in wnid_to_words.iteritems(): 104 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 105 | class_names = [wnid_to_words[wnid] for wnid in wnids] 106 | 107 | # Next load training data. 
108 | X_train = [] 109 | y_train = [] 110 | for i, wnid in enumerate(wnids): 111 | if (i + 1) % 20 == 0: 112 | print 'loading training data for synset %d / %d' % (i + 1, len(wnids)) 113 | # To figure out the filenames we need to open the boxes file 114 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 115 | with open(boxes_file, 'r') as f: 116 | filenames = [x.split('\t')[0] for x in f] 117 | num_images = len(filenames) 118 | 119 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 120 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 121 | for j, img_file in enumerate(filenames): 122 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 123 | img = imread(img_file) 124 | if img.ndim == 2: 125 | ## grayscale file 126 | img.shape = (64, 64, 1) 127 | X_train_block[j] = img.transpose(2, 0, 1) 128 | X_train.append(X_train_block) 129 | y_train.append(y_train_block) 130 | 131 | # We need to concatenate all training data 132 | X_train = np.concatenate(X_train, axis=0) 133 | y_train = np.concatenate(y_train, axis=0) 134 | 135 | # Next load validation data 136 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 137 | img_files = [] 138 | val_wnids = [] 139 | for line in f: 140 | img_file, wnid = line.split('\t')[:2] 141 | img_files.append(img_file) 142 | val_wnids.append(wnid) 143 | num_val = len(img_files) 144 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 145 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 146 | for i, img_file in enumerate(img_files): 147 | img_file = os.path.join(path, 'val', 'images', img_file) 148 | img = imread(img_file) 149 | if img.ndim == 2: 150 | img.shape = (64, 64, 1) 151 | X_val[i] = img.transpose(2, 0, 1) 152 | 153 | # Next load test images 154 | # Students won't have test labels, so we need to iterate over files in the 155 | # images directory. 
def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model
      files. Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    # Besides UnpicklingError, the pickle documentation lists these as
    # exceptions that unpickling malformed data may raise; an empty file,
    # for example, raises EOFError.
    unpickle_errors = (pickle.UnpicklingError, AttributeError, EOFError,
                       ImportError, IndexError)

    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                # NOTE(security): unpickling executes arbitrary code; only
                # point this at directories whose contents are trusted.
                loaded = pickle.load(f)
            except unpickle_errors:
                continue
        # Kept outside the try so that a well-formed pickle lacking the
        # 'model' field still raises instead of being silently dropped.
        models[model_file] = loaded['model']
    return models
def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer using a strided im2col view.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: Dictionary with the keys 'stride' and 'pad'

    Returns a tuple of:
    - out: Contiguous output array of shape (N, F, out_h, out_w)
    - cache: (x, w, b, conv_param, x_cols)

    Raises AssertionError when the filter does not tile the padded input
    evenly for the given stride.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
    assert (H + 2 * pad - HH) % stride == 0, 'height does not work'

    # Pad the input
    x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                      mode='constant')

    # Figure out output dimensions. Floor division keeps them integers on
    # Python 3; as_strided and the shape assignments below reject floats.
    H += 2 * pad
    W += 2 * pad
    out_h = (H - HH) // stride + 1
    out_w = (W - WW) // stride + 1

    # Perform an im2col operation by picking clever strides: every window is
    # exposed as a view on x_padded, with strides counted in elements first
    # and then scaled to bytes.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H * W, W, 1, C * H * W, stride * W, stride)
    strides = x_padded.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                                               shape=shape, strides=strides)
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output
    res.shape = (F, N, out_h, out_w)
    out = res.transpose(1, 0, 2, 3)

    # Be nice and return a contiguous array
    out = np.ascontiguousarray(out)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache
def max_pool_forward_fast(x, pool_param):
    """
    Dispatch the max-pool forward pass to the quickest applicable method.

    When the pooling window is square, its side equals the stride, and it
    tiles the input height and width exactly, the reshape implementation is
    used. Every other configuration goes through the im2col implementation.

    Returns (out, cache), where cache is a pair (method_name, method_cache)
    and method_name is 'reshape' or 'im2col'.
    """
    _, _, height, width = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    step = pool_param['stride']

    # Both conditions are evaluated up front, as in the original.
    square_and_strided = pool_h == pool_w == step
    tiles_input = height % pool_h == 0 and width % pool_w == 0

    if not (square_and_strided and tiles_input):
        out, method_cache = max_pool_forward_im2col(x, pool_param)
        return out, ('im2col', method_cache)

    out, method_cache = max_pool_forward_reshape(x, pool_param)
    return out, ('reshape', method_cache)
def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that
    uses some clever reshaping.

    This can only be used for square pooling regions that tile the input.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - pool_param: Dictionary with the keys 'pool_height', 'pool_width' and
      'stride'; all three must be equal.

    Returns a tuple of:
    - out: Pooled output of shape (N, C, H // pool_height, W // pool_width)
    - cache: (x, x_reshaped, out)

    Raises AssertionError if the pooling parameters are not all equal or
    the window does not tile the input exactly.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_width == 0

    # Split each spatial axis into (number of windows, window size). Floor
    # division keeps the dimensions integral on Python 3, where "/" would
    # hand reshape a float.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)

    # Reduce over the two window axes; once axis 3 is gone the second window
    # axis sits at position 4.
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache
def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - pool_param: Dictionary with the keys 'pool_height', 'pool_width' and
      'stride'

    Returns a tuple of:
    - out: Pooled output of shape (N, C, out_height, out_width)
    - cache: (x, x_cols, x_cols_argmax, pool_param)

    Raises AssertionError if the windows do not fit the input evenly for the
    given stride.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # Floor division keeps the output dimensions integral on Python 3.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat every channel of every image as its own single-channel image so
    # that each column of x_cols holds exactly one pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    # The im2col module imported by this file exports im2col_indices; the
    # previously used name "im2col" is not defined anywhere.
    x_cols = im2col_indices(x_split, pool_height, pool_width, padding=0,
                            stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x
    - f should be a function that takes a single argument
    - x is the point (numpy array) to evaluate the gradient at
    - verbose: when true, print every index together with its partial
      derivative
    - h: step size of the centered difference

    Returns an array shaped like x holding the numerical gradient. x is
    perturbed in place while f is evaluated, and every entry is restored
    before moving on to the next one.
    """
    # The value at the original point is not needed by the centered formula;
    # the call is kept so that f is invoked exactly as often as before (this
    # matters if f has side effects such as consuming random numbers).
    f(x)

    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]

        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (ix, grad[ix]))
        it.iternext()  # step to next dimension

    return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Numerically differentiate a function that reads and writes blobs.

    f is called as f(*inputs, output) and is expected to write its result
    into output.vals. For every entry of every input blob's vals, the entry
    is nudged by +h and -h, f is re-run, and the centered difference of
    output.vals is contracted with output.diffs.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns a list with one gradient array per input blob, each shaped like
    that blob's diffs.
    """
    call_args = inputs + (output,)

    def run_with(blob, index, value):
        # Set one entry, re-run f and snapshot what it wrote to the output.
        blob.vals[index] = value
        f(*call_args)
        return np.copy(output.vals)

    gradients = []
    for blob in inputs:
        blob_grad = np.zeros_like(blob.diffs)
        walker = np.nditer(blob.vals, flags=['multi_index'],
                           op_flags=['readwrite'])
        while not walker.finished:
            index = walker.multi_index
            saved = blob.vals[index]

            upper = run_with(blob, index, saved + h)
            lower = run_with(blob, index, saved - h)
            blob.vals[index] = saved  # put the original value back

            blob_grad[index] = np.sum((upper - lower) * output.diffs) / (2.0 * h)
            walker.iternext()
        gradients.append(blob_grad)
    return gradients
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    """
    Build the fancy-indexing arrays that gather every receptive field.

    Inputs:
    - x_shape: Tuple (N, C, H, W) describing the unpadded input
    - field_height, field_width: Size of the receptive field
    - padding: Number of padded rows/columns on each side of the input
    - stride: Step between neighbouring receptive fields

    Returns a tuple (k, i, j) of integer arrays:
    - k: shape (C * field_height * field_width, 1), channel index per row
    - i: shape (C * field_height * field_width, out_height * out_width),
      row index into the padded input
    - j: same shape as i, column index into the padded input

    Raises AssertionError if the receptive field does not fit the padded
    input evenly for the given stride.
    """
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    # The width must be checked against the field *width*; testing the
    # height here rejected valid non-square fields and accepted invalid ones.
    assert (W + 2 * padding - field_width) % stride == 0
    # Floor division keeps the counts integral on Python 3; np.repeat and
    # np.tile do not accept float repeat counts.
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    # Offsets inside one receptive field (i0, j0), repeated for each channel,
    # plus the top-left corner of every output position (i1, j1).
    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k, i, j)
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """
    Scatter-add a column matrix back into image layout.

    Entries of cols that map to the same pixel are accumulated with
    np.add.at, using the index arrays from get_im2col_indices. The
    accumulation happens on a zero-filled padded canvas; the padding border
    is cut off again before returning.

    Inputs:
    - cols: Column matrix, reshaped here to
      (C * field_height * field_width, -1, N)
    - x_shape: Tuple (N, C, H, W) of the image array to rebuild
    - field_height, field_width, padding, stride: Geometry of the receptive
      fields

    Returns an array of shape (N, C, H, W) with the dtype of cols.
    """
    N, C, H, W = x_shape
    canvas = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
                      dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)

    # Bring the image axis to the front so it lines up with the leading
    # slice(None) in the scatter index.
    per_image = cols.reshape(C * field_height * field_width, -1, N).transpose(2, 0, 1)
    np.add.at(canvas, (slice(None), k, i, j), per_image)

    if padding != 0:
        return canvas[:, :, padding:-padding, padding:-padding]
    return canvas
# Inner loop of im2col: copies every receptive-field element of x_padded into
# its (row, col) slot of cols.
#
# A row is addressed by (channel, ii, jj) and a column by (yy, xx, image).
# Within one channel the field is laid out row-major, so the row offset is
# ii * field_width + jj. The earlier ii * field_height + jj is only correct
# for square fields; for any other shape it makes distinct (ii, jj) pairs
# collide on the same row and leaves other rows unwritten.
#
# H, W and padding are accepted but not used by the loop.
@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        row = c * field_width * field_height + ii * field_width + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
    return 0
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 
def affine_relu_forward(x, w, b):
    """
    Convenience layer: affine transform followed by a ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (fc_cache, relu_cache) pair to give to the backward pass
    """
    affine_out, fc_cache = affine_forward(x, w, b)
    activated, relu_cache = relu_forward(affine_out)
    return activated, (fc_cache, relu_cache)


def affine_relu_backward(dout, cache):
    """
    Backward pass for the affine-relu convenience layer.

    Returns the tuple (dx, dw, db) produced by affine_backward.
    """
    fc_cache, relu_cache = cache
    d_affine = relu_backward(dout, relu_cache)
    return affine_backward(d_affine, fc_cache)


def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Convenience layer: affine transform, batch normalization, then ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer
    - gamma, beta, bn_param: Arguments forwarded to batchnorm_forward

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (fc_cache, bn_cache, relu_cache) for the backward pass
    """
    affine_out, fc_cache = affine_forward(x, w, b)
    normed, bn_cache = batchnorm_forward(affine_out, gamma, beta, bn_param)
    activated, relu_cache = relu_forward(normed)
    return activated, (fc_cache, bn_cache, relu_cache)


def affine_bn_relu_backward(dout, cache):
    """
    Backward pass for the affine-batchnorm-relu convenience layer.

    Returns the tuple (dx, dw, db, dgamma, dbeta).
    """
    fc_cache, bn_cache, relu_cache = cache
    d_relu = relu_backward(dout, relu_cache)
    d_norm, dgamma, dbeta = batchnorm_backward(d_relu, bn_cache)
    dx, dw, db = affine_backward(d_norm, fc_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_forward(x, w, b, conv_param):
    """
    Convenience layer: convolution followed by a ReLU.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: (conv_cache, relu_cache) pair to give to the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    activated, relu_cache = relu_forward(conv_out)
    return activated, (conv_cache, relu_cache)


def conv_relu_backward(dout, cache):
    """
    Backward pass for the conv-relu convenience layer.

    Returns the tuple (dx, dw, db) produced by conv_backward_fast.
    """
    conv_cache, relu_cache = cache
    d_conv = relu_backward(dout, relu_cache)
    return conv_backward_fast(d_conv, conv_cache)


def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """
    Convenience layer: convolution, ReLU, then max pooling.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: (conv_cache, relu_cache, pool_cache) for the backward pass
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    activated, relu_cache = relu_forward(conv_out)
    pooled, pool_cache = max_pool_forward_fast(activated, pool_param)
    return pooled, (conv_cache, relu_cache, pool_cache)


def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-relu-pool convenience layer.

    Returns the tuple (dx, dw, db) produced by conv_backward_fast.
    """
    conv_cache, relu_cache, pool_cache = cache
    d_pool = max_pool_backward_fast(dout, pool_cache)
    d_conv = relu_backward(d_pool, relu_cache)
    return conv_backward_fast(d_conv, conv_cache)
"""
This file implements various first-order update rules that are commonly used for
training neural networks. Each update rule accepts current weights and the
gradient of the loss with respect to those weights and produces the next set of
weights. Each update rule has the same interface:

def update(w, dw, config=None):

Inputs:
  - w: A numpy array giving the current weights.
  - dw: A numpy array of the same shape as w giving the gradient of the
    loss with respect to w.
  - config: A dictionary containing hyperparameter values such as learning rate,
    momentum, etc. If the update rule requires caching values over many
    iterations, then config will also hold these cached values.

Returns:
  - next_w: The next point after the update.
  - config: The config dictionary to be passed to the next iteration of the
    update rule.

NOTE: For most update rules, the default learning rate will probably not perform
well; however the default values of the other hyperparameters should work well
for a variety of different problems.

For efficiency, update rules may perform in-place updates, mutating w and
setting next_w equal to w.
"""


def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    The update is applied in place: the returned array is w itself.

    config format:
    - learning_rate: Scalar learning rate.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)

    w -= config['learning_rate'] * dw
    return w, config


def sgd_momentum(w, dw, config=None):
    """
    Performs stochastic gradient descent with momentum.

    config format:
    - learning_rate: Scalar learning rate.
    - momentum: Scalar between 0 and 1 giving the momentum value.
      Setting momentum = 0 reduces to sgd.
    - velocity: A numpy array of the same shape as w and dw used to store a
      moving average of the gradients.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))

    # Integrate the velocity, then take a step along it.
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v
    config['velocity'] = v

    return next_w, config


def rmsprop(x, dx, config=None):
    """
    Uses the RMSProp update rule, which uses a moving average of squared
    gradient values to set adaptive per-parameter learning rates.

    config format:
    - learning_rate: Scalar learning rate.
    - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
      gradient cache.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - cache: Moving average of second moments of gradients.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('decay_rate', 0.99)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('cache', np.zeros_like(x))

    decay = config['decay_rate']
    config['cache'] = decay * config['cache'] + (1 - decay) * dx ** 2
    next_x = x - config['learning_rate'] * dx / (
        np.sqrt(config['cache']) + config['epsilon'])

    return next_x, config


def adam(x, dx, config=None):
    """
    Uses the Adam update rule, which incorporates moving averages of both the
    gradient and its square and a bias correction term.

    The iteration counter t is incremented before it is used, so the first
    call on a fresh config applies the bias correction for t = 1.

    config format:
    - learning_rate: Scalar learning rate.
    - beta1: Decay rate for moving average of first moment of gradient.
    - beta2: Decay rate for moving average of second moment of gradient.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - m: Moving average of gradient.
    - v: Moving average of squared gradient.
    - t: Iteration number.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('m', np.zeros_like(x))
    config.setdefault('v', np.zeros_like(x))
    config.setdefault('t', 0)

    beta1, beta2 = config['beta1'], config['beta2']

    config['t'] += 1
    config['m'] = beta1 * config['m'] + (1 - beta1) * dx
    config['v'] = beta2 * config['v'] + (1 - beta2) * (dx ** 2)

    # m and v start at zero and are therefore biased towards zero during the
    # first iterations; dividing by (1 - beta ** t) compensates for that.
    m_hat = config['m'] / (1 - beta1 ** config['t'])
    v_hat = config['v'] / (1 - beta2 ** config['t'])

    next_x = x - config['learning_rate'] * m_hat / (
        np.sqrt(v_hat) + config['epsilon'])

    return next_x, config
class Solver(object):
    """
    A Solver encapsulates all the logic necessary for training classification
    models. The Solver performs stochastic gradient descent using different
    update rules defined in optim.py.

    The solver accepts both training and validation data and labels so it can
    periodically check classification accuracy on both training and validation
    data to watch out for overfitting.

    To train a model, you will first construct a Solver instance, passing the
    model, dataset, and various options (learning rate, batch size, etc) to the
    constructor. You will then call the train() method to run the optimization
    procedure and train the model.

    After the train() method returns, model.params will contain the parameters
    that performed best on the validation set over the course of training.
    In addition, the instance variable solver.loss_history will contain a list
    of all losses encountered during training and the instance variables
    solver.train_acc_history and solver.val_acc_history will be lists containing
    the accuracies of the model on the training and validation set at each epoch.

    Example usage might look something like this:

    data = {
      'X_train': # training data
      'y_train': # training labels
      'X_val': # validation data
      'y_val': # validation labels
    }
    model = MyAwesomeModel(hidden_size=100, reg=10)
    solver = Solver(model, data,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10, batch_size=100,
                    print_every=100)
    solver.train()


    A Solver works on a model object that must conform to the following API:

    - model.params must be a dictionary mapping string parameter names to numpy
      arrays containing parameter values.

    - model.loss(X, y) must be a function that computes training-time loss and
      gradients, and test-time classification scores, with the following inputs
      and outputs:

      Inputs:
      - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k)
      - y: Array of labels, of shape (N,) giving labels for X where y[i] is the
        label for X[i].

      Returns:
      If y is None, run a test-time forward pass and return:
      - scores: Array of shape (N, C) giving classification scores for X where
        scores[i, c] gives the score of class c for X[i].

      If y is not None, run a training time forward and backward pass and return
      a tuple of:
      - loss: Scalar giving the loss
      - grads: Dictionary with the same keys as self.params mapping parameter
        names to gradients of the loss with respect to those parameters.
    """

    def __init__(self, model, data, **kwargs):
        """
        Construct a new Solver instance.

        Required arguments:
        - model: A model object conforming to the API described above
        - data: A dictionary of training and validation data with the following:
          'X_train': Array of shape (N_train, d_1, ..., d_k) giving training images
          'X_val': Array of shape (N_val, d_1, ..., d_k) giving validation images
          'y_train': Array of shape (N_train,) giving labels for training images
          'y_val': Array of shape (N_val,) giving labels for validation images

        Optional arguments:
        - update_rule: A string giving the name of an update rule in optim.py.
          Default is 'sgd'.
        - optim_config: A dictionary containing hyperparameters that will be
          passed to the chosen update rule. Each update rule requires different
          hyperparameters (see optim.py) but all update rules require a
          'learning_rate' parameter so that should always be present.
        - lr_decay: A scalar for learning rate decay; after each epoch the learning
          rate is multiplied by this value.
        - batch_size: Size of minibatches used to compute loss and gradient during
          training.
        - num_epochs: The number of epochs to run for during training.
        - print_every: Integer; training losses will be printed every print_every
          iterations.
        - verbose: Boolean; if set to false then no output will be printed during
          training.

        Raises ValueError for unknown keyword arguments or an update_rule that
        does not exist in optim.py.
        """
        self.model = model
        self.X_train = data['X_train']
        self.y_train = data['y_train']
        self.X_val = data['X_val']
        self.y_val = data['y_val']

        # Unpack keyword arguments
        self.update_rule = kwargs.pop('update_rule', 'sgd')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)

        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)

        # Throw an error if there are extra keyword arguments
        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in kwargs)
            raise ValueError('Unrecognized arguments %s' % extra)

        # Make sure the update rule exists, then replace the string
        # name with the actual function
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.update_rule = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        """
        # Set up some variables for book-keeping
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Give every parameter its own (shallow) copy of optim_config so that
        # per-parameter state written by the update rule is not shared.
        self.optim_configs = {}
        for p in self.model.params:
            self.optim_configs[p] = dict(self.optim_config)

    def _step(self):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """
        # Make a minibatch of training data (sampled with replacement)
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        # Compute loss and gradient
        loss, grads = self.model.loss(X_batch, y_batch)
        self.loss_history.append(loss)

        # Perform a parameter update. Only values of existing keys are
        # replaced, so iterating over the dict while assigning is safe.
        for p, w in self.model.params.items():
            dw = grads[p]
            config = self.optim_configs[p]
            next_w, next_config = self.update_rule(w, dw, config)
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        """
        Check accuracy of the model on the provided data.

        Inputs:
        - X: Array of data, of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,)
        - num_samples: If not None, subsample the data and only test the model
          on num_samples datapoints.
        - batch_size: Split X and y into batches of this size to avoid using too
          much memory.

        Returns:
        - acc: Scalar giving the fraction of instances that were correctly
          classified by the model.
        """
        # Maybe subsample the data
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]
            y = y[mask]

        # Compute predictions in batches; // keeps the batch count an integer
        # under both Python 2 and Python 3.
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.model.loss(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        acc = np.mean(y_pred == y)

        return acc

    def train(self):
        """
        Run optimization to train the model.

        When training finishes, the parameters with the best validation
        accuracy are swapped into model.params.
        """
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(num_iterations):
            self._step()

            # Maybe print training loss
            if self.verbose and t % self.print_every == 0:
                print('(Iteration %d / %d) loss: %f' % (
                    t + 1, num_iterations, self.loss_history[-1]))

            # At the end of every epoch, increment the epoch counter and decay
            # the learning rate.
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay

            # Check train and val accuracy on the first iteration, the last
            # iteration, and at the end of each epoch.
            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                train_acc = self.check_accuracy(self.X_train, self.y_train,
                                                num_samples=1000)
                val_acc = self.check_accuracy(self.X_val, self.y_val)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)

                if self.verbose:
                    print('(Epoch %d / %d) train acc: %f; val_acc: %f' % (
                        self.epoch, self.num_epochs, train_acc, val_acc))

                # Keep track of the best model
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for k, v in self.model.params.items():
                        self.best_params[k] = v.copy()

        # At the end of training swap the best params into the model. If no
        # snapshot was ever taken (validation accuracy never rose above 0),
        # keep the current parameters instead of replacing them with {}.
        if self.best_params:
            self.model.params = self.best_params
def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Every image is rescaled independently to the range [0, ubound]. An image
    whose pixels are all equal has no range to stretch and is drawn as zeros.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid

    Returns an array of shape (grid_height, grid_width, C).
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                # Skip constant images: dividing by a zero range would fill
                # the cell with NaN; the grid is already zero there.
                if high != low:
                    grid[y0:y1, x0:x1] = ubound * (img - low) / float(high - low)
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    return grid


def vis_grid(Xs):
    """
    Visualize a grid of images.

    Images are tiled on a square grid with one separator row/column after
    each tile; cells without an image hold the minimum of Xs. The result is
    normalized to [0, 1] (all zeros if every value is identical).

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    """
    (N, H, W, C) = Xs.shape
    A = int(ceil(sqrt(N)))
    G = np.ones((A * H + A, A * W + A, C), Xs.dtype)
    G *= np.min(Xs)
    n = 0
    for y in range(A):
        for x in range(A):
            if n < N:
                G[y * H + y:(y + 1) * H + y, x * W + x:(x + 1) * W + x, :] = Xs[n, :, :, :]
                n += 1
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    if maxg == ming:
        return np.zeros(G.shape)
    # float() forces true division even for integer images under Python 2.
    G = (G - ming) / float(maxg - ming)
    return G


def vis_nn(rows):
    """
    Visualize an array of arrays of images.

    Inputs:
    - rows: Sequence of rows, each a sequence of images of shape (H, W, C);
      the number of columns is taken from the first row.

    Returns the tiled grid normalized to [0, 1] (all zeros if every value is
    identical).
    """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N * H + N, D * W + D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y * H + y:(y + 1) * H + y, x * W + x:(x + 1) * W + x, :] = rows[y][x]
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    if maxg == ming:
        return np.zeros(G.shape)
    G = (G - ming) / float(maxg - ming)
    return G
jupyter-core==4.0.6 24 | matplotlib==1.5.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==4.0.6 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | six==1.10.0 43 | terminado==0.5 44 | tornado==4.3 45 | traitlets==4.0.0 46 | wsgiref==0.1.2 47 | -------------------------------------------------------------------------------- /assignment2/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment3/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment3/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment3.zip 2 | zip -r assignment3.zip . 
class CaptioningSolver(object):
    """
    A CaptioningSolver encapsulates all the logic necessary for training
    image captioning models. The CaptioningSolver performs stochastic gradient
    descent using different update rules defined in optim.py.

    The solver accepts both training and validation data. Note that periodic
    accuracy / BLEU evaluation is NOT implemented yet: check_accuracy() is a
    placeholder, train_acc_history / val_acc_history stay empty, and the
    parameters are never swapped for a "best" snapshot. After train() returns,
    model.params simply holds the parameters from the last update.

    To train a model, you will first construct a CaptioningSolver instance,
    passing the model, dataset, and various options (learning rate, batch size,
    etc) to the constructor. You will then call the train() method to run the
    optimization procedure and train the model. The instance variable
    solver.loss_history will contain a list of all losses encountered during
    training.

    Example usage might look something like this:

    data = load_coco_data()
    model = MyAwesomeModel(hidden_dim=100)
    solver = CaptioningSolver(model, data,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    num_epochs=10, batch_size=100,
                    print_every=100)
    solver.train()


    A CaptioningSolver works on a model object that must conform to the
    following API:

    - model.params must be a dictionary mapping string parameter names to numpy
      arrays containing parameter values.

    - model.loss(features, captions) must be a function that computes
      training-time loss and gradients, with the following inputs and outputs:

      Inputs:
      - features: Array giving a minibatch of features for images, of shape
        (N, D)
      - captions: Array of captions for those images, of shape (N, T) where
        each element is in the range [0, V).

      Returns:
      - loss: Scalar giving the loss
      - grads: Dictionary with the same keys as self.params mapping parameter
        names to gradients of the loss with respect to those parameters.
    """

    def __init__(self, model, data, **kwargs):
        """
        Construct a new CaptioningSolver instance.

        Required arguments:
        - model: A model object conforming to the API described above
        - data: A dictionary of training and validation data from
          load_coco_data

        Optional arguments:
        - update_rule: A string giving the name of an update rule in optim.py.
          Default is 'sgd'.
        - optim_config: A dictionary containing hyperparameters that will be
          passed to the chosen update rule. Each update rule requires different
          hyperparameters (see optim.py) but all update rules require a
          'learning_rate' parameter so that should always be present.
        - lr_decay: A scalar for learning rate decay; after each epoch the
          learning rate is multiplied by this value.
        - batch_size: Size of minibatches used to compute loss and gradient
          during training.
        - num_epochs: The number of epochs to run for during training.
        - print_every: Integer; training losses will be printed every
          print_every iterations.
        - verbose: Boolean; if set to false then no output will be printed
          during training.

        Raises:
        - ValueError: if an unknown keyword argument is passed, or if
          update_rule does not name an attribute of the optim module.
        """
        self.model = model
        self.data = data

        # Unpack keyword arguments
        self.update_rule = kwargs.pop('update_rule', 'sgd')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)

        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)

        # Anything still left in kwargs was not recognized above.
        if kwargs:
            extra = ', '.join('"%s"' % k for k in kwargs)
            raise ValueError('Unrecognized arguments %s' % extra)

        # Make sure the update rule exists, then replace the string
        # name with the actual function
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.update_rule = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        """
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

        # Give every parameter its own (shallow) copy of optim_config so that
        # per-parameter optimizer state and learning-rate decay do not leak
        # between parameters or back into the caller's dictionary.
        self.optim_configs = {
            p: dict(self.optim_config) for p in self.model.params
        }

    def _step(self):
        """
        Make a single gradient update. This is called by train() and should not
        be called manually.
        """
        # Make a minibatch of training data; the image URLs are not needed
        # for the update itself.
        captions, features, _ = sample_coco_minibatch(
            self.data, batch_size=self.batch_size, split='train')

        # Compute loss and gradient
        loss, grads = self.model.loss(features, captions)
        self.loss_history.append(loss)

        # Perform a parameter update. Only values of existing keys are
        # replaced, so the dictionary size never changes during iteration.
        for p, w in self.model.params.items():
            next_w, next_config = self.update_rule(
                w, grads[p], self.optim_configs[p])
            self.model.params[p] = next_w
            self.optim_configs[p] = next_config

    # TODO: This does nothing right now; maybe implement BLEU?
    def check_accuracy(self, X, y, num_samples=None, batch_size=100):
        """
        Placeholder for an evaluation metric; currently always returns 0.0.

        Classification accuracy is not meaningful for captioning, so no metric
        is computed. The signature is kept so callers do not have to change
        once a real metric (e.g. BLEU) is implemented.

        Inputs:
        - X: Array of data (unused)
        - y: Array of labels (unused)
        - num_samples: unused
        - batch_size: unused

        Returns:
        - acc: Always 0.0.
        """
        return 0.0

    def train(self):
        """
        Run optimization to train the model.

        Runs num_epochs * max(num_train // batch_size, 1) update steps and
        multiplies every per-parameter learning rate by lr_decay at the end of
        each epoch.
        """
        num_train = self.data['train_captions'].shape[0]
        # Floor division keeps the iteration count an integer on both
        # Python 2 and Python 3.
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(num_iterations):
            self._step()

            # Maybe print training loss
            if self.verbose and t % self.print_every == 0:
                print('(Iteration %d / %d) loss: %f' % (
                    t + 1, num_iterations, self.loss_history[-1]))

            # At the end of every epoch, increment the epoch counter and decay
            # the learning rate.
            if (t + 1) % iterations_per_epoch == 0:
                self.epoch += 1
                for k in self.optim_configs:
                    self.optim_configs[k]['learning_rate'] *= self.lr_decay

            # TODO: Implement some logic to check BLEU on the validation set
            # periodically, track the best parameters in self.best_params and
            # swap them into the model once training has finished.
18 | self.conv_params.append({'stride': 2, 'pad': 2}) 19 | self.conv_params.append({'stride': 1, 'pad': 1}) 20 | self.conv_params.append({'stride': 2, 'pad': 1}) 21 | self.conv_params.append({'stride': 1, 'pad': 1}) 22 | self.conv_params.append({'stride': 2, 'pad': 1}) 23 | self.conv_params.append({'stride': 1, 'pad': 1}) 24 | self.conv_params.append({'stride': 2, 'pad': 1}) 25 | self.conv_params.append({'stride': 1, 'pad': 1}) 26 | self.conv_params.append({'stride': 2, 'pad': 1}) 27 | 28 | self.filter_sizes = [5, 3, 3, 3, 3, 3, 3, 3, 3] 29 | self.num_filters = [64, 64, 128, 128, 256, 256, 512, 512, 1024] 30 | hidden_dim = 512 31 | 32 | self.bn_params = [] 33 | 34 | cur_size = input_size 35 | prev_dim = 3 36 | self.params = {} 37 | for i, (f, next_dim) in enumerate(zip(self.filter_sizes, self.num_filters)): 38 | fan_in = f * f * prev_dim 39 | self.params['W%d' % (i + 1)] = np.sqrt(2.0 / fan_in) * np.random.randn(next_dim, prev_dim, f, f) 40 | self.params['b%d' % (i + 1)] = np.zeros(next_dim) 41 | self.params['gamma%d' % (i + 1)] = np.ones(next_dim) 42 | self.params['beta%d' % (i + 1)] = np.zeros(next_dim) 43 | self.bn_params.append({'mode': 'train'}) 44 | prev_dim = next_dim 45 | if self.conv_params[i]['stride'] == 2: cur_size /= 2 46 | 47 | # Add a fully-connected layers 48 | fan_in = cur_size * cur_size * self.num_filters[-1] 49 | self.params['W%d' % (i + 2)] = np.sqrt(2.0 / fan_in) * np.random.randn(fan_in, hidden_dim) 50 | self.params['b%d' % (i + 2)] = np.zeros(hidden_dim) 51 | self.params['gamma%d' % (i + 2)] = np.ones(hidden_dim) 52 | self.params['beta%d' % (i + 2)] = np.zeros(hidden_dim) 53 | self.bn_params.append({'mode': 'train'}) 54 | self.params['W%d' % (i + 3)] = np.sqrt(2.0 / hidden_dim) * np.random.randn(hidden_dim, num_classes) 55 | self.params['b%d' % (i + 3)] = np.zeros(num_classes) 56 | 57 | for k, v in self.params.iteritems(): 58 | self.params[k] = v.astype(dtype) 59 | 60 | if h5_file is not None: 61 | self.load_weights(h5_file) 62 | 63 | 64 | 
def load_weights(self, h5_file, verbose=False):
    """
    Load pretrained weights from an HDF5 file.

    Inputs:
    - h5_file: Path to the HDF5 file where pretrained weights are stored.
    - verbose: Whether to print debugging info

    Raises:
    - ValueError: if a stored array whose name matches a model parameter has
      neither that parameter's shape nor the shape of its transpose.
    - AssertionError: if a stored running mean / variance does not have the
      shape of the corresponding entry in self.bn_params.
    """
    # Before loading weights we need to make a dummy training-mode pass to
    # initialize the running averages in the bn_params; only that side
    # effect matters, so the returned loss and gradients are discarded.
    # NOTE(review): presumably the batchnorm layers create the
    # 'running_mean' / 'running_var' entries during this pass -- confirm
    # against cs231n.layers.
    x = np.random.randn(1, 3, self.input_size, self.input_size)
    y = np.random.randint(self.num_classes, size=1)
    self.loss(x, y)

    with h5py.File(h5_file, 'r') as f:
        for k, v in f.items():
            v = np.asarray(v)
            if k in self.params:
                if verbose:
                    print('%s %s %s' % (k, v.shape, self.params[k].shape))
                if v.shape == self.params[k].shape:
                    self.params[k] = v.copy()
                elif v.T.shape == self.params[k].shape:
                    # Stored with the axes reversed; transpose on load.
                    self.params[k] = v.T.copy()
                else:
                    raise ValueError('shapes for %s do not match' % k)
            if k.startswith('running_mean'):
                # Names are 1-based ('running_mean1' is layer index 0).
                i = int(k[len('running_mean'):]) - 1
                assert self.bn_params[i]['running_mean'].shape == v.shape, \
                    'shapes for %s do not match' % k
                self.bn_params[i]['running_mean'] = v.copy()
                if verbose:
                    print('%s %s' % (k, v.shape))
            if k.startswith('running_var'):
                i = int(k[len('running_var'):]) - 1
                assert v.shape == self.bn_params[i]['running_var'].shape, \
                    'shapes for %s do not match' % k
                self.bn_params[i]['running_var'] = v.copy()
                if verbose:
                    print('%s %s' % (k, v.shape))

    # Cast everything to the model dtype; only values of existing keys are
    # replaced, so iterating the dictionary while assigning is safe.
    for k, v in self.params.items():
        self.params[k] = v.astype(self.dtype)
113 | 114 | For the purposes of this function, a "layer" is one of the following blocks: 115 | 116 | [conv - spatial batchnorm - relu] (There are 9 of these) 117 | [affine - batchnorm - relu] (There is one of these) 118 | [affine] (There is one of these) 119 | 120 | Inputs: 121 | - X: The input to the starting layer. If start=0, then this should be an 122 | array of shape (N, C, 64, 64). 123 | - start: The index of the layer to start from. start=0 starts from the first 124 | convolutional layer. Default is 0. 125 | - end: The index of the layer to end at. end=10 ends at the last 126 | fully-connected layer, returning class scores. Default is 10. 127 | - mode: The mode to use, either 'test' or 'train'. We need this because 128 | batch normalization behaves differently at training time and test time. 129 | 130 | Returns: 131 | - out: Output from the end layer. 132 | - cache: A cache object that can be passed to the backward method to run the 133 | network backward over the same range of layers. 
def backward(self, dout, cache):
    """
    Backpropagate through the layers covered by an earlier self.forward call.

    Inputs:
    - dout: Upstream gradient for the output of the last layer that was run;
      it should have the same shape as the out value returned by forward.
    - cache: The cache object returned from self.forward, i.e. a tuple
      (start, end, layer_caches).

    Returns:
    - dX: Gradient with respect to the input of the first layer that was run.
    - grads: Dictionary with the gradients of every parameter of the layers
      that were run, keyed like self.params ('W%d', 'b%d', and for the
      batch-normalized layers also 'gamma%d' and 'beta%d').

    Raises:
    - ValueError: if the cached range contains an index that is not a layer.

    Note: the per-layer caches are popped off the list stored in cache, so a
    cache object can only be used for one backward pass.
    """
    first, last, caches = cache
    num_conv = len(self.conv_params)
    grads = {}
    upstream = dout

    # Walk the layers from the last one that was run back to the first.
    for layer in range(last, first - 1, -1):
        if layer < 0 or layer > num_conv + 1:
            raise ValueError('Invalid layer index %d' % layer)

        suffix = '%d' % (layer + 1)  # parameter names are 1-based
        if layer == num_conv + 1:
            # Final affine layer: weights and biases only.
            upstream, dw, db = affine_backward(upstream, caches.pop())
            bn_grads = ()
        else:
            # The hidden affine layer and the conv layers are both followed
            # by batchnorm + relu and return the same five values.
            if layer == num_conv:
                result = affine_bn_relu_backward(upstream, caches.pop())
            else:
                result = conv_bn_relu_backward(upstream, caches.pop())
            upstream, dw, db, dgamma, dbeta = result
            bn_grads = (('gamma', dgamma), ('beta', dbeta))

        grads['W' + suffix] = dw
        grads['b' + suffix] = db
        for prefix, grad in bn_grads:
            grads[prefix + suffix] = grad

    return upstream, grads
class CaptioningRNN(object):
    """
    A CaptioningRNN produces captions from image features using a recurrent
    neural network.

    The RNN receives input vectors of size D, has a vocab size of V, works on
    sequences of length T, has an RNN hidden dimension of H, uses word vectors
    of dimension W, and operates on minibatches of size N.

    Note that we don't use any regularization for the CaptioningRNN.
    """

    def __init__(self, word_to_idx, input_dim=512, wordvec_dim=128,
                 hidden_dim=128, cell_type='rnn', dtype=np.float32):
        """
        Construct a new CaptioningRNN instance.

        Inputs:
        - word_to_idx: A dictionary giving the vocabulary. It contains V
          entries, and maps each string to a unique integer in the range
          [0, V). It must contain the '<NULL>' padding token; '<START>' and
          '<END>' are looked up if present.
        - input_dim: Dimension D of input image feature vectors.
        - wordvec_dim: Dimension W of word vectors.
        - hidden_dim: Dimension H for the hidden state of the RNN.
        - cell_type: What type of RNN to use; either 'rnn' or 'lstm'.
        - dtype: numpy datatype to use; use float32 for training and float64
          for numeric gradient checking.

        Raises:
        - ValueError: if cell_type is neither 'rnn' nor 'lstm'.
        - KeyError: if word_to_idx has no '<NULL>' entry.
        """
        if cell_type not in {'rnn', 'lstm'}:
            raise ValueError('Invalid cell_type "%s"' % cell_type)

        self.cell_type = cell_type
        self.dtype = dtype
        self.word_to_idx = word_to_idx
        self.idx_to_word = {i: w for w, i in word_to_idx.items()}
        self.params = {}

        vocab_size = len(word_to_idx)

        # Indices of the special tokens. These must be three distinct keys:
        # <NULL> is masked out of the loss and <START> seeds sampling.
        self._null = word_to_idx['<NULL>']
        self._start = word_to_idx.get('<START>', None)
        self._end = word_to_idx.get('<END>', None)

        # Initialize word vectors
        self.params['W_embed'] = np.random.randn(vocab_size, wordvec_dim)
        self.params['W_embed'] /= 100

        # Initialize CNN -> hidden state projection parameters
        self.params['W_proj'] = np.random.randn(input_dim, hidden_dim)
        self.params['W_proj'] /= np.sqrt(input_dim)
        self.params['b_proj'] = np.zeros(hidden_dim)

        # Initialize parameters for the RNN; an LSTM stacks four gate
        # matrices side by side, hence the factor of 4.
        dim_mul = {'lstm': 4, 'rnn': 1}[cell_type]
        self.params['Wx'] = np.random.randn(wordvec_dim, dim_mul * hidden_dim)
        self.params['Wx'] /= np.sqrt(wordvec_dim)
        self.params['Wh'] = np.random.randn(hidden_dim, dim_mul * hidden_dim)
        self.params['Wh'] /= np.sqrt(hidden_dim)
        self.params['b'] = np.zeros(dim_mul * hidden_dim)

        # Initialize output to vocab weights
        self.params['W_vocab'] = np.random.randn(hidden_dim, vocab_size)
        self.params['W_vocab'] /= np.sqrt(hidden_dim)
        self.params['b_vocab'] = np.zeros(vocab_size)

        # Cast parameters to correct dtype
        for k, v in self.params.items():
            self.params[k] = v.astype(self.dtype)

    def loss(self, features, captions):
        """
        Compute training-time loss for the RNN. We input image features and
        ground-truth captions for those images, and use an RNN (or LSTM) to
        compute loss and gradients on all parameters.

        Inputs:
        - features: Input image features, of shape (N, D)
        - captions: Ground-truth captions; an integer array of shape (N, T)
          where each element is in the range 0 <= y[i, t] < V

        Returns a tuple of:
        - loss: Scalar loss
        - grads: Dictionary of gradients parallel to self.params
        """
        # Cut captions into two pieces: captions_in has everything but the
        # last word and will be input to the RNN; captions_out has everything
        # but the first word and this is what we will expect the RNN to
        # generate. These are offset by one relative to each other because the
        # RNN should produce word (t+1) after receiving word t.
        captions_in = captions[:, :-1]
        captions_out = captions[:, 1:]

        # Positions holding <NULL> padding do not contribute to the loss.
        mask = (captions_out != self._null)

        W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
        W_embed = self.params['W_embed']
        Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']
        W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

        grads = {}

        # Forward pass
        # (1) image features -> initial hidden state
        h0, proj_cache = affine_forward(features, W_proj, b_proj)
        # (2) word indices -> word vectors
        word_vecs, embed_cache = word_embedding_forward(captions_in, W_embed)
        # (3) recurrent layer over the whole sequence
        if self.cell_type == 'rnn':
            h, rnn_cache = rnn_forward(word_vecs, h0, Wx, Wh, b)
        elif self.cell_type == 'lstm':
            h, rnn_cache = lstm_forward(word_vecs, h0, Wx, Wh, b)
        # (4) hidden states -> vocabulary scores at every timestep
        scores, vocab_cache = temporal_affine_forward(h, W_vocab, b_vocab)
        # (5) masked softmax loss against the shifted captions
        loss, dscores = temporal_softmax_loss(scores, captions_out, mask)

        # Backward pass, in the reverse order of the steps above
        dh, grads['W_vocab'], grads['b_vocab'] = temporal_affine_backward(
            dscores, vocab_cache)
        if self.cell_type == 'rnn':
            dword_vecs, dh0, grads['Wx'], grads['Wh'], grads['b'] = \
                rnn_backward(dh, rnn_cache)
        elif self.cell_type == 'lstm':
            dword_vecs, dh0, grads['Wx'], grads['Wh'], grads['b'] = \
                lstm_backward(dh, rnn_cache)
        grads['W_embed'] = word_embedding_backward(dword_vecs, embed_cache)
        # The gradient with respect to the image features is not needed.
        _, grads['W_proj'], grads['b_proj'] = affine_backward(dh0, proj_cache)

        return loss, grads

    def sample(self, features, max_length=30):
        """
        Run a test-time forward pass for the model, sampling captions for
        input feature vectors.

        At each timestep, we embed the current word, pass it and the previous
        hidden state to the RNN to get the next hidden state, use the hidden
        state to get scores for all vocab words, and choose the word with the
        highest score as the next word. The initial hidden state is computed
        by applying an affine transform to the input image features, and the
        initial word is the <START> token.

        For LSTMs the cell state is tracked as well; the initial cell state is
        zero. Generation does not stop early when <END> is sampled.

        Inputs:
        - features: Array of input image features of shape (N, D).
        - max_length: Maximum length T of generated captions.

        Returns:
        - captions: Array of shape (N, max_length) giving sampled captions,
          where each element is an integer in the range [0, V). The first
          element of captions is the first sampled word, not the <START>
          token.
        """
        N = features.shape[0]
        captions = self._null * np.ones((N, max_length), dtype=np.int32)

        # Unpack parameters
        W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
        W_embed = self.params['W_embed']
        Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']
        W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']

        # The projected image features act as the initial hidden state.
        prev_h, _ = affine_forward(features, W_proj, b_proj)
        if self.cell_type == 'lstm':
            prev_c = np.zeros_like(prev_h)

        # <START> is only fed to the network; it is never written to the
        # output, so all max_length slots hold sampled words.
        word = np.full(N, self._start)
        for t in range(max_length):
            word_vec, _ = word_embedding_forward(word, W_embed)
            if self.cell_type == 'rnn':
                prev_h, _ = rnn_step_forward(word_vec, prev_h, Wx, Wh, b)
            elif self.cell_type == 'lstm':
                prev_h, prev_c, _ = lstm_step_forward(
                    word_vec, prev_h, prev_c, Wx, Wh, b)
            scores, _ = affine_forward(prev_h, W_vocab, b_vocab)
            # Greedy decoding: the best-scoring word becomes the next input.
            word = scores.argmax(axis=1)
            captions[:, t] = word

        return captions
def decode_captions(captions, idx_to_word):
    """
    Turn arrays of word indices back into caption strings.

    Inputs:
    - captions: Integer numpy array of shape (T,) for a single caption or
      (N, T) for a batch of captions.
    - idx_to_word: Indexable (list or dict) mapping a word index to its
      string.

    Returns:
    - A single string if captions was one-dimensional, otherwise a list of N
      strings. '<NULL>' padding is dropped and decoding of a caption stops
      right after its first '<END>' token (which is kept in the output).
    """
    singleton = captions.ndim == 1
    if singleton:
        # Treat a lone caption as a batch of one.
        captions = captions[None]

    decoded = []
    for caption in captions:
        words = []
        for idx in caption:
            word = idx_to_word[idx]
            if word != '<NULL>':
                words.append(word)
            if word == '<END>':
                break
        decoded.append(' '.join(words))

    return decoded[0] if singleton else decoded
def _read_image_chw(img_file):
    """Read one image file with imread and return it channels-first."""
    img = imread(img_file)
    if img.ndim == 2:
        # Grayscale file: add a singleton channel axis so the transpose below
        # gives (1, 64, 64), which broadcasts across the 3 channel slots of
        # the destination array.
        img.shape = (64, 64, 1)
    return img.transpose(2, 0, 1)


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class. Each line is
    # "<wnid>\t<comma separated names>".
    wnid_to_words = {}
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        for line in f:
            wnid, words = line.split('\t')
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data, one block of images per synset.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            # Single-argument print() behaves the same on Python 2 and 3.
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            X_train_block[j] = _read_image_chw(img_file)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data; the first two tab-separated columns of each
    # annotation line are the image file name and its wnid.
    img_files = []
    val_wnids = []
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
    num_val = len(img_files)
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'val', 'images', img_file)
        X_val[i] = _read_image_chw(img_file)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        X_test[i] = _read_image_chw(img_file)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        img_file_to_wnid = {}
        with open(y_test_file, 'r') as f:
            for line in f:
                fields = line.split('\t')
                # Strip so a trailing newline on a two-column line does not
                # break the wnid_to_label lookup (its keys are stripped).
                img_file_to_wnid[fields[0]] = fields[1].strip()
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    # The mean image is always returned, but only subtracted on request.
    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
211 | """ 212 | models = {} 213 | for model_file in os.listdir(models_dir): 214 | with open(os.path.join(models_dir, model_file), 'rb') as f: 215 | try: 216 | models[model_file] = pickle.load(f)['model'] 217 | except pickle.UnpicklingError: 218 | continue 219 | return models 220 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_coco_captioning.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/coco_captioning.zip" 2 | unzip coco_captioning.zip 3 | rm coco_captioning.zip 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_pretrained_model.sh: -------------------------------------------------------------------------------- 1 | wget http://cs231n.stanford.edu/pretrained_model.h5 2 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_tiny_imagenet_a.sh: -------------------------------------------------------------------------------- 1 | wget http://cs231n.stanford.edu/tiny-imagenet-100-A.zip 2 | unzip tiny-imagenet-100-A.zip 3 | rm tiny-imagenet-100-A.zip 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/fast_layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | try: 3 | from cs231n.im2col_cython import col2im_cython, im2col_cython 4 | from cs231n.im2col_cython import col2im_6d_cython 5 | except ImportError: 6 | print 'run the following from the cs231n directory and try again:' 7 | print 'python setup.py build_ext --inplace' 8 | print 'You may also need to restart your iPython kernel' 9 | 10 | from cs231n.im2col import * 11 | 12 | 13 | def conv_forward_im2col(x, w, b, conv_param): 14 | """ 15 | A fast implementation of the forward pass for a convolutional layer 16 | based on im2col and 
def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer using a strided im2col view.

    Inputs:
    - x: Input array; its shape is unpacked as (N, C, H, W).
    - w: Filter array; its shape is unpacked as (F, _, HH, WW).
    - b: Biases, reshaped to a column and added to each filter's output.
    - conv_param: Dictionary with keys 'stride' and 'pad'.

    Returns a tuple of:
    - out: Contiguous array of shape (N, F, out_h, out_w).
    - cache: (x, w, b, conv_param, x_cols) for the backward pass.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Pad the input spatially with zeros.
    x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                      mode='constant')

    # Figure out output dimensions. Floor division keeps these integers on
    # both Python 2 and Python 3; no divisibility check is enforced here.
    H_pad = H + 2 * pad
    W_pad = W + 2 * pad
    out_h = (H_pad - HH) // stride + 1
    out_w = (W_pad - WW) // stride + 1

    # Perform an im2col operation by picking clever strides: view the padded
    # input as (C, HH, WW, N, out_h, out_w) without copying, then copy once.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H_pad * W_pad, W_pad, 1,
               C * H_pad * W_pad, stride * W_pad, stride)
    # Element strides -> byte strides, using the array that is being viewed.
    strides = x_padded.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                                               shape=shape, strides=strides)
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output to (N, F, out_h, out_w)
    res.shape = (F, N, out_h, out_w)
    out = np.ascontiguousarray(res.transpose(1, 0, 2, 3))

    cache = (x, w, b, conv_param, x_cols)
    return out, cache
def max_pool_forward_fast(x, pool_param):
    """
    Forward pass for max pooling that dispatches to the quickest valid method.

    The reshape-based implementation is used when the pooling window is
    square, its side equals the stride, and it evenly divides both spatial
    dimensions of x. In every other case the im2col-based implementation is
    used instead.

    Returns a tuple of:
    - out: Pooled output from the chosen implementation.
    - cache: (method_name, method_cache), where method_name is 'reshape' or
      'im2col' so the backward pass can pick the matching routine.
    """
    _, _, height, width = x.shape
    ph = pool_param['pool_height']
    pw = pool_param['pool_width']
    step = pool_param['stride']

    square_window = ph == pw == step
    covers_input = height % ph == 0 and width % pw == 0

    if not (square_window and covers_input):
        out, inner_cache = max_pool_forward_im2col(x, pool_param)
        return out, ('im2col', inner_cache)

    out, inner_cache = max_pool_forward_reshape(x, pool_param)
    return out, ('reshape', inner_cache)
def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that
    uses some clever reshaping.

    This can only be used for square pooling regions that tile the input.

    Inputs:
    - x: Input array; its shape is unpacked as (N, C, H, W).
    - pool_param: Dictionary with keys 'pool_height', 'pool_width', 'stride'.

    Returns a tuple of:
    - out: Array of shape (N, C, H // pool_height, W // pool_width).
    - cache: (x, x_reshaped, out) for the backward pass.

    Raises:
    - AssertionError if the window is not square with side equal to the
      stride, or if it does not evenly divide H and W.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_width == 0
    # Floor division keeps the reshape dimensions integral on Python 3 too.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)
    # Reduce over the within-window row axis, then the within-window column
    # axis (which has moved to position 4 after the first reduction).
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache
def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.

    Inputs:
    - x: Input array; its shape is unpacked as (N, C, H, W).
    - pool_param: Dictionary with keys 'pool_height', 'pool_width', 'stride'.

    Returns a tuple of:
    - out: Array of shape (N, C, out_height, out_width).
    - cache: (x, x_cols, x_cols_argmax, pool_param) for the backward pass.

    Raises:
    - AssertionError if the windows do not fit the input exactly.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # Floor division keeps the output sizes integral on Python 3 as well.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat every channel as its own single-channel image so that each column
    # of x_cols holds exactly one pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    # im2col_indices is the pure-numpy im2col; it is the counterpart of the
    # col2im_indices call used by max_pool_backward_im2col.
    x_cols = im2col_indices(x_split, pool_height, pool_width,
                            padding=0, stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    A naive implementation of the numerical gradient of f at x.

    Inputs:
    - f: Function that takes a single argument (the array x) and returns a
      scalar.
    - x: numpy array giving the point to evaluate the gradient at. It is
      perturbed in place one element at a time and restored afterwards.
    - verbose: If True, print each index together with its partial derivative.
    - h: Step size for the centered difference.

    Returns:
    - grad: Array shaped like x holding the centered-difference estimate
      (f(x + h) - f(x - h)) / (2 * h) for every element.
    """
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]

        x[ix] = oldval + h  # increment by h
        fxph = f(x)         # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)         # evaluate f(x - h)
        x[ix] = oldval      # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            # Formatted into a single string so the output is identical
            # whether print is a statement (Python 2) or a function.
            print('%s %s' % (ix, grad[ix]))
        it.iternext()  # step to next dimension

    return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Numerically differentiate a function that reads and writes blobs.

    f is called as f(*inputs, output): it takes every input blob followed by
    the output blob, and is expected to write its result into output.vals.
    For example, with two inputs it would be invoked as

    f(x, w, out)

    Inputs:
    - f: function
    - inputs: tuple of input blobs (objects with .vals and .diffs arrays)
    - output: output blob; output.diffs supplies the upstream gradient
    - h: step size

    Returns:
    - A list with one array per input blob, shaped like that blob's diffs,
      holding the centered-difference gradient estimate.
    """
    def forward_snapshot():
        # Re-run f on the current blob values and copy what it produced.
        f(*(inputs + (output,)))
        return np.copy(output.vals)

    gradients = []
    for blob in inputs:
        grad = np.zeros_like(blob.diffs)
        cursor = np.nditer(blob.vals, flags=['multi_index'],
                           op_flags=['readwrite'])
        while not cursor.finished:
            where = cursor.multi_index
            saved = blob.vals[where]

            blob.vals[where] = saved + h
            plus = forward_snapshot()
            blob.vals[where] = saved - h
            minus = forward_snapshot()
            blob.vals[where] = saved

            grad[where] = np.sum((plus - minus) * output.diffs) / (2.0 * h)
            cursor.iternext()
        gradients.append(grad)
    return gradients
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    """
    Build the fancy-indexing arrays used by im2col_indices / col2im_indices.

    Inputs:
    - x_shape: Tuple (N, C, H, W) giving the shape of the unpadded input.
    - field_height, field_width: Size of the receptive field.
    - padding: Number of pixels of padding on each spatial side.
    - stride: Step between adjacent receptive fields.

    Returns a tuple (k, i, j) of integer arrays:
    - k: Shape (C * field_height * field_width, 1); channel index per row.
    - i: Shape (C * field_height * field_width, out_height * out_width); row
      index into the padded input.
    - j: Same shape as i; column index into the padded input.

    Raises:
    - AssertionError if the receptive fields do not tile the padded input.
    """
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    # The width check must use the field *width*; using the height rejects
    # valid non-square fields and accepts invalid ones.
    assert (W + 2 * padding - field_width) % stride == 0
    # Floor division keeps the sizes integral on Python 3 as well.
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    # Offsets inside one receptive field, repeated for every channel.
    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    j0 = np.tile(np.arange(field_width), field_height * C)
    # Top-left corner of every receptive field, in row-major output order.
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k, i, j)
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """
    Scatter-add a column matrix back into image layout (inverse of im2col).

    Uses the index arrays from get_im2col_indices together with np.add.at, so
    positions covered by several receptive fields accumulate the sum of all
    their contributions.

    Inputs:
    - cols: 2-D column array, reshaped internally to
      (C * field_height * field_width, -1, N).
    - x_shape: Tuple (N, C, H, W) giving the shape of the unpadded image.
    - field_height, field_width, padding, stride: Receptive-field geometry.

    Returns:
    - Array of shape (N, C, H, W). When padding is non-zero this is a slice of
      the padded accumulation buffer with the border removed.
    """
    N, C, H, W = x_shape
    padded_shape = (N, C, H + 2 * padding, W + 2 * padding)
    x_padded = np.zeros(padded_shape, dtype=cols.dtype)

    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)

    # Bring the image axis to the front so it lines up with slice(None).
    rows = C * field_height * field_width
    per_image = cols.reshape(rows, -1, N).transpose(2, 0, 1)
    np.add.at(x_padded, (slice(None), k, i, j), per_image)

    if padding != 0:
        return x_padded[:, :, padding:-padding, padding:-padding]
    return x_padded
@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Fill `cols` in place from the padded input.
    #
    # Row layout:    row = (c * field_height + ii) * field_width + jj
    # Column layout: col = (yy * WW + xx) * N + i
    # where (ii, jj) is the offset inside the receptive field, (yy, xx) is the
    # output position and i is the image index. HH / WW are the output height
    # and width here.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        # jj spans field_width values, so the row stride for ii
                        # must be field_width; using field_height makes rows
                        # collide for non-square fields.
                        row = c * field_width * field_height + ii * field_width + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Accumulate `cols` into the padded image buffer in place.
    #
    # Row layout:    row = (c * field_height + ii) * field_width + jj
    # Column layout: col = (yy * WW + xx) * N + i
    # where (ii, jj) is the offset inside the receptive field, (yy, xx) is the
    # output position and i is the image index. Overlapping receptive fields
    # add into the same pixel, hence the += below.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for ii in range(field_height):
            for jj in range(field_width):
                # jj spans field_width values, so the row stride for ii must be
                # field_width; using field_height makes rows collide for
                # non-square fields.
                row = c * field_width * field_height + ii * field_width + jj
                for yy in range(HH):
                    for xx in range(WW):
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
def preprocess_image(img, mean_img, mean='image'):
    """
    Convert to float32, move channels first, add a batch axis, and subtract
    a mean.

    Inputs:
    - img: Image array of shape (H, W, 3)
    - mean_img: Mean image with channels first; used when mean is 'image' or
      'pixel'
    - mean: 'image' subtracts mean_img itself, 'pixel' subtracts its
      per-channel average, 'none' subtracts nothing

    Returns:
    - Array of shape (1, 3, H, W)

    Raises:
    - ValueError if mean is not one of the three recognized modes.
    """
    if mean == 'none':
        offset = 0
    elif mean == 'pixel':
        # Average over the two spatial axes, keeping them for broadcasting.
        offset = mean_img.mean(axis=(1, 2), keepdims=True)
    elif mean == 'image':
        offset = mean_img
    else:
        raise ValueError('mean must be image or pixel or none')

    channels_first = img.astype(np.float32).transpose(2, 0, 1)
    batched = channels_first[None]
    return batched - offset
def image_from_url(url):
    """
    Read an image from a URL. Returns a numpy array with the pixel data.
    We write the image to a temporary file then read it back. Kinda gross.

    Inputs:
    - url: Address of the image to download.

    Returns:
    - The array produced by imread for the downloaded file, or None if the
      download failed with an HTTP or URL error (the error is printed).
    """
    try:
        response = urllib2.urlopen(url)
        try:
            payload = response.read()
        finally:
            response.close()

        # mkstemp returns an already-open descriptor; wrap it so it is closed
        # instead of leaked, and always delete the file afterwards.
        fd, fname = tempfile.mkstemp()
        try:
            with os.fdopen(fd, 'wb') as ff:
                ff.write(payload)
            img = imread(fname)
        finally:
            os.remove(fname)
        return img
    # HTTPError is a subclass of URLError, so it has to be handled first or
    # its branch can never run.
    except urllib2.HTTPError as e:
        print('HTTP Error:  %s %s' % (e.code, url))
    except urllib2.URLError as e:
        print('URL Error:  %s %s' % (e.reason, url))
beta, bn_param): 34 | """ 35 | Convenience layer that performs an affine transform, batch normalization, 36 | and ReLU. 37 | 38 | Inputs: 39 | - x: Array of shape (N, D1); input to the affine layer 40 | - w, b: Arrays of shape (D2, D2) and (D2,) giving the weight and bias for 41 | the affine transform. 42 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift 43 | parameters for batch normalization. 44 | - bn_param: Dictionary of parameters for batch normalization. 45 | 46 | Returns: 47 | - out: Output from ReLU, of shape (N, D2) 48 | - cache: Object to give to the backward pass. 49 | """ 50 | a, fc_cache = affine_forward(x, w, b) 51 | a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param) 52 | out, relu_cache = relu_forward(a_bn) 53 | cache = (fc_cache, bn_cache, relu_cache) 54 | return out, cache 55 | 56 | 57 | def affine_bn_relu_backward(dout, cache): 58 | """ 59 | Backward pass for the affine-batchnorm-relu convenience layer. 60 | """ 61 | fc_cache, bn_cache, relu_cache = cache 62 | da_bn = relu_backward(dout, relu_cache) 63 | da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache) 64 | dx, dw, db = affine_backward(da, fc_cache) 65 | return dx, dw, db, dgamma, dbeta 66 | 67 | 68 | def conv_relu_forward(x, w, b, conv_param): 69 | """ 70 | A convenience layer that performs a convolution followed by a ReLU. 71 | 72 | Inputs: 73 | - x: Input to the convolutional layer 74 | - w, b, conv_param: Weights and parameters for the convolutional layer 75 | 76 | Returns a tuple of: 77 | - out: Output from the ReLU 78 | - cache: Object to give to the backward pass 79 | """ 80 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 81 | out, relu_cache = relu_forward(a) 82 | cache = (conv_cache, relu_cache) 83 | return out, cache 84 | 85 | 86 | def conv_relu_backward(dout, cache): 87 | """ 88 | Backward pass for the conv-relu convenience layer. 
89 | """ 90 | conv_cache, relu_cache = cache 91 | da = relu_backward(dout, relu_cache) 92 | dx, dw, db = conv_backward_fast(da, conv_cache) 93 | return dx, dw, db 94 | 95 | 96 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 97 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 98 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 99 | out, relu_cache = relu_forward(an) 100 | cache = (conv_cache, bn_cache, relu_cache) 101 | return out, cache 102 | 103 | 104 | def conv_bn_relu_backward(dout, cache): 105 | conv_cache, bn_cache, relu_cache = cache 106 | dan = relu_backward(dout, relu_cache) 107 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 108 | dx, dw, db = conv_backward_fast(da, conv_cache) 109 | return dx, dw, db, dgamma, dbeta 110 | 111 | 112 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 113 | """ 114 | Convenience layer that performs a convolution, a ReLU, and a pool. 115 | 116 | Inputs: 117 | - x: Input to the convolutional layer 118 | - w, b, conv_param: Weights and parameters for the convolutional layer 119 | - pool_param: Parameters for the pooling layer 120 | 121 | Returns a tuple of: 122 | - out: Output from the pooling layer 123 | - cache: Object to give to the backward pass 124 | """ 125 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 126 | s, relu_cache = relu_forward(a) 127 | out, pool_cache = max_pool_forward_fast(s, pool_param) 128 | cache = (conv_cache, relu_cache, pool_cache) 129 | return out, cache 130 | 131 | 132 | def conv_relu_pool_backward(dout, cache): 133 | """ 134 | Backward pass for the conv-relu-pool convenience layer 135 | """ 136 | conv_cache, relu_cache, pool_cache = cache 137 | ds = max_pool_backward_fast(dout, pool_cache) 138 | da = relu_backward(ds, relu_cache) 139 | dx, dw, db = conv_backward_fast(da, conv_cache) 140 | return dx, dw, db 141 | 142 | -------------------------------------------------------------------------------- 
# ---------------------------------------------------------------------------
# assignment3/cs231n/layers.py
# ---------------------------------------------------------------------------
import numpy as np


def affine_forward(x, w, b):
    """
    Computes the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input.
    Each input is flattened to a vector of length D = d_1 * ... * d_k and
    multiplied against a weight matrix of shape (D, M).

    Inputs:
    - x: Input data, of shape (N, d_1, ..., d_k)
    - w: Weights, of shape (D, M)
    - b: Biases, of shape (M,)

    Returns a tuple of:
    - out: output, of shape (N, M)
    - cache: (x, w, b)
    """
    flat = x.reshape(x.shape[0], -1)
    out = flat.dot(w) + b
    return out, (x, w, b)


def affine_backward(dout, cache):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple (x, w, b) as returned by affine_forward:
      - x: Input data, of shape (N, d_1, ... d_k)
      - w: Weights, of shape (D, M)
      - b: Biases (unused here)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, _ = cache
    # Gradient flows back through the flattening, so restore x's shape.
    dx = dout.dot(w.T).reshape(x.shape)
    dw = x.reshape(x.shape[0], -1).T.dot(dout)
    db = np.sum(dout, axis=0)
    return dx, dw, db


def relu_forward(x):
    """
    Computes the forward pass for a layer of rectified linear units (ReLUs).

    Input:
    - x: Inputs, of any shape

    Returns a tuple of:
    - out: Output, of the same shape as x
    - cache: x
    """
    return np.maximum(0, x), x


def relu_backward(dout, cache):
    """
    Computes the backward pass for a layer of rectified linear units (ReLUs).

    Input:
    - dout: Upstream derivatives, of any shape
    - cache: Input x, of same shape as dout

    Returns:
    - dx: Gradient with respect to x (dout where x > 0, and 0 elsewhere)
    """
    x = cache
    return np.where(x > 0, dout, 0)


def batchnorm_forward(x, gamma, beta, bn_param):
    """
    Forward pass for batch normalization.

    During training the sample mean and (uncorrected) sample variance are
    computed from minibatch statistics and used to normalize the incoming data.
    During training we also keep an exponentially decaying running mean of the
    mean and variance of each feature, and these averages are used to normalize
    data at test-time.

    At each timestep we update the running averages for mean and variance using
    an exponential decay based on the momentum parameter:

    running_mean = momentum * running_mean + (1 - momentum) * sample_mean
    running_var = momentum * running_var + (1 - momentum) * sample_var

    Note that the batch normalization paper suggests a different test-time
    behavior: they compute sample mean and variance for each feature using a
    large number of training images rather than using a running average. For
    this implementation we have chosen to use running averages instead since
    they do not require an additional estimation step; the torch7
    implementation of batch normalization also uses running averages.

    Input:
    - x: Data of shape (N, D)
    - gamma: Scale parameter of shape (D,)
    - beta: Shift parameter of shape (D,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability (default 1e-5)
      - momentum: Constant for running mean / variance (default 0.9)
      - running_mean: Array of shape (D,) giving running mean of features
      - running_var: Array of shape (D,) giving running variance of features
      Missing running statistics start as zeros. In 'train' mode existing
      arrays are updated in place, and in both modes they are stored back
      into bn_param.

    Returns a tuple of:
    - out: of shape (N, D)
    - cache: A tuple of values needed in the backward pass; its first element
      is the mode.

    Raises:
    - ValueError if mode is neither 'train' nor 'test'.
    """
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    _, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    if mode == 'train':
        # Normalize with the statistics of this minibatch.
        mu = x.mean(axis=0)
        xc = x - mu
        var = np.mean(xc ** 2, axis=0)
        std = np.sqrt(var + eps)
        xn = xc / std
        out = gamma * xn + beta

        cache = (mode, x, gamma, xc, std, xn, out)

        # In-place exponential moving averages of mean and variance.
        running_mean *= momentum
        running_mean += (1 - momentum) * mu
        running_var *= momentum
        running_var += (1 - momentum) * var
    elif mode == 'test':
        # Normalize with the accumulated running statistics.
        std = np.sqrt(running_var + eps)
        xn = (x - running_mean) / std
        out = gamma * xn + beta
        cache = (mode, x, xn, gamma, beta, std)
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the (possibly freshly created) running statistics back.
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache


def batchnorm_backward(dout, cache):
    """
    Backward pass for batch normalization.

    Gradients are propagated backward through the intermediate nodes of the
    forward computation (centering, variance, standard deviation, scaling).
    A cache produced in 'test' mode treats the running statistics as constants.

    Inputs:
    - dout: Upstream derivatives, of shape (N, D)
    - cache: Variable of intermediates from batchnorm_forward.

    Returns a tuple of:
    - dx: Gradient with respect to inputs x, of shape (N, D)
    - dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
    - dbeta: Gradient with respect to shift parameter beta, of shape (D,)

    Raises:
    - ValueError if the mode stored in the cache is not 'train' or 'test'.
    """
    mode = cache[0]
    if mode == 'train':
        mode, x, gamma, xc, std, xn, out = cache

        N = x.shape[0]
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        dxn = gamma * dout
        # Direct path through xn = xc / std ...
        dxc = dxn / std
        # ... plus the path through std = sqrt(var + eps), var = mean(xc ** 2).
        dstd = -np.sum((dxn * xc) / (std * std), axis=0)
        dvar = 0.5 * dstd / std
        dxc += (2.0 / N) * xc * dvar
        # xc = x - mu, so subtract the batch-mean of the gradient.
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / N
    elif mode == 'test':
        mode, x, xn, gamma, beta, std = cache
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        dxn = gamma * dout
        dx = dxn / std
    else:
        raise ValueError(mode)

    return dx, dgamma, dbeta


def spatial_batchnorm_forward(x, gamma, beta, bn_param):
    """
    Computes the forward pass for spatial batch normalization.

    The input is rearranged to shape (N * H * W, C) so that every channel is
    normalized over the batch and both spatial axes by batchnorm_forward.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - gamma: Scale parameter, of shape (C,)
    - beta: Shift parameter, of shape (C,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance. momentum=0 means that
        old information is discarded completely at every time step, while
        momentum=1 means that new information is never incorporated. The
        default of momentum=0.9 should work well in most situations.
      - running_mean: Array of shape (C,) giving running mean of features
      - running_var: Array of shape (C,) giving running variance of features

    Returns a tuple of:
    - out: Output data, of shape (N, C, H, W)
    - cache: Values needed for the backward pass
    """
    N, C, H, W = x.shape
    # Move channels last, then collapse batch and spatial axes into rows.
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache


def spatial_batchnorm_backward(dout, cache):
    """
    Computes the backward pass for spatial batch normalization.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H, W)
    - cache: Values from the forward pass

    Returns a tuple of:
    - dx: Gradient with respect to inputs, of shape (N, C, H, W)
    - dgamma: Gradient with respect to scale parameter, of shape (C,)
    - dbeta: Gradient with respect to shift parameter, of shape (C,)
    """
    N, C, H, W = dout.shape
    # Same rearrangement as the forward pass, undone after the flat backward.
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta


def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    rows = np.arange(N)
    correct_class_scores = x[rows, y]
    # Hinge margins with delta = 1; the correct class contributes nothing.
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[rows, y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    # The correct class is pushed down once per violated margin.
    dx[rows, y] -= num_pos
    dx /= N
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    The loss is evaluated from log-probabilities (log-softmax), so a
    correct-class probability that underflows to 0 gives a large finite loss
    instead of an infinite one.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    rows = np.arange(N)
    # Shift by the row maximum so the largest exponent is exp(0) = 1.
    shifted = x - np.max(x, axis=1, keepdims=True)
    # The sum is at least 1, so its log is always finite.
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    loss = -np.sum(log_probs[rows, y]) / N
    dx = np.exp(log_probs)
    dx[rows, y] -= 1
    dx /= N
    return loss, dx
"""
assignment3/cs231n/optim.py

This file implements various first-order update rules that are commonly used
for training neural networks. Each update rule accepts current weights and the
gradient of the loss with respect to those weights and produces the next set of
weights. Each update rule has the same interface:

def update(w, dw, config=None):

Inputs:
  - w: A numpy array giving the current weights.
  - dw: A numpy array of the same shape as w giving the gradient of the
    loss with respect to w.
  - config: A dictionary containing hyperparameter values such as learning
    rate, momentum, etc. If the update rule requires caching values over many
    iterations, then config will also hold these cached values.

Returns:
  - next_w: The next point after the update.
  - config: The config dictionary to be passed to the next iteration of the
    update rule.

NOTE: For most update rules, the default learning rate will probably not
perform well; however the default values of the other hyperparameters should
work well for a variety of different problems.

For efficiency, update rules may perform in-place updates, mutating w and
setting next_w equal to w.
"""
import numpy as np


def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    w is updated in place and returned.

    config format:
    - learning_rate: Scalar learning rate (default 1e-2).

    Returns a tuple of (w, config).
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)

    w -= config['learning_rate'] * dw
    return w, config


def adam(x, dx, config=None):
    """
    Uses the Adam update rule, which incorporates moving averages of both the
    gradient and its square and a bias correction term.

    x is updated in place and returned; the moving averages and the iteration
    counter are written back into config.

    config format:
    - learning_rate: Scalar learning rate (default 1e-3).
    - beta1: Decay rate for moving average of first moment of gradient
      (default 0.9).
    - beta2: Decay rate for moving average of second moment of gradient
      (default 0.999).
    - epsilon: Small scalar used for smoothing to avoid dividing by zero
      (default 1e-8).
    - m: Moving average of gradient (default zeros like x).
    - v: Moving average of squared gradient (default zeros like x).
    - t: Iteration number (default 0).

    Returns a tuple of (x, config).
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-3)
    config.setdefault('beta1', 0.9)
    config.setdefault('beta2', 0.999)
    config.setdefault('epsilon', 1e-8)
    config.setdefault('t', 0)
    # Allocate the moment buffers only on the first call; setdefault would
    # build (and discard) a fresh zeros array on every step.
    if 'm' not in config:
        config['m'] = np.zeros_like(x)
    if 'v' not in config:
        config['v'] = np.zeros_like(x)

    beta1, beta2, eps = config['beta1'], config['beta2'], config['epsilon']
    t = config['t'] + 1
    m = beta1 * config['m'] + (1 - beta1) * dx
    v = beta2 * config['v'] + (1 - beta2) * (dx * dx)
    # Both bias corrections are folded into the step size.
    alpha = config['learning_rate'] * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    x -= alpha * (m / (np.sqrt(v) + eps))

    config['t'] = t
    config['m'] = m
    config['v'] = v
    return x, config
13 | exec $PYTHON "$@" 14 | -------------------------------------------------------------------------------- /assignment3/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/kitten.jpg -------------------------------------------------------------------------------- /assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.9.0 12 | decorator==4.0.6 13 | functools32==3.2.3-2 14 | gnureadline==6.3.3 15 | ipykernel==4.2.2 16 | ipython==4.0.1 17 | ipython-genutils==0.1.0 18 | ipywidgets==4.1.1 19 | jsonschema==2.5.1 20 | jupyter==1.0.0 21 | jupyter-client==4.1.1 22 | jupyter-console==4.0.3 23 | jupyter-core==4.0.6 24 | matplotlib==1.5.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==4.0.6 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | six==1.10.0 43 | terminado==0.5 44 | tornado==4.3 45 | traitlets==4.0.0 46 | wsgiref==0.1.2 47 | -------------------------------------------------------------------------------- /assignment3/sky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/sky.jpg -------------------------------------------------------------------------------- /assignment3/start_ipython_osx.sh: 
-------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | --------------------------------------------------------------------------------