├── README.md
├── assignment1
    ├── .gitignore
    ├── .ipynb_checkpoints
    │   ├── features-checkpoint.ipynb
    │   ├── knn-checkpoint.ipynb
    │   ├── softmax-checkpoint.ipynb
    │   ├── svm-checkpoint.ipynb
    │   └── two_layer_net-checkpoint.ipynb
    ├── README.md
    ├── collectSubmission.sh
    ├── cs231n
    │   ├── __init__.py
    │   ├── classifiers
    │   │   ├── __init__.py
    │   │   ├── k_nearest_neighbor.py
    │   │   ├── linear_classifier.py
    │   │   ├── linear_svm.py
    │   │   ├── neural_net.py
    │   │   └── softmax.py
    │   ├── data_utils.py
    │   ├── datasets
    │   │   ├── .gitignore
    │   │   └── get_datasets.sh
    │   ├── features.py
    │   ├── gradient_check.py
    │   └── vis_utils.py
    ├── features.ipynb
    ├── frameworkpython
    ├── knn.ipynb
    ├── requirements.txt
    ├── softmax.ipynb
    ├── start_ipython_osx.sh
    ├── svm.ipynb
    └── two_layer_net.ipynb
├── assignment2
    ├── .gitignore
    ├── .ipynb_checkpoints
    │   ├── BatchNormalization-checkpoint.ipynb
    │   ├── ConvolutionalNetworks-checkpoint.ipynb
    │   ├── Dropout-checkpoint.ipynb
    │   └── FullyConnectedNets-checkpoint.ipynb
    ├── BatchNormalization.ipynb
    ├── ConvolutionalNetworks.ipynb
    ├── Dropout.ipynb
    ├── FullyConnectedNets.ipynb
    ├── README.md
    ├── collectSubmission.sh
    ├── cs231n
    │   ├── .gitignore
    │   ├── __init__.py
    │   ├── classifiers
    │   │   ├── __init__.py
    │   │   ├── cnn.py
    │   │   └── fc_net.py
    │   ├── data_utils.py
    │   ├── datasets
    │   │   ├── .gitignore
    │   │   └── get_datasets.sh
    │   ├── fast_layers.py
    │   ├── gradient_check.py
    │   ├── im2col.py
    │   ├── im2col_cython.pyx
    │   ├── layer_utils.py
    │   ├── layers.py
    │   ├── optim.py
    │   ├── setup.py
    │   ├── solver.py
    │   └── vis_utils.py
    ├── frameworkpython
    ├── kitten.jpg
    ├── puppy.jpg
    ├── requirements.txt
    └── start_ipython_osx.sh
└── assignment3
    ├── .gitignore
    ├── ImageGeneration.ipynb
    ├── ImageGradients.ipynb
    ├── LSTM_Captioning.ipynb
    ├── RNN_Captioning.ipynb
    ├── collectSubmission.sh
    ├── cs231n
        ├── .gitignore
        ├── __init__.py
        ├── captioning_solver.py
        ├── classifiers
        │   ├── __init__.py
        │   ├── pretrained_cnn.py
        │   └── rnn.py
        ├── coco_utils.py
        ├── data_utils.py
        ├── datasets
        │   ├── get_coco_captioning.sh
        │   ├── get_pretrained_model.sh
        │   └── get_tiny_imagenet_a.sh
        ├── fast_layers.py
        ├── gradient_check.py
        ├── im2col.py
        ├── im2col_cython.pyx
        ├── image_utils.py
        ├── layer_utils.py
        ├── layers.py
        ├── optim.py
        ├── rnn_layers.py
        └── setup.py
    ├── frameworkpython
    ├── kitten.jpg
    ├── requirements.txt
    ├── sky.jpg
    └── start_ipython_osx.sh


/README.md:
--------------------------------------------------------------------------------
 1 | ## CS231n Convolutional Neural Networks for Visual Recognition
 2 | 
 3 | 斯坦福 cs231n 作业代码实践，代码实现主要参考了 [lightaime/cs231n](https://github.com/lightaime/cs231n)
 4 | 
 5 | - 教程笔记 [cs231n.github.io](http://cs231n.github.io/)
 6 | - 课程主页 [stanford cs231n](http://cs231n.stanford.edu/index.html)
 7 | - 进度安排 [course syllabus](http://cs231n.stanford.edu/syllabus.html)
 8 | 
 9 | 上面的网页有时不稳定，可能需要翻墙（使用代理）才能访问。
10 | 
11 | 下载课程作业源代码:  [Assignment #1](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment1.zip) & [Assignment #2](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment2.zip) & [Assignment #3](http://cs231n.stanford.edu/assignments/2016/winter1516_assignment3.zip)
12 | 
13 | 网上有很多的[资料](http://blog.csdn.net/zhangxb35/article/details/55223825)，包括中文翻译，课程视频等，但是个人觉得都不如写完作业代码的收获大。
14 | 
15 | 我的 CSDN 博客笔记： [cs231n 课程作业](http://blog.csdn.net/zhangxb35/article/category/6727687)
16 | 
17 | ---
18 | 
19 | 注：我的这个是 2016 年做的作业，内容略旧，仅供参考。
20 | 


--------------------------------------------------------------------------------
/assignment1/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | .env/*
4 | 


--------------------------------------------------------------------------------
/assignment1/README.md:
--------------------------------------------------------------------------------
1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Winter 2016.
2 | 


--------------------------------------------------------------------------------
/assignment1/collectSubmission.sh:
--------------------------------------------------------------------------------
# Rebuild assignment1.zip from the current directory.
# Any previous archive is removed first; version-control files, downloaded
# datasets, notebook checkpoints and the helper/metadata files are excluded.
rm -f assignment1.zip
zip -r assignment1.zip . \
  -x "*.git*" \
     "*cs231n/datasets*" \
     "*.ipynb_checkpoints*" \
     "*README.md" \
     "*collectSubmission.sh" \
     "*requirements.txt"
3 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment1/cs231n/__init__.py


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
1 | from cs231n.classifiers.k_nearest_neighbor import *
2 | from cs231n.classifiers.linear_classifier import *
3 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/k_nearest_neighbor.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | class KNearestNeighbor(object):
  4 |   """ a kNN classifier with L2 distance """
  5 | 
  6 |   def __init__(self):
  7 |     pass
  8 | 
  9 |   def train(self, X, y):
 10 |     """
 11 |     Train the classifier. For k-nearest neighbors this is just
 12 |     memorizing the training data.
 13 | 
 14 |     Inputs:
 15 |     - X: A numpy array of shape (num_train, D) containing the training data
 16 |       consisting of num_train samples each of dimension D.
 17 |     - y: A numpy array of shape (N,) containing the training labels, where
 18 |          y[i] is the label for X[i].
 19 |     """
 20 |     self.X_train = X
 21 |     self.y_train = y
 22 | 
 23 |   def predict(self, X, k=1, num_loops=0):
 24 |     """
 25 |     Predict labels for test data using this classifier.
 26 | 
 27 |     Inputs:
 28 |     - X: A numpy array of shape (num_test, D) containing test data consisting
 29 |          of num_test samples each of dimension D.
 30 |     - k: The number of nearest neighbors that vote for the predicted labels.
 31 |     - num_loops: Determines which implementation to use to compute distances
 32 |       between training points and testing points.
 33 | 
 34 |     Returns:
 35 |     - y: A numpy array of shape (num_test,) containing predicted labels for the
 36 |       test data, where y[i] is the predicted label for the test point X[i].
 37 |     """
 38 |     if num_loops == 0:
 39 |       dists = self.compute_distances_no_loops(X)
 40 |     elif num_loops == 1:
 41 |       dists = self.compute_distances_one_loop(X)
 42 |     elif num_loops == 2:
 43 |       dists = self.compute_distances_two_loops(X)
 44 |     else:
 45 |       raise ValueError('Invalid value %d for num_loops' % num_loops)
 46 | 
 47 |     return self.predict_labels(dists, k=k)
 48 | 
 49 |   def compute_distances_two_loops(self, X):
 50 |     """
 51 |     Compute the distance between each test point in X and each training point
 52 |     in self.X_train using a nested loop over both the training data and the
 53 |     test data.
 54 | 
 55 |     Inputs:
 56 |     - X: A numpy array of shape (num_test, D) containing test data.
 57 | 
 58 |     Returns:
 59 |     - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
 60 |       is the Euclidean distance between the ith test point and the jth training
 61 |       point.
 62 |     """
 63 |     num_test = X.shape[0]
 64 |     num_train = self.X_train.shape[0]
 65 |     dists = np.zeros((num_test, num_train))
 66 |     for i in xrange(num_test):
 67 |       for j in xrange(num_train):
 68 |         #####################################################################
 69 |         # TODO:                                                             #
 70 |         # Compute the l2 distance between the ith test point and the jth    #
 71 |         # training point, and store the result in dists[i, j]. You should   #
 72 |         # not use a loop over dimension.                                    #
 73 |         #####################################################################
 74 |         pass
 75 |         dists[i][j] = np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
 76 |         #####################################################################
 77 |         #                       END OF YOUR CODE                            #
 78 |         #####################################################################
 79 |     return dists
 80 | 
 81 |   def compute_distances_one_loop(self, X):
 82 |     """
 83 |     Compute the distance between each test point in X and each training point
 84 |     in self.X_train using a single loop over the test data.
 85 | 
 86 |     Input / Output: Same as compute_distances_two_loops
 87 |     """
 88 |     num_test = X.shape[0]
 89 |     num_train = self.X_train.shape[0]
 90 |     dists = np.zeros((num_test, num_train))
 91 |     for i in xrange(num_test):
 92 |       #######################################################################
 93 |       # TODO:                                                               #
 94 |       # Compute the l2 distance between the ith test point and all training #
 95 |       # points, and store the result in dists[i, :].                        #
 96 |       #######################################################################
 97 |       pass
 98 |       dists[i] = np.sqrt(np.sum((self.X_train - X[i]) ** 2, 1))
 99 |       #######################################################################
100 |       #                         END OF YOUR CODE                            #
101 |       #######################################################################
102 |     return dists
103 | 
104 |   def compute_distances_no_loops(self, X):
105 |     """
106 |     Compute the distance between each test point in X and each training point
107 |     in self.X_train using no explicit loops.
108 | 
109 |     Input / Output: Same as compute_distances_two_loops
110 |     """
111 |     num_test = X.shape[0]
112 |     num_train = self.X_train.shape[0]
113 |     dists = np.zeros((num_test, num_train))
114 |     #########################################################################
115 |     # TODO:                                                                 #
116 |     # Compute the l2 distance between all test points and all training      #
117 |     # points without using any explicit loops, and store the result in      #
118 |     # dists.                                                                #
119 |     #                                                                       #
120 |     # You should implement this function using only basic array operations; #
121 |     # in particular you should not use functions from scipy.                #
122 |     #                                                                       #
123 |     # HINT: Try to formulate the l2 distance using matrix multiplication    #
124 |     #       and two broadcast sums.                                         #
125 |     #########################################################################
126 |     pass
127 |     dists += np.sum(self.X_train ** 2, axis=1).reshape(1, num_train)
128 |     dists += np.sum(X ** 2, axis=1).reshape(num_test, 1) # reshape for broadcasting
129 |     dists -= 2 * np.dot(X, self.X_train.T)
130 |     dists = np.sqrt(dists)
131 |     #########################################################################
132 |     #                         END OF YOUR CODE                              #
133 |     #########################################################################
134 |     return dists
135 | 
136 |   def predict_labels(self, dists, k=1):
137 |     """
138 |     Given a matrix of distances between test points and training points,
139 |     predict a label for each test point.
140 | 
141 |     Inputs:
142 |     - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
143 |       gives the distance betwen the ith test point and the jth training point.
144 | 
145 |     Returns:
146 |     - y: A numpy array of shape (num_test,) containing predicted labels for the
147 |       test data, where y[i] is the predicted label for the test point X[i].
148 |     """
149 |     num_test = dists.shape[0]
150 |     y_pred = np.zeros(num_test)
151 |     for i in xrange(num_test):
152 |       # A list of length k storing the labels of the k nearest neighbors to
153 |       # the ith test point.
154 |       closest_y = []
155 |       #########################################################################
156 |       # TODO:                                                                 #
157 |       # Use the distance matrix to find the k nearest neighbors of the ith    #
158 |       # testing point, and use self.y_train to find the labels of these       #
159 |       # neighbors. Store these labels in closest_y.                           #
160 |       # Hint: Look up the function numpy.argsort.                             #
161 |       #########################################################################
162 |       pass
163 |       closest_y = self.y_train[np.argsort(dists[i])[0:k]]
164 |       #########################################################################
165 |       # TODO:                                                                 #
166 |       # Now that you have found the labels of the k nearest neighbors, you    #
167 |       # need to find the most common label in the list closest_y of labels.   #
168 |       # Store this label in y_pred[i]. Break ties by choosing the smaller     #
169 |       # label.                                                                #
170 |       #########################################################################
171 |       pass
172 |       # to find the most common element in list, you can use np.bincount
173 |       y_pred[i] = np.bincount(closest_y).argmax()
174 |       #########################################################################
175 |       #                           END OF YOUR CODE                            #
176 |       #########################################################################
177 | 
178 |     return y_pred
179 | 
180 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/linear_classifier.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from cs231n.classifiers.linear_svm import *
  3 | from cs231n.classifiers.softmax import *
  4 | 
  5 | class LinearClassifier(object):
  6 | 
  7 |   def __init__(self):
  8 |     self.W = None
  9 | 
 10 |   def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
 11 |             batch_size=200, verbose=False):
 12 |     """
 13 |     Train this linear classifier using stochastic gradient descent.
 14 | 
 15 |     Inputs:
 16 |     - X: A numpy array of shape (N, D) containing training data; there are N
 17 |       training samples each of dimension D.
 18 |     - y: A numpy array of shape (N,) containing training labels; y[i] = c
 19 |       means that X[i] has label 0 <= c < C for C classes.
 20 |     - learning_rate: (float) learning rate for optimization.
 21 |     - reg: (float) regularization strength.
 22 |     - num_iters: (integer) number of steps to take when optimizing
 23 |     - batch_size: (integer) number of training examples to use at each step.
 24 |     - verbose: (boolean) If true, print progress during optimization.
 25 | 
 26 |     Outputs:
 27 |     A list containing the value of the loss function at each training iteration.
 28 |     """
 29 |     num_train, dim = X.shape
 30 |     num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
 31 |     if self.W is None:
 32 |       # lazily initialize W
 33 |       self.W = 0.001 * np.random.randn(dim, num_classes)
 34 | 
 35 |     # Run stochastic gradient descent to optimize W
 36 |     loss_history = []
 37 |     for it in xrange(num_iters):
 38 |       X_batch = None
 39 |       y_batch = None
 40 | 
 41 |       #########################################################################
 42 |       # TODO:                                                                 #
 43 |       # Sample batch_size elements from the training data and their           #
 44 |       # corresponding labels to use in this round of gradient descent.        #
 45 |       # Store the data in X_batch and their corresponding labels in           #
 46 |       # y_batch; after sampling X_batch should have shape (dim, batch_size)   #
 47 |       # and y_batch should have shape (batch_size,)                           #
 48 |       #                                                                       #
 49 |       # Hint: Use np.random.choice to generate indices. Sampling with         #
 50 |       # replacement is faster than sampling without replacement.              #
 51 |       #########################################################################
 52 |       pass
 53 |       indices = np.random.choice(num_train, batch_size)
 54 |       X_batch = X[indices]
 55 |       y_batch = y[indices]
 56 |       #########################################################################
 57 |       #                       END OF YOUR CODE                                #
 58 |       #########################################################################
 59 | 
 60 |       # evaluate loss and gradient
 61 |       loss, grad = self.loss(X_batch, y_batch, reg)
 62 |       loss_history.append(loss)
 63 | 
 64 |       # perform parameter update
 65 |       #########################################################################
 66 |       # TODO:                                                                 #
 67 |       # Update the weights using the gradient and the learning rate.          #
 68 |       #########################################################################
 69 |       pass
 70 |       self.W -= learning_rate * grad
 71 |       #########################################################################
 72 |       #                       END OF YOUR CODE                                #
 73 |       #########################################################################
 74 | 
 75 |       if verbose and it % 100 == 0:
 76 |         print 'iteration %d / %d: loss %f' % (it, num_iters, loss)
 77 | 
 78 |     return loss_history
 79 | 
 80 |   def predict(self, X):
 81 |     """
 82 |     Use the trained weights of this linear classifier to predict labels for
 83 |     data points.
 84 | 
 85 |     Inputs:
 86 |     - X: D x N array of training data. Each column is a D-dimensional point.
 87 | 
 88 |     Returns:
 89 |     - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
 90 |       array of length N, and each element is an integer giving the predicted
 91 |       class.
 92 |     """
 93 |     y_pred = np.zeros(X.shape[1])
 94 |     ###########################################################################
 95 |     # TODO:                                                                   #
 96 |     # Implement this method. Store the predicted labels in y_pred.            #
 97 |     ###########################################################################
 98 |     pass
 99 |     y_pred = np.argmax(np.dot(X, self.W), axis = 1)
100 |     ###########################################################################
101 |     #                           END OF YOUR CODE                              #
102 |     ###########################################################################
103 |     return y_pred
104 | 
105 |   def loss(self, X_batch, y_batch, reg):
106 |     """
107 |     Compute the loss function and its derivative.
108 |     Subclasses will override this.
109 | 
110 |     Inputs:
111 |     - X_batch: A numpy array of shape (N, D) containing a minibatch of N
112 |       data points; each point has dimension D.
113 |     - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
114 |     - reg: (float) regularization strength.
115 | 
116 |     Returns: A tuple containing:
117 |     - loss as a single float
118 |     - gradient with respect to self.W; an array of the same shape as W
119 |     """
120 |     pass
121 | 
122 | 
class LinearSVM(LinearClassifier):
  """ Linear classifier whose objective is the multiclass SVM loss """

  def loss(self, X_batch, y_batch, reg):
    """ Delegate to svm_loss_vectorized using the current weights self.W. """
    loss_and_grad = svm_loss_vectorized(self.W, X_batch, y_batch, reg)
    return loss_and_grad
128 | 
129 | 
class Softmax(LinearClassifier):
  """ Linear classifier whose objective is the softmax + cross-entropy loss """

  def loss(self, X_batch, y_batch, reg):
    """ Delegate to softmax_loss_vectorized using the current weights self.W. """
    loss_and_grad = softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
    return loss_and_grad
135 | 
136 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/linear_svm.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | #from random import shuffle
  3 | 
  4 | def svm_loss_naive(W, X, y, reg):
  5 |   """
  6 |   Structured SVM loss function, naive implementation (with loops).
  7 | 
  8 |   Inputs have dimension D, there are C classes, and we operate on minibatches
  9 |   of N examples.
 10 | 
 11 |   Inputs:
 12 |   - W: A numpy array of shape (D, C) containing weights.
 13 |   - X: A numpy array of shape (N, D) containing a minibatch of data.
 14 |   - y: A numpy array of shape (N,) containing training labels; y[i] = c means
 15 |     that X[i] has label c, where 0 <= c < C.
 16 |   - reg: (float) regularization strength
 17 | 
 18 |   Returns a tuple of:
 19 |   - loss as single float
 20 |   - gradient with respect to weights W; an array of same shape as W
 21 |   """
 22 |   dW = np.zeros(W.shape) # initialize the gradient as zero
 23 | 
 24 |   # compute the loss and the gradient
 25 |   num_classes = W.shape[1]
 26 |   num_train = X.shape[0]
 27 |   loss = 0.0
 28 |   for i in xrange(num_train):
 29 |     scores = X[i].dot(W)
 30 |     correct_class_score = scores[y[i]]
 31 |     for j in xrange(num_classes):
 32 |       if j == y[i]:
 33 |         continue
 34 |       margin = scores[j] - correct_class_score + 1 # note delta = 1
 35 |       if margin > 0:
 36 |         loss += margin
 37 |         dW[:, j] += X[i]
 38 |         dW[:, y[i]] -= X[i]
 39 | 
 40 |   # Right now the loss is a sum over all training examples, but we want it
 41 |   # to be an average instead so we divide by num_train.
 42 |   loss /= num_train
 43 |   dW /= num_train
 44 | 
 45 |   # Add regularization to the loss.
 46 |   loss += 0.5 * reg * np.sum(W * W)
 47 |   dW += reg * W
 48 | 
 49 |   #############################################################################
 50 |   # TODO:                                                                     #
 51 |   # Compute the gradient of the loss function and store it dW.                #
 52 |   # Rather that first computing the loss and then computing the derivative,   #
 53 |   # it may be simpler to compute the derivative at the same time that the     #
 54 |   # loss is being computed. As a result you may need to modify some of the    #
 55 |   # code above to compute the gradient.                                       #
 56 |   #############################################################################
 57 | 
 58 | 
 59 |   return loss, dW
 60 | 
 61 | 
 62 | def svm_loss_vectorized(W, X, y, reg):
 63 |   """
 64 |   Structured SVM loss function, vectorized implementation.
 65 | 
 66 |   Inputs and outputs are the same as svm_loss_naive.
 67 |   """
 68 |   loss = 0.0
 69 |   dW = np.zeros(W.shape) # initialize the gradient as zero
 70 | 
 71 |   #############################################################################
 72 |   # TODO:                                                                     #
 73 |   # Implement a vectorized version of the structured SVM loss, storing the    #
 74 |   # result in loss.                                                           #
 75 |   #############################################################################
 76 |   pass
 77 |   N = X.shape[0]
 78 |   #scores = np.dot(X, W)
 79 |   #margin = scores - scores[range(0, N), y].reshape(N, 1) + 1
 80 |   #margin[range(0, N), y] = 0
 81 |   #margin = margin * (margin > 0) # max(0, s_j - s_yi + delta)
 82 |   #loss += np.sum(margin) / N + 0.5 * reg * np.sum(W * W)
 83 |   scores = X.dot(W) # N x C
 84 |   margin = scores - scores[range(0,N), y].reshape(-1, 1) + 1 # N x C
 85 |   margin[range(N), y] = 0
 86 |   margin = (margin > 0) * margin
 87 |   loss += margin.sum() / N
 88 |   loss += 0.5 * reg * np.sum(W * W)
 89 |   #############################################################################
 90 |   #                             END OF YOUR CODE                              #
 91 |   #############################################################################
 92 | 
 93 | 
 94 |   #############################################################################
 95 |   # TODO:                                                                     #
 96 |   # Implement a vectorized version of the gradient for the structured SVM     #
 97 |   # loss, storing the result in dW.                                           #
 98 |   #                                                                           #
 99 |   # Hint: Instead of computing the gradient from scratch, it may be easier    #
100 |   # to reuse some of the intermediate values that you used to compute the     #
101 |   # loss.                                                                     #
102 |   #############################################################################
103 |   pass
104 |   counts = (margin > 0).astype(int)
105 |   counts[range(N), y] = - np.sum(counts, axis = 1)
106 |   dW += np.dot(X.T, counts) / N + reg * W
107 |   #############################################################################
108 |   #                             END OF YOUR CODE                              #
109 |   #############################################################################
110 | 
111 |   return loss, dW
112 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/neural_net.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | #import matplotlib.pyplot as plt
  3 | 
  4 | 
  5 | class TwoLayerNet(object):
  6 |   """
  7 |   A two-layer fully-connected neural network. The net has an input dimension of
  8 |   N, a hidden layer dimension of H, and performs classification over C classes.
  9 |   We train the network with a softmax loss function and L2 regularization on the
 10 |   weight matrices. The network uses a ReLU nonlinearity after the first fully
 11 |   connected layer.
 12 | 
 13 |   In other words, the network has the following architecture:
 14 | 
 15 |   input - fully connected layer - ReLU - fully connected layer - softmax
 16 | 
 17 |   The outputs of the second fully-connected layer are the scores for each class.
 18 |   """
 19 | 
 20 |   def __init__(self, input_size, hidden_size, output_size, std=1e-4):
 21 |     """
 22 |     Initialize the model. Weights are initialized to small random values and
 23 |     biases are initialized to zero. Weights and biases are stored in the
 24 |     variable self.params, which is a dictionary with the following keys:
 25 | 
 26 |     W1: First layer weights; has shape (D, H)
 27 |     b1: First layer biases; has shape (H,)
 28 |     W2: Second layer weights; has shape (H, C)
 29 |     b2: Second layer biases; has shape (C,)
 30 | 
 31 |     Inputs:
 32 |     - input_size: The dimension D of the input data.
 33 |     - hidden_size: The number of neurons H in the hidden layer.
 34 |     - output_size: The number of classes C.
 35 |     """
 36 |     self.params = {}
 37 |     self.params['W1'] = std * np.random.randn(input_size, hidden_size)
 38 |     self.params['b1'] = np.zeros(hidden_size)
 39 |     self.params['W2'] = std * np.random.randn(hidden_size, output_size)
 40 |     self.params['b2'] = np.zeros(output_size)
 41 | 
 42 |   def loss(self, X, y=None, reg=0.0):
 43 |     """
 44 |     Compute the loss and gradients for a two layer fully connected neural
 45 |     network.
 46 | 
 47 |     Inputs:
 48 |     - X: Input data of shape (N, D). Each X[i] is a training sample.
 49 |     - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
 50 |       an integer in the range 0 <= y[i] < C. This parameter is optional; if it
 51 |       is not passed then we only return scores, and if it is passed then we
 52 |       instead return the loss and gradients.
 53 |     - reg: Regularization strength.
 54 | 
 55 |     Returns:
 56 |     If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
 57 |     the score for class c on input X[i].
 58 | 
 59 |     If y is not None, instead return a tuple of:
 60 |     - loss: Loss (data loss and regularization loss) for this batch of training
 61 |       samples.
 62 |     - grads: Dictionary mapping parameter names to gradients of those parameters
 63 |       with respect to the loss function; has the same keys as self.params.
 64 |     """
 65 |     # Unpack variables from the params dictionary
 66 |     W1, b1 = self.params['W1'], self.params['b1']
 67 |     W2, b2 = self.params['W2'], self.params['b2']
 68 |     N, D = X.shape
 69 | 
 70 |     # Compute the forward pass
 71 |     scores = None
 72 |     #############################################################################
 73 |     # TODO: Perform the forward pass, computing the class scores for the input. #
 74 |     # Store the result in the scores variable, which should be an array of      #
 75 |     # shape (N, C).                                                             #
 76 |     #############################################################################
 77 |     pass
 78 |     hidden_layer = np.maximum(0, np.dot(X, W1) + b1)
 79 |     scores = np.dot(hidden_layer, W2) + b2
 80 |     #############################################################################
 81 |     #                              END OF YOUR CODE                             #
 82 |     #############################################################################
 83 | 
 84 |     # If the targets are not given then jump out, we're done
 85 |     if y is None:
 86 |       return scores
 87 | 
 88 |     # Compute the loss
 89 |     loss = None
 90 |     #############################################################################
 91 |     # TODO: Finish the forward pass, and compute the loss. This should include  #
 92 |     # both the data loss and L2 regularization for W1 and W2. Store the result  #
 93 |     # in the variable loss, which should be a scalar. Use the Softmax           #
 94 |     # classifier loss. So that your results match ours, multiply the            #
 95 |     # regularization loss by 0.5                                                #
 96 |     #############################################################################
 97 |     pass
 98 |     f = scores - np.max(scores, axis = 1, keepdims = True)
 99 |     loss = -f[range(N), y].sum() + np.log(np.exp(f).sum(axis = 1)).sum()
100 |     loss = loss / N + 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
101 |     #############################################################################
102 |     #                              END OF YOUR CODE                             #
103 |     #############################################################################
104 | 
105 |     # Backward pass: compute gradients
106 |     grads = {}
107 |     #############################################################################
108 |     # TODO: Compute the backward pass, computing the derivatives of the weights #
109 |     # and biases. Store the results in the grads dictionary. For example,       #
110 |     # grads['W1'] should store the gradient on W1, and be a matrix of same size #
111 |     #############################################################################
112 |     pass
113 |     dscore = np.exp(f) / np.exp(f).sum(axis = 1, keepdims = True)
114 |     dscore[range(N), y] -= 1
115 |     dscore /= N
116 |     grads['W2'] = np.dot(hidden_layer.T, dscore) + reg * W2
117 |     grads['b2'] = np.sum(dscore, axis = 0)
118 | 
119 |     dhidden = np.dot(dscore, W2.T)
120 |     dhidden[hidden_layer <= 0.00001] = 0
121 | 
122 |     grads['W1'] = np.dot(X.T, dhidden) + reg * W1
123 |     grads['b1'] = np.sum(dhidden, axis = 0)
124 |     #############################################################################
125 |     #                              END OF YOUR CODE                             #
126 |     #############################################################################
127 | 
128 |     return loss, grads
129 | 
130 |   def train(self, X, y, X_val, y_val,
131 |             learning_rate=1e-3, learning_rate_decay=0.95,
132 |             reg=1e-5, num_iters=100,
133 |             batch_size=200, verbose=False):
134 |     """
135 |     Train this neural network using stochastic gradient descent.
136 | 
137 |     Inputs:
138 |     - X: A numpy array of shape (N, D) giving training data.
139 |     - y: A numpy array f shape (N,) giving training labels; y[i] = c means that
140 |       X[i] has label c, where 0 <= c < C.
141 |     - X_val: A numpy array of shape (N_val, D) giving validation data.
142 |     - y_val: A numpy array of shape (N_val,) giving validation labels.
143 |     - learning_rate: Scalar giving learning rate for optimization.
144 |     - learning_rate_decay: Scalar giving factor used to decay the learning rate
145 |       after each epoch.
146 |     - reg: Scalar giving regularization strength.
147 |     - num_iters: Number of steps to take when optimizing.
148 |     - batch_size: Number of training examples to use per step.
149 |     - verbose: boolean; if true print progress during optimization.
150 |     """
151 |     num_train = X.shape[0]
152 |     iterations_per_epoch = max(num_train / batch_size, 1)
153 | 
154 |     # Use SGD to optimize the parameters in self.model
155 |     loss_history = []
156 |     train_acc_history = []
157 |     val_acc_history = []
158 | 
159 |     for it in xrange(num_iters):
160 |       X_batch = None
161 |       y_batch = None
162 | 
163 |       #########################################################################
164 |       # TODO: Create a random minibatch of training data and labels, storing  #
165 |       # them in X_batch and y_batch respectively.                             #
166 |       #########################################################################
167 |       pass
168 |       indices = np.random.choice(num_train, batch_size, replace=True)
169 |       X_batch = X[indices]
170 |       y_batch = y[indices]
171 |       #########################################################################
172 |       #                             END OF YOUR CODE                          #
173 |       #########################################################################
174 | 
175 |       # Compute loss and gradients using the current minibatch
176 |       loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
177 |       loss_history.append(loss)
178 | 
179 |       #########################################################################
180 |       # TODO: Use the gradients in the grads dictionary to update the         #
181 |       # parameters of the network (stored in the dictionary self.params)      #
182 |       # using stochastic gradient descent. You'll need to use the gradients   #
183 |       # stored in the grads dictionary defined above.                         #
184 |       #########################################################################
185 |       pass
186 |       self.params['W1'] -= learning_rate * grads['W1']
187 |       self.params['b1'] -= learning_rate * grads['b1']
188 |       self.params['W2'] -= learning_rate * grads['W2']
189 |       self.params['b2'] -= learning_rate * grads['b2']
190 |       #########################################################################
191 |       #                             END OF YOUR CODE                          #
192 |       #########################################################################
193 | 
194 |       if verbose and it % 100 == 0:
195 |         print 'iteration %d / %d: loss %f' % (it, num_iters, loss)
196 | 
197 |       # Every epoch, check train and val accuracy and decay learning rate.
198 |       if it % iterations_per_epoch == 0:
199 |         # Check accuracy
200 |         train_acc = (self.predict(X_batch) == y_batch).mean()
201 |         val_acc = (self.predict(X_val) == y_val).mean()
202 |         train_acc_history.append(train_acc)
203 |         val_acc_history.append(val_acc)
204 | 
205 |         # Decay learning rate
206 |         learning_rate *= learning_rate_decay
207 | 
208 |     return {
209 |       'loss_history': loss_history,
210 |       'train_acc_history': train_acc_history,
211 |       'val_acc_history': val_acc_history,
212 |     }
213 | 
214 |   def predict(self, X):
215 |     """
216 |     Use the trained weights of this two-layer network to predict labels for
217 |     data points. For each data point we predict scores for each of the C
218 |     classes, and assign each data point to the class with the highest score.
219 | 
220 |     Inputs:
221 |     - X: A numpy array of shape (N, D) giving N D-dimensional data points to
222 |       classify.
223 | 
224 |     Returns:
225 |     - y_pred: A numpy array of shape (N,) giving predicted labels for each of
226 |       the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
227 |       to have class c, where 0 <= c < C.
228 |     """
229 |     y_pred = None
230 | 
231 |     ###########################################################################
232 |     # TODO: Implement this function; it should be VERY simple!                #
233 |     ###########################################################################
234 |     pass
235 |     W1, b1 = self.params['W1'], self.params['b1']
236 |     W2, b2 = self.params['W2'], self.params['b2']
237 | 
238 |     hidden_layer = np.maximum(0, np.dot(X, W1) + b1)
239 |     scores = np.dot(hidden_layer, W2) + b2
240 |     y_pred = np.argmax(scores, axis = 1)
241 |     ###########################################################################
242 |     #                              END OF YOUR CODE                           #
243 |     ###########################################################################
244 | 
245 |     return y_pred
246 | 
247 | 
248 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/softmax.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #from random import shuffle
 3 | 
 4 | def softmax_loss_naive(W, X, y, reg):
 5 |   """
 6 |   Softmax loss function, naive implementation (with loops)
 7 | 
 8 |   Inputs have dimension D, there are C classes, and we operate on minibatches
 9 |   of N examples.
10 | 
11 |   Inputs:
12 |   - W: A numpy array of shape (D, C) containing weights.
13 |   - X: A numpy array of shape (N, D) containing a minibatch of data.
14 |   - y: A numpy array of shape (N,) containing training labels; y[i] = c means
15 |     that X[i] has label c, where 0 <= c < C.
16 |   - reg: (float) regularization strength
17 | 
18 |   Returns a tuple of:
19 |   - loss as single float
20 |   - gradient with respect to weights W; an array of same shape as W
21 |   """
22 |   # Initialize the loss and gradient to zero.
23 |   loss = 0.0
24 |   dW = np.zeros_like(W)
25 | 
26 |   #############################################################################
27 |   # TODO: Compute the softmax loss and its gradient using explicit loops.     #
28 |   # Store the loss in loss and the gradient in dW. If you are not careful     #
29 |   # here, it is easy to run into numeric instability. Don't forget the        #
30 |   # regularization!                                                           #
31 |   #############################################################################
32 |   pass
33 |   N, C = X.shape[0], W.shape[1]
34 |   for i in range(N):
35 |       f = np.dot(X[i], W)
36 |       f -= np.max(f) # f.shape = C
37 |       loss = loss + np.log(np.sum(np.exp(f))) - f[y[i]]
38 |       dW[:, y[i]] -= X[i]
39 |       s = np.exp(f).sum()
40 |       for j in range(C):
41 |           dW[:, j] += np.exp(f[j]) / s * X[i]
42 |   loss = loss / N + 0.5 * reg * np.sum(W * W)
43 |   dW = dW / N + reg * W
44 |   #############################################################################
45 |   #                          END OF YOUR CODE                                 #
46 |   #############################################################################
47 | 
48 |   return loss, dW
49 | 
50 | 
def softmax_loss_vectorized(W, X, y, reg):
  """
  Softmax loss and gradient computed with array operations only.

  Takes the same inputs and returns the same (loss, dW) tuple as
  softmax_loss_naive.
  """
  num_train = X.shape[0]
  rows = range(num_train)

  # Class scores, shifted per row so the maximum is 0 before exponentiating.
  scores = np.dot(X, W)
  scores -= scores.max(axis = 1).reshape(num_train, 1)
  exp_scores = np.exp(scores)
  row_sums = exp_scores.sum(axis = 1)

  data_loss = np.log(row_sums).sum() - scores[rows, y].sum()

  # Softmax probabilities minus the one-hot labels give d(loss)/d(scores).
  dscores = exp_scores / row_sums.reshape(num_train, 1)
  dscores[rows, y] -= 1
  data_grad = np.dot(X.T, dscores)

  loss = data_loss / num_train + 0.5 * reg * np.sum(W * W)
  dW = data_grad / num_train + reg * W

  return loss, dW
85 | 
86 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/data_utils.py:
--------------------------------------------------------------------------------
  1 | import cPickle as pickle
  2 | import numpy as np
  3 | import os
  4 | from scipy.misc import imread
  5 | 
  6 | def load_CIFAR_batch(filename):
  7 |   """ load single batch of cifar """
  8 |   with open(filename, 'rb') as f:
  9 |     datadict = pickle.load(f)
 10 |     X = datadict['data']
 11 |     Y = datadict['labels']
 12 |     X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
 13 |     Y = np.array(Y)
 14 |     return X, Y
 15 | 
 16 | def load_CIFAR10(ROOT):
 17 |   """ load all of cifar """
 18 |   xs = []
 19 |   ys = []
 20 |   for b in range(1,6):
 21 |     f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
 22 |     X, Y = load_CIFAR_batch(f)
 23 |     xs.append(X)
 24 |     ys.append(Y)    
 25 |   Xtr = np.concatenate(xs)
 26 |   Ytr = np.concatenate(ys)
 27 |   del X, Y
 28 |   Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
 29 |   return Xtr, Ytr, Xte, Yte
 30 | 
 31 | def load_tiny_imagenet(path, dtype=np.float32):
 32 |   """
 33 |   Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
 34 |   TinyImageNet-200 have the same directory structure, so this can be used
 35 |   to load any of them.
 36 | 
 37 |   Inputs:
 38 |   - path: String giving path to the directory to load.
 39 |   - dtype: numpy datatype used to load the data.
 40 | 
 41 |   Returns: A tuple of
 42 |   - class_names: A list where class_names[i] is a list of strings giving the
 43 |     WordNet names for class i in the loaded dataset.
 44 |   - X_train: (N_tr, 3, 64, 64) array of training images
 45 |   - y_train: (N_tr,) array of training labels
 46 |   - X_val: (N_val, 3, 64, 64) array of validation images
 47 |   - y_val: (N_val,) array of validation labels
 48 |   - X_test: (N_test, 3, 64, 64) array of testing images.
 49 |   - y_test: (N_test,) array of test labels; if test labels are not available
 50 |     (such as in student code) then y_test will be None.
 51 |   """
 52 |   # First load wnids
 53 |   with open(os.path.join(path, 'wnids.txt'), 'r') as f:
 54 |     wnids = [x.strip() for x in f]
 55 | 
 56 |   # Map wnids to integer labels
 57 |   wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
 58 | 
 59 |   # Use words.txt to get names for each class
 60 |   with open(os.path.join(path, 'words.txt'), 'r') as f:
 61 |     wnid_to_words = dict(line.split('\t') for line in f)
 62 |     for wnid, words in wnid_to_words.iteritems():
 63 |       wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
 64 |   class_names = [wnid_to_words[wnid] for wnid in wnids]
 65 | 
 66 |   # Next load training data.
 67 |   X_train = []
 68 |   y_train = []
 69 |   for i, wnid in enumerate(wnids):
 70 |     if (i + 1) % 20 == 0:
 71 |       print 'loading training data for synset %d / %d' % (i + 1, len(wnids))
 72 |     # To figure out the filenames we need to open the boxes file
 73 |     boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
 74 |     with open(boxes_file, 'r') as f:
 75 |       filenames = [x.split('\t')[0] for x in f]
 76 |     num_images = len(filenames)
 77 |     
 78 |     X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
 79 |     y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
 80 |     for j, img_file in enumerate(filenames):
 81 |       img_file = os.path.join(path, 'train', wnid, 'images', img_file)
 82 |       img = imread(img_file)
 83 |       if img.ndim == 2:
 84 |         ## grayscale file
 85 |         img.shape = (64, 64, 1)
 86 |       X_train_block[j] = img.transpose(2, 0, 1)
 87 |     X_train.append(X_train_block)
 88 |     y_train.append(y_train_block)
 89 |       
 90 |   # We need to concatenate all training data
 91 |   X_train = np.concatenate(X_train, axis=0)
 92 |   y_train = np.concatenate(y_train, axis=0)
 93 |   
 94 |   # Next load validation data
 95 |   with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
 96 |     img_files = []
 97 |     val_wnids = []
 98 |     for line in f:
 99 |       img_file, wnid = line.split('\t')[:2]
100 |       img_files.append(img_file)
101 |       val_wnids.append(wnid)
102 |     num_val = len(img_files)
103 |     y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
104 |     X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
105 |     for i, img_file in enumerate(img_files):
106 |       img_file = os.path.join(path, 'val', 'images', img_file)
107 |       img = imread(img_file)
108 |       if img.ndim == 2:
109 |         img.shape = (64, 64, 1)
110 |       X_val[i] = img.transpose(2, 0, 1)
111 | 
112 |   # Next load test images
113 |   # Students won't have test labels, so we need to iterate over files in the
114 |   # images directory.
115 |   img_files = os.listdir(os.path.join(path, 'test', 'images'))
116 |   X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
117 |   for i, img_file in enumerate(img_files):
118 |     img_file = os.path.join(path, 'test', 'images', img_file)
119 |     img = imread(img_file)
120 |     if img.ndim == 2:
121 |       img.shape = (64, 64, 1)
122 |     X_test[i] = img.transpose(2, 0, 1)
123 | 
124 |   y_test = None
125 |   y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
126 |   if os.path.isfile(y_test_file):
127 |     with open(y_test_file, 'r') as f:
128 |       img_file_to_wnid = {}
129 |       for line in f:
130 |         line = line.split('\t')
131 |         img_file_to_wnid[line[0]] = line[1]
132 |     y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
133 |     y_test = np.array(y_test)
134 |   
135 |   return class_names, X_train, y_train, X_val, y_val, X_test, y_test
136 | 
137 | 
def load_models(models_dir):
  """
  Load saved models from disk. This will attempt to unpickle all files in a
  directory; any files that give errors on unpickling (such as README.txt) will
  be skipped.

  Inputs:
  - models_dir: String giving the path to a directory containing model files.
    Each model file is a pickled dictionary with a 'model' field.

  Returns:
  A dictionary mapping model file names to models.
  """
  # Unpickling arbitrary bytes does not always raise UnpicklingError: empty or
  # truncated files raise EOFError, and other malformed input can surface as
  # the remaining types. KeyError/TypeError also cover a payload that has no
  # 'model' entry or is not subscriptable.
  skippable_errors = (pickle.UnpicklingError, EOFError, AttributeError,
                      ImportError, IndexError, KeyError, TypeError, ValueError)
  models = {}
  for model_file in os.listdir(models_dir):
    model_path = os.path.join(models_dir, model_file)
    # Sub-directories cannot be opened as files; ignore them.
    if not os.path.isfile(model_path):
      continue
    with open(model_path, 'rb') as f:
      try:
        # NOTE: unpickling can execute arbitrary code; only point this at
        # directories whose contents are trusted.
        models[model_file] = pickle.load(f)['model']
      except skippable_errors:
        continue
  return models
159 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | cifar-10-batches-py/*
2 | tiny-imagenet-100-A*
3 | tiny-imagenet-100-B*
4 | tiny-100-A-pretrained/*
5 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/datasets/get_datasets.sh:
--------------------------------------------------------------------------------
# Get CIFAR10
# Each step runs only if the previous one succeeded, so a failed download
# never reaches the extract or delete steps.
wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz &&
tar -xzvf cifar-10-python.tar.gz &&
rm cifar-10-python.tar.gz
5 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/features.py:
--------------------------------------------------------------------------------
  1 | import matplotlib
  2 | import numpy as np
  3 | from scipy.ndimage import uniform_filter
  4 | 
  5 | 
  6 | def extract_features(imgs, feature_fns, verbose=False):
  7 |   """
  8 |   Given pixel data for images and several feature functions that can operate on
  9 |   single images, apply all feature functions to all images, concatenating the
 10 |   feature vectors for each image and storing the features for all images in
 11 |   a single matrix.
 12 | 
 13 |   Inputs:
 14 |   - imgs: N x H X W X C array of pixel data for N images.
 15 |   - feature_fns: List of k feature functions. The ith feature function should
 16 |     take as input an H x W x D array and return a (one-dimensional) array of
 17 |     length F_i.
 18 |   - verbose: Boolean; if true, print progress.
 19 | 
 20 |   Returns:
 21 |   An array of shape (N, F_1 + ... + F_k) where each column is the concatenation
 22 |   of all features for a single image.
 23 |   """
 24 |   num_images = imgs.shape[0]
 25 |   if num_images == 0:
 26 |     return np.array([])
 27 | 
 28 |   # Use the first image to determine feature dimensions
 29 |   feature_dims = []
 30 |   first_image_features = []
 31 |   for feature_fn in feature_fns:
 32 |     feats = feature_fn(imgs[0].squeeze())
 33 |     assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
 34 |     feature_dims.append(feats.size)
 35 |     first_image_features.append(feats)
 36 | 
 37 |   # Now that we know the dimensions of the features, we can allocate a single
 38 |   # big array to store all features as columns.
 39 |   total_feature_dim = sum(feature_dims)
 40 |   imgs_features = np.zeros((num_images, total_feature_dim))
 41 |   imgs_features[0] = np.hstack(first_image_features).T
 42 | 
 43 |   # Extract features for the rest of the images.
 44 |   for i in xrange(1, num_images):
 45 |     idx = 0
 46 |     for feature_fn, feature_dim in zip(feature_fns, feature_dims):
 47 |       next_idx = idx + feature_dim
 48 |       imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
 49 |       idx = next_idx
 50 |     if verbose and i % 1000 == 0:
 51 |       print 'Done extracting features for %d / %d images' % (i, num_images)
 52 | 
 53 |   return imgs_features
 54 | 
 55 | 
 56 | def rgb2gray(rgb):
 57 |   """Convert RGB image to grayscale
 58 | 
 59 |     Parameters:
 60 |       rgb : RGB image
 61 | 
 62 |     Returns:
 63 |       gray : grayscale image
 64 |   
 65 |   """
 66 |   return np.dot(rgb[...,:3], [0.299, 0.587, 0.144])
 67 | 
 68 | 
 69 | def hog_feature(im):
 70 |   """Compute Histogram of Gradient (HOG) feature for an image
 71 |   
 72 |        Modified from skimage.feature.hog
 73 |        http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog
 74 |      
 75 |      Reference:
 76 |        Histograms of Oriented Gradients for Human Detection
 77 |        Navneet Dalal and Bill Triggs, CVPR 2005
 78 |      
 79 |     Parameters:
 80 |       im : an input grayscale or rgb image
 81 |       
 82 |     Returns:
 83 |       feat: Histogram of Gradient (HOG) feature
 84 |     
 85 |   """
 86 |   
 87 |   # convert rgb to grayscale if needed
 88 |   if im.ndim == 3:
 89 |     image = rgb2gray(im)
 90 |   else:
 91 |     image = np.at_least_2d(im)
 92 | 
 93 |   sx, sy = image.shape # image size
 94 |   orientations = 9 # number of gradient bins
 95 |   cx, cy = (8, 8) # pixels per cell
 96 | 
 97 |   gx = np.zeros(image.shape)
 98 |   gy = np.zeros(image.shape)
 99 |   gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction
100 |   gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction
101 |   grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude
102 |   grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation
103 | 
104 |   n_cellsx = int(np.floor(sx / cx))  # number of cells in x
105 |   n_cellsy = int(np.floor(sy / cy))  # number of cells in y
106 |   # compute orientations integral images
107 |   orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
108 |   for i in range(orientations):
109 |     # create new integral image for this orientation
110 |     # isolate orientations in this range
111 |     temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
112 |                         grad_ori, 0)
113 |     temp_ori = np.where(grad_ori >= 180 / orientations * i,
114 |                         temp_ori, 0)
115 |     # select magnitudes for those orientations
116 |     cond2 = temp_ori > 0
117 |     temp_mag = np.where(cond2, grad_mag, 0)
118 |     orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[cx/2::cx, cy/2::cy].T
119 |   
120 |   return orientation_histogram.ravel()
121 | 
122 | 
def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
  """
  Compute color histogram for an image using hue.

  Inputs:
  - im: H x W x C array of pixel data for an RGB image.
  - nbin: Number of histogram bins. (default: 10)
  - xmin: Minimum pixel value (default: 0)
  - xmax: Maximum pixel value (default: 255)
  - normalized: Whether to normalize the histogram (default: True)

  Returns:
    1D vector of length nbin giving the color histogram over the hue of the
    input image.
  """
  # Import the submodule explicitly; a bare `import matplotlib` is not
  # documented to make `matplotlib.colors` available.
  import matplotlib.colors

  bins = np.linspace(xmin, xmax, nbin+1)
  # float(xmax) forces true division, so integer-typed images are scaled
  # to fractions instead of being floor-divided.
  hsv = matplotlib.colors.rgb_to_hsv(im / float(xmax)) * xmax
  imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized)
  # Multiply by the bin widths: with density=True this turns the density
  # into the fraction of pixels per bin.
  imhist = imhist * np.diff(bin_edges)

  # return histogram
  return imhist
146 | 
147 | 
148 | pass
149 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from random import randrange
  3 | 
  4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
  5 |   """ 
  6 |   a naive implementation of numerical gradient of f at x 
  7 |   - f should be a function that takes a single argument
  8 |   - x is the point (numpy array) to evaluate the gradient at
  9 |   """ 
 10 | 
 11 |   fx = f(x) # evaluate function value at original point
 12 |   grad = np.zeros_like(x)
 13 |   # iterate over all indexes in x
 14 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 15 |   while not it.finished:
 16 | 
 17 |     # evaluate function at x+h
 18 |     ix = it.multi_index
 19 |     oldval = x[ix]
 20 |     x[ix] = oldval + h # increment by h
 21 |     fxph = f(x) # evalute f(x + h)
 22 |     x[ix] = oldval - h
 23 |     fxmh = f(x) # evaluate f(x - h)
 24 |     x[ix] = oldval # restore
 25 | 
 26 |     # compute the partial derivative with centered formula
 27 |     grad[ix] = (fxph - fxmh) / (2 * h) # the slope
 28 |     if verbose:
 29 |       print ix, grad[ix]
 30 |     it.iternext() # step to next dimension
 31 | 
 32 |   return grad
 33 | 
 34 | 
 35 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
 36 |   """
 37 |   Evaluate a numeric gradient for a function that accepts a numpy
 38 |   array and returns a numpy array.
 39 |   """
 40 |   grad = np.zeros_like(x)
 41 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 42 |   while not it.finished:
 43 |     ix = it.multi_index
 44 |     
 45 |     oldval = x[ix]
 46 |     x[ix] = oldval + h
 47 |     pos = f(x).copy()
 48 |     x[ix] = oldval - h
 49 |     neg = f(x).copy()
 50 |     x[ix] = oldval
 51 |     
 52 |     grad[ix] = np.sum((pos - neg) * df) / (2 * h)
 53 |     it.iternext()
 54 |   return grad
 55 | 
 56 | 
 57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
 58 |   """
 59 |   Compute numeric gradients for a function that operates on input
 60 |   and output blobs.
 61 |   
 62 |   We assume that f accepts several input blobs as arguments, followed by a blob
 63 |   into which outputs will be written. For example, f might be called like this:
 64 | 
 65 |   f(x, w, out)
 66 |   
 67 |   where x and w are input Blobs, and the result of f will be written to out.
 68 | 
 69 |   Inputs: 
 70 |   - f: function
 71 |   - inputs: tuple of input blobs
 72 |   - output: output blob
 73 |   - h: step size
 74 |   """
 75 |   numeric_diffs = []
 76 |   for input_blob in inputs:
 77 |     diff = np.zeros_like(input_blob.diffs)
 78 |     it = np.nditer(input_blob.vals, flags=['multi_index'],
 79 |                    op_flags=['readwrite'])
 80 |     while not it.finished:
 81 |       idx = it.multi_index
 82 |       orig = input_blob.vals[idx]
 83 | 
 84 |       input_blob.vals[idx] = orig + h
 85 |       f(*(inputs + (output,)))
 86 |       pos = np.copy(output.vals)
 87 |       input_blob.vals[idx] = orig - h
 88 |       f(*(inputs + (output,)))
 89 |       neg = np.copy(output.vals)
 90 |       input_blob.vals[idx] = orig
 91 |       
 92 |       diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
 93 | 
 94 |       it.iternext()
 95 |     numeric_diffs.append(diff)
 96 |   return numeric_diffs
 97 | 
 98 | 
 99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
100 |   return eval_numerical_gradient_blobs(lambda *args: net.forward(),
101 |               inputs, output, h=h)
102 | 
103 | 
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
  """
  Sample a few random elements of x and compare the numerical gradient of f
  at those elements with the corresponding entries of analytic_grad.

  - f: function of x returning a scalar
  - x: numpy array; perturbed in place during a check and restored afterwards
  - analytic_grad: array indexed like x holding the analytic gradient
  - num_checks: number of random elements to check
  - h: step size for the centered difference

  Prints one line per check; nothing is returned.
  """

  for _ in range(num_checks):
    ix = tuple([randrange(m) for m in x.shape])

    oldval = x[ix]
    x[ix] = oldval + h # increment by h
    fxph = f(x) # evaluate f(x + h)
    x[ix] = oldval - h # decrement by h
    fxmh = f(x) # evaluate f(x - h)
    x[ix] = oldval # reset

    grad_numerical = (fxph - fxmh) / (2 * h)
    grad_analytic = analytic_grad[ix]
    denominator = abs(grad_numerical) + abs(grad_analytic)
    # Both gradients exactly zero means they agree; avoid computing 0 / 0.
    if denominator > 0:
      rel_error = abs(grad_numerical - grad_analytic) / denominator
    else:
      rel_error = 0.0
    print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))
124 | 
125 | 


--------------------------------------------------------------------------------
/assignment1/cs231n/vis_utils.py:
--------------------------------------------------------------------------------
 1 | from math import sqrt, ceil
 2 | import numpy as np
 3 | 
 4 | def visualize_grid(Xs, ubound=255.0, padding=1):
 5 |   """
 6 |   Reshape a 4D tensor of image data to a grid for easy visualization.
 7 | 
 8 |   Inputs:
 9 |   - Xs: Data of shape (N, H, W, C)
10 |   - ubound: Output grid will have values scaled to the range [0, ubound]
11 |   - padding: The number of blank pixels between elements of the grid
12 |   """
13 |   (N, H, W, C) = Xs.shape
14 |   grid_size = int(ceil(sqrt(N)))
15 |   grid_height = H * grid_size + padding * (grid_size - 1)
16 |   grid_width = W * grid_size + padding * (grid_size - 1)
17 |   grid = np.zeros((grid_height, grid_width, C))
18 |   next_idx = 0
19 |   y0, y1 = 0, H
20 |   for y in xrange(grid_size):
21 |     x0, x1 = 0, W
22 |     for x in xrange(grid_size):
23 |       if next_idx < N:
24 |         img = Xs[next_idx]
25 |         low, high = np.min(img), np.max(img)
26 |         grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
27 |         # grid[y0:y1, x0:x1] = Xs[next_idx]
28 |         next_idx += 1
29 |       x0 += W + padding
30 |       x1 += W + padding
31 |     y0 += H + padding
32 |     y1 += H + padding
33 |   # grid_max = np.max(grid)
34 |   # grid_min = np.min(grid)
35 |   # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
36 |   return grid
37 | 
def vis_grid(Xs):
  """
  Tile N images of shape (H, W, C) into a square grid and rescale to [0, 1].

  Tiles are separated by one pixel; every cell not covered by an image is
  filled with the minimum value found in Xs.
  """
  (N, H, W, C) = Xs.shape
  side = int(ceil(sqrt(N)))
  canvas = np.ones((side*H+side, side*W+side, C), Xs.dtype)
  canvas *= np.min(Xs)
  for n in range(N):
    # Row-major placement; each tile occupies H x W plus a 1-pixel gap.
    row, col = divmod(n, side)
    top, left = row * (H + 1), col * (W + 1)
    canvas[top:top+H, left:left+W, :] = Xs[n,:,:,:]
  # normalize to [0,1]
  highest = canvas.max()
  lowest = canvas.min()
  return (canvas - lowest)/(highest-lowest)
55 |   
def vis_nn(rows):
  """
  Lay out a list of lists of equally sized images as a grid, rescaled to [0, 1].

  rows[y][x] is drawn at grid row y, column x. The number of columns and the
  image shape/dtype are taken from rows[0]; cells between tiles keep the
  canvas's initial value of one before normalization.
  """
  num_rows = len(rows)
  num_cols = len(rows[0])
  first = rows[0][0]
  H,W,C = first.shape
  canvas = np.ones((num_rows*H+num_rows, num_cols*W+num_cols, C), first.dtype)
  for r in range(num_rows):
    top = r * (H + 1)
    for c in range(num_cols):
      left = c * (W + 1)
      canvas[top:top+H, left:left+W, :] = rows[r][c]
  # normalize to [0,1]
  highest = canvas.max()
  lowest = canvas.min()
  return (canvas - lowest)/(highest-lowest)
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/assignment1/frameworkpython:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # what real Python executable to use
 4 | PYVER=2.7
 5 | PATHTOPYTHON=/usr/local/bin/
 6 | PYTHON=${PATHTOPYTHON}python${PYVER}
 7 | 
 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in
 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"`
10 | 
11 | # now run Python with the virtualenv set as Python's HOME
12 | export PYTHONHOME=$ENV
13 | exec $PYTHON "$@"
14 | 


--------------------------------------------------------------------------------
/assignment1/requirements.txt:
--------------------------------------------------------------------------------
 1 | Jinja2==2.8
 2 | MarkupSafe==0.23
 3 | Pillow==3.0.0
 4 | Pygments==2.0.2
 5 | appnope==0.1.0
 6 | backports-abc==0.4
 7 | backports.ssl-match-hostname==3.5.0.1
 8 | certifi==2015.11.20.1
 9 | cycler==0.9.0
10 | decorator==4.0.6
11 | functools32==3.2.3-2
12 | gnureadline==6.3.3
13 | ipykernel==4.2.2
14 | ipython==4.0.1
15 | ipython-genutils==0.1.0
16 | ipywidgets==4.1.1
17 | jsonschema==2.5.1
18 | jupyter==1.0.0
19 | jupyter-client==4.1.1
20 | jupyter-console==4.0.3
21 | jupyter-core==4.0.6
22 | matplotlib==1.5.0
23 | mistune==0.7.1
24 | nbconvert==4.1.0
25 | nbformat==4.0.1
26 | notebook==4.0.6
27 | numpy==1.10.4
28 | path.py==8.1.2
29 | pexpect==4.0.1
30 | pickleshare==0.5
31 | ptyprocess==0.5
32 | pyparsing==2.0.7
33 | python-dateutil==2.4.2
34 | pytz==2015.7
35 | pyzmq==15.1.0
36 | qtconsole==4.1.1
37 | scipy==0.16.1
38 | simplegeneric==0.8.1
39 | singledispatch==3.4.0.3
40 | six==1.10.0
41 | terminado==0.5
42 | tornado==4.3
43 | traitlets==4.0.0
44 | wsgiref==0.1.2
45 | jupyter==1.0.0
46 | pillow==3.1.0
47 | 


--------------------------------------------------------------------------------
/assignment1/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 | 
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 | 


--------------------------------------------------------------------------------
/assignment2/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | .env/*
4 | 


--------------------------------------------------------------------------------
/assignment2/README.md:
--------------------------------------------------------------------------------
  1 | In this assignment you will practice writing backpropagation code, and training
  2 | Neural Networks and Convolutional Neural Networks. The goals of this assignment
  3 | are as follows:
  4 | 
  5 | - understand **Neural Networks** and how they are arranged in layered
  6 |   architectures
  7 | - understand and be able to implement (vectorized) **backpropagation**
  8 | - implement various **update rules** used to optimize Neural Networks
  9 | - implement **batch normalization** for training deep networks
 10 | - implement **dropout** to regularize networks
 11 | - effectively **cross-validate** and find the best hyperparameters for Neural
 12 |   Network architecture
 13 | - understand the architecture of **Convolutional Neural Networks** and
 14 |   gain experience with training these models on data
 15 | 
 16 | ## Setup
 17 | You can work on the assignment in one of two ways: locally on your own machine,
 18 | or on a virtual machine through Terminal.com. 
 19 | 
 20 | ### Working in the cloud on Terminal
 21 | 
 22 | Terminal has created a separate subdomain to serve our class,
 23 | [www.stanfordterminalcloud.com](https://www.stanfordterminalcloud.com). Register
 24 | your account there. The Assignment 2 snapshot can then be found HERE. If you are
 25 | registered in the class you can contact the TA (see Piazza for more information)
 26 | to request Terminal credits for use on the assignment. Once you boot up the
 27 | snapshot everything will be installed for you, and you will be ready to start on
 28 | your assignment right away. We have written a small tutorial on Terminal
 29 | [here](http://cs231n.github.io/terminal-tutorial/).
 30 | 
 31 | ### Working locally
 32 | Get the code as a zip file
 33 | [here](http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip).
 34 | As for the dependencies:
 35 | 
 36 | **[Option 1] Use Anaconda:**
 37 | The preferred approach for installing all the assignment dependencies is to use
 38 | [Anaconda](https://www.continuum.io/downloads), which is a Python distribution
 39 | that includes many of the most popular Python packages for science, math,
 40 | engineering and data analysis. Once you install it you can skip all mentions of
 41 | requirements and you are ready to go directly to working on the assignment.
 42 | 
 43 | **[Option 2] Manual install, virtual environment:**
 44 | If you do not want to use Anaconda and want to go with a more manual and risky
 45 | installation route you will likely want to create a
 46 | [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/)
 47 | for the project. If you choose not to use a virtual environment, it is up to you
 48 | to make sure that all dependencies for the code are installed globally on your
 49 | machine. To set up a virtual environment, run the following:
 50 | 
 51 | ```bash
 52 | cd assignment2
 53 | sudo pip install virtualenv      # This may already be installed
 54 | virtualenv .env                  # Create a virtual environment
 55 | source .env/bin/activate         # Activate the virtual environment
 56 | pip install -r requirements.txt  # Install dependencies
 57 | # Work on the assignment for a while ...
 58 | deactivate                       # Exit the virtual environment
 59 | ```
 60 | 
 61 | **Download data:**
 62 | Once you have the starter code, you will need to download the CIFAR-10 dataset.
 63 | Run the following from the `assignment2` directory:
 64 | 
 65 | ```bash
 66 | cd cs231n/datasets
 67 | ./get_datasets.sh
 68 | ```
 69 | 
 70 | **Compile the Cython extension:** Convolutional Neural Networks require a very
 71 | efficient implementation. We have implemented some of the functionality using
 72 | [Cython](http://cython.org/); you will need to compile the Cython extension
 73 | before you can run the code. From the `cs231n` directory, run the following
 74 | command:
 75 | 
 76 | ```bash
 77 | python setup.py build_ext --inplace
 78 | ```
 79 | 
 80 | **Start IPython:**
 81 | After you have the CIFAR-10 data, you should start the IPython notebook server
 82 | from the `assignment2` directory. If you are unfamiliar with IPython, you should 
 83 | read our [IPython tutorial](http://cs231n.github.io/ipython-tutorial/).
 84 | 
 85 | **NOTE:** If you are working in a virtual environment on OSX, you may encounter
 86 | errors with matplotlib due to the
 87 | [issues described here](http://matplotlib.org/faq/virtualenv_faq.html).
 88 | You can work around this issue by starting the IPython server using the
 89 | `start_ipython_osx.sh` script from the `assignment2` directory; the script
 90 | assumes that your virtual environment is named `.env`.
 91 | 
 92 | 
 93 | ### Submitting your work:
 94 | Whether you work on the assignment locally or using Terminal, once you are done
 95 | working run the `collectSubmission.sh` script; this will produce a file called
 96 | `assignment2.zip`. Upload this file to your dropbox on
 97 | [the coursework](https://coursework.stanford.edu/portal/site/W15-CS-231N-01/)
 98 | page for the course.
 99 | 
100 | 
101 | ### Q1: Fully-connected Neural Network (30 points)
102 | The IPython notebook `FullyConnectedNets.ipynb` will introduce you to our
103 | modular layer design, and then use those layers to implement fully-connected
104 | networks of arbitrary depth. To optimize these models you will implement several
105 | popular update rules.
106 | 
107 | ### Q2: Batch Normalization (30 points)
108 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch
109 | normalization, and use it to train deep fully-connected networks.
110 | 
111 | ### Q3: Dropout (10 points)
112 | The IPython notebook `Dropout.ipynb` will help you implement Dropout and explore
113 | its effects on model generalization.
114 | 
115 | ### Q4: ConvNet on CIFAR-10 (30 points)
116 | In the IPython Notebook `ConvolutionalNetworks.ipynb` you will implement several
117 | new layers that are commonly used in convolutional networks. You will train a
118 | (shallow) convolutional network on CIFAR-10, and it will then be up to you to
119 | train the best network that you can.
120 | 
121 | ### Q5: Do something extra! (up to +10 points)
122 | In the process of training your network, you should feel free to implement
123 | anything that you want to get better performance. You can modify the solver,
124 | implement additional layers, use different types of regularization, use an
125 | ensemble of models, or anything else that comes to mind. If you implement these
126 | or other ideas not covered in the assignment then you will be awarded some bonus
127 | points.
128 | 
129 | 


--------------------------------------------------------------------------------
/assignment2/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | rm -f assignment2.zip
2 | zip -r assignment2.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*"
3 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/.gitignore:
--------------------------------------------------------------------------------
1 | build/*
2 | im2col_cython.c
3 | im2col_cython.so
4 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment2/cs231n/__init__.py


--------------------------------------------------------------------------------
/assignment2/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment2/cs231n/classifiers/__init__.py


--------------------------------------------------------------------------------
/assignment2/cs231n/classifiers/cnn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from cs231n.layers import *
  4 | from cs231n.fast_layers import *
  5 | from cs231n.layer_utils import *
  6 | 
  7 | 
  8 | class ThreeLayerConvNet(object):
  9 |   """
 10 |   A three-layer convolutional network with the following architecture:
 11 | 
 12 |   conv - relu - 2x2 max pool - affine - relu - affine - softmax
 13 | 
 14 |   The network operates on minibatches of data that have shape (N, C, H, W)
 15 |   consisting of N images, each with height H and width W and with C input
 16 |   channels.
 17 |   """
 18 | 
 19 |   def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
 20 |                hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0,
 21 |                dtype=np.float32):
 22 |     """
 23 |     Initialize a new network.
 24 | 
 25 |     Inputs:
 26 |     - input_dim: Tuple (C, H, W) giving size of input data
 27 |     - num_filters: Number of filters to use in the convolutional layer
 28 |     - filter_size: Size of filters to use in the convolutional layer
 29 |     - hidden_dim: Number of units to use in the fully-connected hidden layer
 30 |     - num_classes: Number of scores to produce from the final affine layer.
 31 |     - weight_scale: Scalar giving standard deviation for random initialization
 32 |       of weights.
 33 |     - reg: Scalar giving L2 regularization strength
 34 |     - dtype: numpy datatype to use for computation.
 35 |     """
 36 |     self.params = {}
 37 |     self.reg = reg
 38 |     self.dtype = dtype
 39 | 
 40 |     ############################################################################
 41 |     # TODO: Initialize weights and biases for the three-layer convolutional    #
 42 |     # network. Weights should be initialized from a Gaussian with standard     #
 43 |     # deviation equal to weight_scale; biases should be initialized to zero.   #
 44 |     # All weights and biases should be stored in the dictionary self.params.   #
 45 |     # Store weights and biases for the convolutional layer using the keys 'W1' #
 46 |     # and 'b1'; use keys 'W2' and 'b2' for the weights and biases of the       #
 47 |     # hidden affine layer, and keys 'W3' and 'b3' for the weights and biases   #
 48 |     # of the output affine layer.                                              #
 49 |     ############################################################################
 50 |     # pass
 51 |     C, H, W = input_dim
 52 |     F, HH, WW = num_filters, filter_size, filter_size
 53 |     self.params['W1'] = weight_scale * np.random.randn(F, C, HH, WW)
 54 |     self.params['W2'] = weight_scale * np.random.randn(F*H/2*W/2, hidden_dim)
 55 |     self.params['W3'] = weight_scale * np.random.randn(hidden_dim, num_classes)
 56 |     self.params['b1'] = np.zeros(F)
 57 |     self.params['b2'] = np.zeros(hidden_dim)
 58 |     self.params['b3'] = np.zeros(num_classes)
 59 |     ############################################################################
 60 |     #                             END OF YOUR CODE                             #
 61 |     ############################################################################
 62 | 
 63 |     for k, v in self.params.iteritems():
 64 |       self.params[k] = v.astype(dtype)
 65 | 
 66 | 
 67 |   def loss(self, X, y=None):
 68 |     """
 69 |     Evaluate loss and gradient for the three-layer convolutional network.
 70 | 
 71 |     Input / output: Same API as TwoLayerNet in fc_net.py.
 72 |     """
 73 |     W1, b1 = self.params['W1'], self.params['b1']
 74 |     W2, b2 = self.params['W2'], self.params['b2']
 75 |     W3, b3 = self.params['W3'], self.params['b3']
 76 | 
 77 |     # pass conv_param to the forward pass for the convolutional layer
 78 |     filter_size = W1.shape[2]
 79 |     conv_param = {'stride': 1, 'pad': (filter_size - 1) / 2}
 80 | 
 81 |     # pass pool_param to the forward pass for the max-pooling layer
 82 |     pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
 83 | 
 84 |     scores = None
 85 |     ############################################################################
 86 |     # TODO: Implement the forward pass for the three-layer convolutional net,  #
 87 |     # computing the class scores for X and storing them in the scores          #
 88 |     # variable.                                                                #
 89 |     ############################################################################
 90 |     pass
 91 |     #conv_out, conv_cache = conv_forward_naive(X, W1, b1, conv_param)
 92 |     #relu_out, relu_cache = relu_forward(conv_out)
 93 |     #pool_out, pool_cache = max_pool_forward_naive(relu_out, pool_param)
 94 |     #pool_out = pool_out.reshape(pool_out.shape[0], -1)
 95 |     pool_out, pool_cache = conv_relu_pool_forward(X, W1, b1, conv_param, pool_param)
 96 |     affine_out, affine_cache = affine_relu_forward(pool_out, W2, b2)
 97 |     scores, cache = affine_forward(affine_out, W3, b3)
 98 |     ############################################################################
 99 |     #                             END OF YOUR CODE                             #
100 |     ############################################################################
101 | 
102 |     if y is None:
103 |       return scores
104 | 
105 |     loss, grads = 0, {}
106 |     ############################################################################
107 |     # TODO: Implement the backward pass for the three-layer convolutional net, #
108 |     # storing the loss and gradients in the loss and grads variables. Compute  #
109 |     # data loss using softmax, and make sure that grads[k] holds the gradients #
110 |     # for self.params[k]. Don't forget to add L2 regularization!               #
111 |     ############################################################################
112 |     pass
113 |     loss, dscore = softmax_loss(scores, y)
114 |     daffine, grads['W3'], grads['b3'] = affine_backward(dscore, cache)
115 |     dpool, grads['W2'], grads['b2'] = affine_relu_backward(daffine, affine_cache)
116 |     #dpool = dpool.reshape(X.shape[0], W1.shape[0], X.shape[2]/2, X.shape[3]/2)
117 |     #drelu = max_pool_backward_naive(dpool, pool_cache)
118 |     #dconv = relu_backward(drelu, relu_cache)
119 |     #dx, grads['W1'], grads['b1'] = conv_backward_naive(dconv, conv_cache)
120 |     dx, grads['W1'], grads['b1'] = conv_relu_pool_backward(dpool, pool_cache)
121 | 
122 |     loss += 0.5 * self.reg * (np.sum(W1 ** 2) + np.sum(W2 ** 2) + np.sum(W3 ** 2))
123 |     grads['W1'] += self.reg * W1
124 |     grads['W2'] += self.reg * W2
125 |     grads['W3'] += self.reg * W3
126 |     ############################################################################
127 |     #                             END OF YOUR CODE                             #
128 |     ############################################################################
129 | 
130 |     return loss, grads
131 | 
132 | 
133 | pass
134 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/data_utils.py:
--------------------------------------------------------------------------------
  1 | import cPickle as pickle
  2 | import numpy as np
  3 | import os
  4 | from scipy.misc import imread
  5 | 
  6 | def load_CIFAR_batch(filename):
  7 |   """ load single batch of cifar """
  8 |   with open(filename, 'rb') as f:
  9 |     datadict = pickle.load(f)
 10 |     X = datadict['data']
 11 |     Y = datadict['labels']
 12 |     X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
 13 |     Y = np.array(Y)
 14 |     return X, Y
 15 | 
 16 | def load_CIFAR10(ROOT):
 17 |   """ load all of cifar """
 18 |   xs = []
 19 |   ys = []
 20 |   for b in range(1,6):
 21 |     f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
 22 |     X, Y = load_CIFAR_batch(f)
 23 |     xs.append(X)
 24 |     ys.append(Y)    
 25 |   Xtr = np.concatenate(xs)
 26 |   Ytr = np.concatenate(ys)
 27 |   del X, Y
 28 |   Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
 29 |   return Xtr, Ytr, Xte, Yte
 30 | 
 31 | 
 32 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
 33 |     """
 34 |     Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
 35 |     it for classifiers. These are the same steps as we used for the SVM, but
 36 |     condensed to a single function.
 37 |     """
 38 |     # Load the raw CIFAR-10 data
 39 |     cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
 40 |     X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
 41 |         
 42 |     # Subsample the data
 43 |     mask = range(num_training, num_training + num_validation)
 44 |     X_val = X_train[mask]
 45 |     y_val = y_train[mask]
 46 |     mask = range(num_training)
 47 |     X_train = X_train[mask]
 48 |     y_train = y_train[mask]
 49 |     mask = range(num_test)
 50 |     X_test = X_test[mask]
 51 |     y_test = y_test[mask]
 52 | 
 53 |     # Normalize the data: subtract the mean image
 54 |     mean_image = np.mean(X_train, axis=0)
 55 |     X_train -= mean_image
 56 |     X_val -= mean_image
 57 |     X_test -= mean_image
 58 |     
 59 |     # Transpose so that channels come first
 60 |     X_train = X_train.transpose(0, 3, 1, 2).copy()
 61 |     X_val = X_val.transpose(0, 3, 1, 2).copy()
 62 |     X_test = X_test.transpose(0, 3, 1, 2).copy()
 63 | 
 64 |     # Package data into a dictionary
 65 |     return {
 66 |       'X_train': X_train, 'y_train': y_train,
 67 |       'X_val': X_val, 'y_val': y_val,
 68 |       'X_test': X_test, 'y_test': y_test,
 69 |     }
 70 |     
 71 | 
 72 | def load_tiny_imagenet(path, dtype=np.float32):
 73 |   """
 74 |   Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
 75 |   TinyImageNet-200 have the same directory structure, so this can be used
 76 |   to load any of them.
 77 | 
 78 |   Inputs:
 79 |   - path: String giving path to the directory to load.
 80 |   - dtype: numpy datatype used to load the data.
 81 | 
 82 |   Returns: A tuple of
 83 |   - class_names: A list where class_names[i] is a list of strings giving the
 84 |     WordNet names for class i in the loaded dataset.
 85 |   - X_train: (N_tr, 3, 64, 64) array of training images
 86 |   - y_train: (N_tr,) array of training labels
 87 |   - X_val: (N_val, 3, 64, 64) array of validation images
 88 |   - y_val: (N_val,) array of validation labels
 89 |   - X_test: (N_test, 3, 64, 64) array of testing images.
 90 |   - y_test: (N_test,) array of test labels; if test labels are not available
 91 |     (such as in student code) then y_test will be None.
 92 |   """
 93 |   # First load wnids
 94 |   with open(os.path.join(path, 'wnids.txt'), 'r') as f:
 95 |     wnids = [x.strip() for x in f]
 96 | 
 97 |   # Map wnids to integer labels
 98 |   wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
 99 | 
100 |   # Use words.txt to get names for each class
101 |   with open(os.path.join(path, 'words.txt'), 'r') as f:
102 |     wnid_to_words = dict(line.split('\t') for line in f)
103 |     for wnid, words in wnid_to_words.iteritems():
104 |       wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
105 |   class_names = [wnid_to_words[wnid] for wnid in wnids]
106 | 
107 |   # Next load training data.
108 |   X_train = []
109 |   y_train = []
110 |   for i, wnid in enumerate(wnids):
111 |     if (i + 1) % 20 == 0:
112 |       print 'loading training data for synset %d / %d' % (i + 1, len(wnids))
113 |     # To figure out the filenames we need to open the boxes file
114 |     boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
115 |     with open(boxes_file, 'r') as f:
116 |       filenames = [x.split('\t')[0] for x in f]
117 |     num_images = len(filenames)
118 |     
119 |     X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
120 |     y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
121 |     for j, img_file in enumerate(filenames):
122 |       img_file = os.path.join(path, 'train', wnid, 'images', img_file)
123 |       img = imread(img_file)
124 |       if img.ndim == 2:
125 |         ## grayscale file
126 |         img.shape = (64, 64, 1)
127 |       X_train_block[j] = img.transpose(2, 0, 1)
128 |     X_train.append(X_train_block)
129 |     y_train.append(y_train_block)
130 |       
131 |   # We need to concatenate all training data
132 |   X_train = np.concatenate(X_train, axis=0)
133 |   y_train = np.concatenate(y_train, axis=0)
134 |   
135 |   # Next load validation data
136 |   with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
137 |     img_files = []
138 |     val_wnids = []
139 |     for line in f:
140 |       img_file, wnid = line.split('\t')[:2]
141 |       img_files.append(img_file)
142 |       val_wnids.append(wnid)
143 |     num_val = len(img_files)
144 |     y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
145 |     X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
146 |     for i, img_file in enumerate(img_files):
147 |       img_file = os.path.join(path, 'val', 'images', img_file)
148 |       img = imread(img_file)
149 |       if img.ndim == 2:
150 |         img.shape = (64, 64, 1)
151 |       X_val[i] = img.transpose(2, 0, 1)
152 | 
153 |   # Next load test images
154 |   # Students won't have test labels, so we need to iterate over files in the
155 |   # images directory.
156 |   img_files = os.listdir(os.path.join(path, 'test', 'images'))
157 |   X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
158 |   for i, img_file in enumerate(img_files):
159 |     img_file = os.path.join(path, 'test', 'images', img_file)
160 |     img = imread(img_file)
161 |     if img.ndim == 2:
162 |       img.shape = (64, 64, 1)
163 |     X_test[i] = img.transpose(2, 0, 1)
164 | 
165 |   y_test = None
166 |   y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
167 |   if os.path.isfile(y_test_file):
168 |     with open(y_test_file, 'r') as f:
169 |       img_file_to_wnid = {}
170 |       for line in f:
171 |         line = line.split('\t')
172 |         img_file_to_wnid[line[0]] = line[1]
173 |     y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
174 |     y_test = np.array(y_test)
175 |   
176 |   return class_names, X_train, y_train, X_val, y_val, X_test, y_test
177 | 
178 | 
def load_models(models_dir):
  """
  Load saved models from disk. This will attempt to unpickle all files in a
  directory; any files that give errors on unpickling (such as README.txt) will
  be skipped, as will entries that are not regular files and pickles that do
  not hold a dictionary with a 'model' field.

  Inputs:
  - models_dir: String giving the path to a directory containing model files.
    Each model file is a pickled dictionary with a 'model' field.

  Returns:
  A dictionary mapping model file names to models.
  """
  # Errors seen when a file is not a usable model pickle: a corrupt or
  # truncated stream, a missing class or module, or a payload that is not a
  # dictionary with a 'model' key.
  skip_errors = (pickle.UnpicklingError, EOFError, KeyError, IndexError,
                 TypeError, ValueError, AttributeError, ImportError)
  models = {}
  for model_file in os.listdir(models_dir):
    model_path = os.path.join(models_dir, model_file)
    if not os.path.isfile(model_path):
      # Sub-directories cannot be opened as files; ignore them.
      continue
    with open(model_path, 'rb') as f:
      try:
        # NOTE: unpickling can execute arbitrary code; only point this at
        # directories whose contents are trusted.
        models[model_file] = pickle.load(f)['model']
      except skip_errors:
        continue
  return models
200 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | cifar-10-batches-py/*
2 | tiny-imagenet-100-A*
3 | tiny-imagenet-100-B*
4 | tiny-100-A-pretrained/*
5 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/datasets/get_datasets.sh:
--------------------------------------------------------------------------------
1 | # Get CIFAR10
2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
3 | tar -xzvf cifar-10-python.tar.gz
4 | rm cifar-10-python.tar.gz 
5 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/fast_layers.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | try:
  3 |   from cs231n.im2col_cython import col2im_cython, im2col_cython
  4 |   from cs231n.im2col_cython import col2im_6d_cython
  5 | except ImportError:
  6 |   print 'run the following from the cs231n directory and try again:'
  7 |   print 'python setup.py build_ext --inplace'
  8 |   print 'You may also need to restart your iPython kernel'
  9 | 
 10 | from cs231n.im2col import *
 11 | 
 12 | 
 13 | def conv_forward_im2col(x, w, b, conv_param):
 14 |   """
 15 |   A fast implementation of the forward pass for a convolutional layer
 16 |   based on im2col and col2im.
 17 |   """
 18 |   N, C, H, W = x.shape
 19 |   num_filters, _, filter_height, filter_width = w.shape
 20 |   stride, pad = conv_param['stride'], conv_param['pad']
 21 | 
 22 |   # Check dimensions
 23 |   assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
 24 |   assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'
 25 | 
 26 |   # Create output
 27 |   out_height = (H + 2 * pad - filter_height) / stride + 1
 28 |   out_width = (W + 2 * pad - filter_width) / stride + 1
 29 |   out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)
 30 | 
 31 |   # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
 32 |   x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
 33 |   res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)
 34 | 
 35 |   out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
 36 |   out = out.transpose(3, 0, 1, 2)
 37 | 
 38 |   cache = (x, w, b, conv_param, x_cols)
 39 |   return out, cache
 40 | 
 41 | 
 42 | def conv_forward_strides(x, w, b, conv_param):
 43 |   N, C, H, W = x.shape
 44 |   F, _, HH, WW = w.shape
 45 |   stride, pad = conv_param['stride'], conv_param['pad']
 46 | 
 47 |   # Check dimensions
 48 |   assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
 49 |   assert (H + 2 * pad - HH) % stride == 0, 'height does not work'
 50 | 
 51 |   # Pad the input
 52 |   p = pad
 53 |   x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 54 |   
 55 |   # Figure out output dimensions
 56 |   H += 2 * pad
 57 |   W += 2 * pad
 58 |   out_h = (H - HH) / stride + 1
 59 |   out_w = (W - WW) / stride + 1
 60 | 
 61 |   # Perform an im2col operation by picking clever strides
 62 |   shape = (C, HH, WW, N, out_h, out_w)
 63 |   strides = (H * W, W, 1, C * H * W, stride * W, stride)
 64 |   strides = x.itemsize * np.array(strides)
 65 |   x_stride = np.lib.stride_tricks.as_strided(x_padded,
 66 |                 shape=shape, strides=strides)
 67 |   x_cols = np.ascontiguousarray(x_stride)
 68 |   x_cols.shape = (C * HH * WW, N * out_h * out_w)
 69 | 
 70 |   # Now all our convolutions are a big matrix multiply
 71 |   res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)
 72 | 
 73 |   # Reshape the output
 74 |   res.shape = (F, N, out_h, out_w)
 75 |   out = res.transpose(1, 0, 2, 3)
 76 | 
 77 |   # Be nice and return a contiguous array
 78 |   # The old version of conv_forward_fast doesn't do this, so for a fair
 79 |   # comparison we won't either
 80 |   out = np.ascontiguousarray(out)
 81 | 
 82 |   cache = (x, w, b, conv_param, x_cols)
 83 |   return out, cache
 84 |   
 85 | 
 86 | def conv_backward_strides(dout, cache):
 87 |   x, w, b, conv_param, x_cols = cache
 88 |   stride, pad = conv_param['stride'], conv_param['pad']
 89 | 
 90 |   N, C, H, W = x.shape
 91 |   F, _, HH, WW = w.shape
 92 |   _, _, out_h, out_w = dout.shape
 93 | 
 94 |   db = np.sum(dout, axis=(0, 2, 3))
 95 | 
 96 |   dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
 97 |   dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)
 98 | 
 99 |   dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
100 |   dx_cols.shape = (C, HH, WW, N, out_h, out_w)
101 |   dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)
102 | 
103 |   return dx, dw, db
104 | 
105 | 
def conv_backward_im2col(dout, cache):
  """
  Backward pass for a convolutional layer, the im2col / col2im counterpart
  of conv_forward_im2col.

  Inputs:
  - dout: Upstream derivatives
  - cache: (x, w, b, conv_param, x_cols) as returned by the forward pass

  Returns: A tuple (dx, dw, db) of gradients with respect to x, w and b.
  """
  x, w, b, conv_param, x_cols = cache
  pad = conv_param['pad']
  stride = conv_param['stride']
  num_filters, _, filter_height, filter_width = w.shape

  # Bias gradient: sum over everything except the filter axis.
  db = dout.sum(axis=(0, 2, 3))

  # Filter axis first, batch axis last, flattened to match x_cols.
  dout_flat = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
  dw = np.dot(dout_flat, x_cols.T).reshape(w.shape)

  w_flat = w.reshape(num_filters, -1)
  dx_cols = np.dot(w_flat.T, dout_flat)
  n_imgs, n_chan, height, width = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
  dx = col2im_cython(dx_cols, n_imgs, n_chan, height, width,
                     filter_height, filter_width, pad, stride)

  return dx, dw, db
126 | 
127 | 
128 | conv_forward_fast = conv_forward_strides
129 | conv_backward_fast = conv_backward_strides
130 | 
131 | 
def max_pool_forward_fast(x, pool_param):
  """
  Forward pass for a max pooling layer that picks the quickest available
  implementation.

  When the pooling window is square, its stride equals its size, and it
  divides the input height and width evenly, the reshape implementation is
  used. Every other configuration goes through the im2col implementation.

  Returns a tuple (out, cache) where cache is ('reshape', inner_cache) or
  ('im2col', inner_cache), so the backward pass knows which one was used.
  """
  N, C, H, W = x.shape
  pool_height = pool_param['pool_height']
  pool_width = pool_param['pool_width']
  stride = pool_param['stride']

  is_square = (pool_height == pool_width) and (pool_width == stride)
  divides_evenly = (H % pool_height == 0) and (W % pool_width == 0)

  if not (is_square and divides_evenly):
    out, inner_cache = max_pool_forward_im2col(x, pool_param)
    return out, ('im2col', inner_cache)

  out, inner_cache = max_pool_forward_reshape(x, pool_param)
  return out, ('reshape', inner_cache)
154 | 
155 | 
def max_pool_backward_fast(dout, cache):
  """
  Max pooling backward pass paired with max_pool_forward_fast.

  The cache is a pair (method, real_cache); the method tag records which
  forward implementation ran and selects the matching backward routine.

  Raises ValueError if the method tag is neither 'reshape' nor 'im2col'.
  """
  method, real_cache = cache

  # Reject unknown tags up front, then dispatch.
  if method not in ('reshape', 'im2col'):
    raise ValueError('Unrecognized method "%s"' % method)

  if method == 'reshape':
    return max_pool_backward_reshape(dout, real_cache)
  return max_pool_backward_im2col(dout, real_cache)
170 | 
171 | 
def max_pool_forward_reshape(x, pool_param):
  """
  A fast implementation of the forward pass for the max pooling layer that uses
  some clever reshaping.

  This can only be used for square pooling regions that tile the input.

  Inputs:
  - x: 4-d input array, unpacked as (N, C, H, W)
  - pool_param: Dictionary with keys 'pool_height', 'pool_width' and 'stride';
    all three must be equal, and must divide H and W.

  Returns a tuple of:
  - out: Pooled output of shape (N, C, H // pool_height, W // pool_width)
  - cache: Tuple (x, x_reshaped, out) for max_pool_backward_reshape

  Raises AssertionError if the pooling parameters do not tile the input.
  """
  N, C, H, W = x.shape
  pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']
  assert pool_height == pool_width == stride, 'Invalid pool params'
  assert H % pool_height == 0
  assert W % pool_width == 0
  # Floor division keeps the reshape dimensions integral under both classic
  # and true division; the assertions above guarantee it is exact.
  x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                         W // pool_width, pool_width)
  # Reduce the two within-window axes (3, then what was axis 5).
  out = x_reshaped.max(axis=3).max(axis=4)

  cache = (x, x_reshaped, out)
  return out, cache
191 | 
192 | 
def max_pool_backward_reshape(dout, cache):
  """
  Backward pass for max pooling computed with max_pool_forward_reshape,
  implemented with broadcasting over the reshaped input.

  Inputs:
  - dout: Upstream derivatives, same shape as the pooled output
  - cache: Tuple (x, x_reshaped, out) from max_pool_forward_reshape

  Returns:
  - dx: Gradient with respect to x, with x's shape

  NOTE: When several elements of a pooling window tie for the maximum, the
  upstream gradient for that window is divided equally among all of them
  (each tied element receives dout / number_of_ties).
  """
  x, x_reshaped, out = cache

  # Insert singleton axes at the two within-window positions so the pooled
  # values and upstream grads broadcast against the 6-d reshaped input.
  window_max = out[:, :, :, np.newaxis, :, np.newaxis]
  upstream = dout[:, :, :, np.newaxis, :, np.newaxis]

  is_max = (x_reshaped == window_max)

  grad_6d = np.zeros_like(x_reshaped)
  upstream_full = np.broadcast_arrays(upstream, grad_6d)[0]
  grad_6d[is_max] = upstream_full[is_max]

  # Number of maximal elements per window; splits the gradient among ties.
  ties_per_window = np.sum(is_max, axis=(3, 5), keepdims=True)
  grad_6d /= ties_per_window

  return grad_6d.reshape(x.shape)
222 | 
223 | 
def max_pool_forward_im2col(x, pool_param):
  """
  An implementation of the forward pass for max pooling based on im2col.

  This isn't much faster than the naive version, so it should be avoided if
  possible.

  Inputs:
  - x: 4-d input array, unpacked as (N, C, H, W)
  - pool_param: Dictionary with keys 'pool_height', 'pool_width' and 'stride'

  Returns a tuple of:
  - out: Pooled output of shape (N, C, out_height, out_width)
  - cache: Tuple (x, x_cols, x_cols_argmax, pool_param) for
    max_pool_backward_im2col

  Raises AssertionError if the windows do not fit the input exactly.
  """
  N, C, H, W = x.shape
  pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']

  assert (H - pool_height) % stride == 0, 'Invalid height'
  assert (W - pool_width) % stride == 0, 'Invalid width'

  # Floor division so the output dimensions stay integers (required by the
  # reshape below); the assertions above make the division exact.
  out_height = (H - pool_height) // stride + 1
  out_width = (W - pool_width) // stride + 1

  # Treat every (image, channel) pair as its own single-channel image.
  x_split = x.reshape(N * C, 1, H, W)
  # NOTE(review): `im2col` is not defined in the visible part of this module,
  # while the matching backward pass uses `col2im_indices`. Confirm this name
  # resolves at import time (im2col_indices may be what is intended).
  x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
  # Per column, pick the row holding the maximum of that pooling window.
  x_cols_argmax = np.argmax(x_cols, axis=0)
  x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
  out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

  cache = (x, x_cols, x_cols_argmax, pool_param)
  return out, cache
249 | 
250 | 
def max_pool_backward_im2col(dout, cache):
  """
  Backward pass for max pooling computed with max_pool_forward_im2col.

  Like the forward pass, this is not much faster than the naive version.

  Inputs:
  - dout: Upstream derivatives, a 4-d array
  - cache: Tuple (x, x_cols, x_cols_argmax, pool_param) from the forward pass

  Returns:
  - dx: Gradient with respect to x, with x's shape
  """
  x, x_cols, x_cols_argmax, pool_param = cache
  N, C, H, W = x.shape
  ph, pw = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']

  # Flatten upstream grads in the same column order the forward pass used.
  flat_dout = dout.transpose(2, 3, 0, 1).flatten()

  # Route each upstream value to the row that held its window's maximum.
  dx_cols = np.zeros_like(x_cols)
  col_ids = np.arange(dx_cols.shape[1])
  dx_cols[x_cols_argmax, col_ids] = flat_dout

  dx_split = col2im_indices(dx_cols, (N * C, 1, H, W), ph, pw,
                            padding=0, stride=stride)
  return dx_split.reshape(x.shape)
271 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from random import randrange
  3 | 
  4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
  5 |   """ 
  6 |   a naive implementation of numerical gradient of f at x 
  7 |   - f should be a function that takes a single argument
  8 |   - x is the point (numpy array) to evaluate the gradient at
  9 |   """ 
 10 | 
 11 |   fx = f(x) # evaluate function value at original point
 12 |   grad = np.zeros_like(x)
 13 |   # iterate over all indexes in x
 14 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 15 |   while not it.finished:
 16 | 
 17 |     # evaluate function at x+h
 18 |     ix = it.multi_index
 19 |     oldval = x[ix]
 20 |     x[ix] = oldval + h # increment by h
 21 |     fxph = f(x) # evalute f(x + h)
 22 |     x[ix] = oldval - h
 23 |     fxmh = f(x) # evaluate f(x - h)
 24 |     x[ix] = oldval # restore
 25 | 
 26 |     # compute the partial derivative with centered formula
 27 |     grad[ix] = (fxph - fxmh) / (2 * h) # the slope
 28 |     if verbose:
 29 |       print ix, grad[ix]
 30 |     it.iternext() # step to next dimension
 31 | 
 32 |   return grad
 33 | 
 34 | 
 35 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
 36 |   """
 37 |   Evaluate a numeric gradient for a function that accepts a numpy
 38 |   array and returns a numpy array.
 39 |   """
 40 |   grad = np.zeros_like(x)
 41 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 42 |   while not it.finished:
 43 |     ix = it.multi_index
 44 |     
 45 |     oldval = x[ix]
 46 |     x[ix] = oldval + h
 47 |     pos = f(x).copy()
 48 |     x[ix] = oldval - h
 49 |     neg = f(x).copy()
 50 |     x[ix] = oldval
 51 |     
 52 |     grad[ix] = np.sum((pos - neg) * df) / (2 * h)
 53 |     it.iternext()
 54 |   return grad
 55 | 
 56 | 
 57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
 58 |   """
 59 |   Compute numeric gradients for a function that operates on input
 60 |   and output blobs.
 61 |   
 62 |   We assume that f accepts several input blobs as arguments, followed by a blob
 63 |   into which outputs will be written. For example, f might be called like this:
 64 | 
 65 |   f(x, w, out)
 66 |   
 67 |   where x and w are input Blobs, and the result of f will be written to out.
 68 | 
 69 |   Inputs: 
 70 |   - f: function
 71 |   - inputs: tuple of input blobs
 72 |   - output: output blob
 73 |   - h: step size
 74 |   """
 75 |   numeric_diffs = []
 76 |   for input_blob in inputs:
 77 |     diff = np.zeros_like(input_blob.diffs)
 78 |     it = np.nditer(input_blob.vals, flags=['multi_index'],
 79 |                    op_flags=['readwrite'])
 80 |     while not it.finished:
 81 |       idx = it.multi_index
 82 |       orig = input_blob.vals[idx]
 83 | 
 84 |       input_blob.vals[idx] = orig + h
 85 |       f(*(inputs + (output,)))
 86 |       pos = np.copy(output.vals)
 87 |       input_blob.vals[idx] = orig - h
 88 |       f(*(inputs + (output,)))
 89 |       neg = np.copy(output.vals)
 90 |       input_blob.vals[idx] = orig
 91 |       
 92 |       diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
 93 | 
 94 |       it.iternext()
 95 |     numeric_diffs.append(diff)
 96 |   return numeric_diffs
 97 | 
 98 | 
 99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
100 |   return eval_numerical_gradient_blobs(lambda *args: net.forward(),
101 |               inputs, output, h=h)
102 | 
103 | 
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
  """
  Spot-check an analytic gradient: sample a few random elements of x and
  compare the numerical gradient along each of those dimensions with the
  corresponding analytic value. One line is printed per check.

  Inputs:
  - f: Function that takes x and returns a scalar
  - x: Numpy array; perturbed in place and restored after each check
  - analytic_grad: Array indexed like x holding the analytic gradient
  - num_checks: Number of random elements to check
  - h: Step size for the centered difference

  Returns nothing.
  """

  # range (not xrange) so the loop runs on both Python 2 and Python 3.
  for _ in range(num_checks):
    ix = tuple([randrange(m) for m in x.shape])

    oldval = x[ix]
    x[ix] = oldval + h # increment by h
    fxph = f(x) # evaluate f(x + h)
    x[ix] = oldval - h # decrement by h
    fxmh = f(x) # evaluate f(x - h)
    x[ix] = oldval # reset

    grad_numerical = (fxph - fxmh) / (2 * h)
    grad_analytic = analytic_grad[ix]
    denom = abs(grad_numerical) + abs(grad_analytic)
    # Both gradients exactly zero means they agree; avoid computing 0 / 0.
    if denom == 0:
      rel_error = 0.0
    else:
      rel_error = abs(grad_numerical - grad_analytic) / denom
    # Single pre-formatted argument: identical output on Python 2 and 3.
    print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))
124 | 
125 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/im2col.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
 5 |   # First figure out what the size of the output should be
 6 |   N, C, H, W = x_shape
 7 |   assert (H + 2 * padding - field_height) % stride == 0
 8 |   assert (W + 2 * padding - field_height) % stride == 0
 9 |   out_height = (H + 2 * padding - field_height) / stride + 1
10 |   out_width = (W + 2 * padding - field_width) / stride + 1
11 | 
12 |   i0 = np.repeat(np.arange(field_height), field_width)
13 |   i0 = np.tile(i0, C)
14 |   i1 = stride * np.repeat(np.arange(out_height), out_width)
15 |   j0 = np.tile(np.arange(field_width), field_height * C)
16 |   j1 = stride * np.tile(np.arange(out_width), out_height)
17 |   i = i0.reshape(-1, 1) + i1.reshape(1, -1)
18 |   j = j0.reshape(-1, 1) + j1.reshape(1, -1)
19 | 
20 |   k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
21 | 
22 |   return (k, i, j)
23 | 
24 | 
def im2col_indices(x, field_height, field_width, padding=1, stride=1):
  """
  im2col implemented with fancy indexing.

  Zero-pads the last two axes of the 4-d input x, gathers every receptive
  field using the indices from get_im2col_indices, and returns a 2-d array
  with field_height * field_width * C rows (C = x.shape[1]).
  """
  # Pad only the two trailing (spatial) axes.
  pad_spec = ((0, 0), (0, 0), (padding, padding), (padding, padding))
  x_padded = np.pad(x, pad_spec, mode='constant')

  k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding,
                               stride)

  patches = x_padded[:, k, i, j]
  n_rows = field_height * field_width * x.shape[1]
  # Move the image axis last before flattening it into the column axis.
  return patches.transpose(1, 2, 0).reshape(n_rows, -1)
38 | 
39 | 
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
  """
  col2im implemented with fancy indexing and np.add.at.

  Scatter-adds the columns in `cols` back into a zero array shaped like the
  padded input described by x_shape = (N, C, H, W), then strips the padding.
  Overlapping receptive fields accumulate.
  """
  N, C, H, W = x_shape
  padded_shape = (N, C, H + 2 * padding, W + 2 * padding)
  x_padded = np.zeros(padded_shape, dtype=cols.dtype)

  k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                               stride)

  # Split the column axis back into (position, image) and put images first.
  contributions = cols.reshape(C * field_height * field_width, -1, N)
  contributions = contributions.transpose(2, 0, 1)
  np.add.at(x_padded, (slice(None), k, i, j), contributions)

  if padding != 0:
    return x_padded[:, :, padding:-padding, padding:-padding]
  # With no padding the slice above would be empty, so return as is.
  return x_padded
54 | 
55 | pass
56 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/im2col_cython.pyx:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | cimport numpy as np
  3 | cimport cython
  4 | 
  5 | # DTYPE = np.float64
  6 | # ctypedef np.float64_t DTYPE_t
  7 | 
# Fused element type: lets every function below that is typed with DTYPE_t
# accept either float32 or float64 arrays.
ctypedef fused DTYPE_t:
    np.float32_t
    np.float64_t
 11 | 
 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
 13 |                   int field_width, int padding, int stride):
 14 |     cdef int N = x.shape[0]
 15 |     cdef int C = x.shape[1]
 16 |     cdef int H = x.shape[2]
 17 |     cdef int W = x.shape[3]
 18 |     
 19 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 20 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 21 | 
 22 |     cdef int p = padding
 23 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
 24 |             ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 25 | 
 26 |     cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
 27 |             (C * field_height * field_width, N * HH * WW),
 28 |             dtype=x.dtype)
 29 | 
 30 |     # Moving the inner loop to a C function with no bounds checking works, but does
 31 |     # not seem to help performance in any measurable way.
 32 | 
 33 |     im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
 34 |                         field_height, field_width, padding, stride)
 35 |     return cols
 36 | 
 37 | 
 38 | @cython.boundscheck(False)
 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
 40 |                              np.ndarray[DTYPE_t, ndim=4] x_padded,
 41 |                              int N, int C, int H, int W, int HH, int WW,
 42 |                              int field_height, int field_width, int padding, int stride) except? -1:
 43 |     cdef int c, ii, jj, row, yy, xx, i, col
 44 | 
 45 |     for c in range(C):
 46 |         for yy in range(HH):
 47 |             for xx in range(WW):
 48 |                 for ii in range(field_height):
 49 |                     for jj in range(field_width):
 50 |                         row = c * field_width * field_height + ii * field_height + jj
 51 |                         for i in range(N):
 52 |                             col = yy * WW * N + xx * N + i
 53 |                             cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
 54 | 
 55 | 
 56 | 
 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
 58 |                   int field_height, int field_width, int padding, int stride):
 59 |     cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
 60 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 61 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 62 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
 63 |                                         dtype=cols.dtype)
 64 | 
 65 |     # Moving the inner loop to a C-function with no bounds checking improves
 66 |     # performance quite a bit for col2im.
 67 |     col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 
 68 |                         field_height, field_width, padding, stride)
 69 |     if padding > 0:
 70 |         return x_padded[:, :, padding:-padding, padding:-padding]
 71 |     return x_padded
 72 | 
 73 | 
 74 | @cython.boundscheck(False)
 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
 76 |                              np.ndarray[DTYPE_t, ndim=4] x_padded,
 77 |                              int N, int C, int H, int W, int HH, int WW,
 78 |                              int field_height, int field_width, int padding, int stride) except? -1:
 79 |     cdef int c, ii, jj, row, yy, xx, i, col
 80 | 
 81 |     for c in range(C):
 82 |         for ii in range(field_height):
 83 |             for jj in range(field_width):
 84 |                 row = c * field_width * field_height + ii * field_height + jj
 85 |                 for yy in range(HH):
 86 |                     for xx in range(WW):
 87 |                         for i in range(N):
 88 |                             col = yy * WW * N + xx * N + i
 89 |                             x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
 90 | 
 91 | 
 92 | @cython.boundscheck(False)
 93 | @cython.wraparound(False)
 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
 95 |                             np.ndarray[DTYPE_t, ndim=4] x_padded,
 96 |                             int N, int C, int H, int W, int HH, int WW,
 97 |                             int out_h, int out_w, int pad, int stride):
 98 | 
 99 |     cdef int c, hh, ww, n, h, w
100 |     for n in range(N):
101 |         for c in range(C):
102 |             for hh in range(HH):
103 |                 for ww in range(WW):
104 |                     for h in range(out_h):
105 |                         for w in range(out_w):
106 |                             x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
107 |     
108 | 
def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
        int HH, int WW, int pad, int stride):
    """
    Accumulate a 6-d column block back into an image-shaped array.

    `cols` is indexed as (C, HH, WW, N, out_h, out_w). A zero array of shape
    (N, C, H + 2 * pad, W + 2 * pad) receives the sums; the padding border is
    sliced off before returning.
    """
    # Explicit floor division: `/` on C ints is true division when the module
    # is compiled with language_level=3, which cannot be stored in an int.
    cdef int out_h = (H + 2 * pad - HH) // stride + 1
    cdef int out_w = (W + 2 * pad - WW) // stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                  dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded
122 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/layer_utils.py:
--------------------------------------------------------------------------------
  1 | from cs231n.layers import *
  2 | from cs231n.fast_layers import *
  3 | 
  4 | 
  5 | def affine_relu_forward(x, w, b):
  6 |   """
  7 |   Convenience layer that perorms an affine transform followed by a ReLU
  8 | 
  9 |   Inputs:
 10 |   - x: Input to the affine layer
 11 |   - w, b: Weights for the affine layer
 12 | 
 13 |   Returns a tuple of:
 14 |   - out: Output from the ReLU
 15 |   - cache: Object to give to the backward pass
 16 |   """
 17 |   a, fc_cache = affine_forward(x, w, b)
 18 |   out, relu_cache = relu_forward(a)
 19 |   cache = (fc_cache, relu_cache)
 20 |   return out, cache
 21 | 
 22 | 
 23 | def affine_relu_backward(dout, cache):
 24 |   """
 25 |   Backward pass for the affine-relu convenience layer
 26 |   """
 27 |   fc_cache, relu_cache = cache
 28 |   da = relu_backward(dout, relu_cache)
 29 |   dx, dw, db = affine_backward(da, fc_cache)
 30 |   return dx, dw, db
 31 | 
 32 | 
 33 | pass
 34 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
 35 |   a, fc_cache = affine_forward(x, w, b)
 36 |   bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
 37 |   out, relu_cache = relu_forward(bn)
 38 |   cache = (fc_cache, bn_cache, relu_cache)
 39 |   return out, cache
 40 | 
 41 | def affine_bn_relu_backward(dout, cache):
 42 |   fc_cache, bn_cache, relu_cache = cache
 43 |   da = relu_backward(dout, relu_cache)
 44 |   dbn, dgamma, dbeta = batchnorm_backward(da, bn_cache)
 45 |   dx, dw, db = affine_backward(dbn, fc_cache)
 46 |   return dx, dw, db, dgamma, dbeta
 47 | 
 48 | 
 49 | def conv_relu_forward(x, w, b, conv_param):
 50 |   """
 51 |   A convenience layer that performs a convolution followed by a ReLU.
 52 | 
 53 |   Inputs:
 54 |   - x: Input to the convolutional layer
 55 |   - w, b, conv_param: Weights and parameters for the convolutional layer
 56 | 
 57 |   Returns a tuple of:
 58 |   - out: Output from the ReLU
 59 |   - cache: Object to give to the backward pass
 60 |   """
 61 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 62 |   out, relu_cache = relu_forward(a)
 63 |   cache = (conv_cache, relu_cache)
 64 |   return out, cache
 65 | 
 66 | 
 67 | def conv_relu_backward(dout, cache):
 68 |   """
 69 |   Backward pass for the conv-relu convenience layer.
 70 |   """
 71 |   conv_cache, relu_cache = cache
 72 |   da = relu_backward(dout, relu_cache)
 73 |   dx, dw, db = conv_backward_fast(da, conv_cache)
 74 |   return dx, dw, db
 75 | 
 76 | 
 77 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
 78 |   """
 79 |   Convenience layer that performs a convolution, a ReLU, and a pool.
 80 | 
 81 |   Inputs:
 82 |   - x: Input to the convolutional layer
 83 |   - w, b, conv_param: Weights and parameters for the convolutional layer
 84 |   - pool_param: Parameters for the pooling layer
 85 | 
 86 |   Returns a tuple of:
 87 |   - out: Output from the pooling layer
 88 |   - cache: Object to give to the backward pass
 89 |   """
 90 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 91 |   s, relu_cache = relu_forward(a)
 92 |   out, pool_cache = max_pool_forward_fast(s, pool_param)
 93 |   cache = (conv_cache, relu_cache, pool_cache)
 94 |   return out, cache
 95 | 
 96 | 
 97 | def conv_relu_pool_backward(dout, cache):
 98 |   """
 99 |   Backward pass for the conv-relu-pool convenience layer
100 |   """
101 |   conv_cache, relu_cache, pool_cache = cache
102 |   ds = max_pool_backward_fast(dout, pool_cache)
103 |   da = relu_backward(ds, relu_cache)
104 |   dx, dw, db = conv_backward_fast(da, conv_cache)
105 |   return dx, dw, db
106 | 
107 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/optim.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | """
  4 | This file implements various first-order update rules that are commonly used for
  5 | training neural networks. Each update rule accepts current weights and the
  6 | gradient of the loss with respect to those weights and produces the next set of
  7 | weights. Each update rule has the same interface:
  8 | 
  9 | def update(w, dw, config=None):
 10 | 
 11 | Inputs:
 12 |   - w: A numpy array giving the current weights.
 13 |   - dw: A numpy array of the same shape as w giving the gradient of the
 14 |     loss with respect to w.
 15 |   - config: A dictionary containing hyperparameter values such as learning rate,
 16 |     momentum, etc. If the update rule requires caching values over many
 17 |     iterations, then config will also hold these cached values.
 18 | 
 19 | Returns:
 20 |   - next_w: The next point after the update.
 21 |   - config: The config dictionary to be passed to the next iteration of the
 22 |     update rule.
 23 | 
 24 | NOTE: For most update rules, the default learning rate will probably not perform
 25 | well; however the default values of the other hyperparameters should work well
 26 | for a variety of different problems.
 27 | 
 28 | For efficiency, update rules may perform in-place updates, mutating w and
 29 | setting next_w equal to w.
 30 | """
 31 | 
 32 | 
 33 | def sgd(w, dw, config=None):
 34 |   """
 35 |   Performs vanilla stochastic gradient descent.
 36 | 
 37 |   config format:
 38 |   - learning_rate: Scalar learning rate.
 39 |   """
 40 |   if config is None: config = {}
 41 |   config.setdefault('learning_rate', 1e-2)
 42 | 
 43 |   w -= config['learning_rate'] * dw
 44 |   return w, config
 45 | 
 46 | 
 47 | def sgd_momentum(w, dw, config=None):
 48 |   """
 49 |   Performs stochastic gradient descent with momentum.
 50 | 
 51 |   config format:
 52 |   - learning_rate: Scalar learning rate.
 53 |   - momentum: Scalar between 0 and 1 giving the momentum value.
 54 |     Setting momentum = 0 reduces to sgd.
 55 |   - velocity: A numpy array of the same shape as w and dw used to store a moving
 56 |     average of the gradients.
 57 |   """
 58 |   if config is None: config = {}
 59 |   config.setdefault('learning_rate', 1e-2)
 60 |   config.setdefault('momentum', 0.9)
 61 |   v = config.get('velocity', np.zeros_like(w))
 62 | 
 63 |   next_w = None
 64 |   #############################################################################
 65 |   # TODO: Implement the momentum update formula. Store the updated value in   #
 66 |   # the next_w variable. You should also use and update the velocity v.       #
 67 |   #############################################################################
 68 |   pass
 69 |   v = config['momentum'] * v - config['learning_rate'] * dw
 70 |   next_w = w + v
 71 |   #############################################################################
 72 |   #                             END OF YOUR CODE                              #
 73 |   #############################################################################
 74 |   config['velocity'] = v
 75 | 
 76 |   return next_w, config
 77 | 
 78 | 
 79 | 
 80 | def rmsprop(x, dx, config=None):
 81 |   """
 82 |   Uses the RMSProp update rule, which uses a moving average of squared gradient
 83 |   values to set adaptive per-parameter learning rates.
 84 | 
 85 |   config format:
 86 |   - learning_rate: Scalar learning rate.
 87 |   - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
 88 |     gradient cache.
 89 |   - epsilon: Small scalar used for smoothing to avoid dividing by zero.
 90 |   - cache: Moving average of second moments of gradients.
 91 |   """
 92 |   if config is None: config = {}
 93 |   config.setdefault('learning_rate', 1e-2)
 94 |   config.setdefault('decay_rate', 0.99)
 95 |   config.setdefault('epsilon', 1e-8)
 96 |   config.setdefault('cache', np.zeros_like(x))
 97 | 
 98 |   next_x = None
 99 |   #############################################################################
100 |   # TODO: Implement the RMSprop update formula, storing the next value of x   #
101 |   # in the next_x variable. Don't forget to update cache value stored in      #
102 |   # config['cache'].                                                          #
103 |   #############################################################################
104 |   pass
105 |   config['cache'] = config['decay_rate'] * config['cache'] + (1 - config['decay_rate']) * dx ** 2
106 |   next_x = x - config['learning_rate'] * dx / (np.sqrt(config['cache']) + config['epsilon'])
107 |   #############################################################################
108 |   #                             END OF YOUR CODE                              #
109 |   #############################################################################
110 | 
111 |   return next_x, config
112 | 
113 | 
def adam(x, dx, config=None):
  """
  Uses the Adam update rule, which incorporates moving averages of both the
  gradient and its square and a bias correction term.

  config format:
  - learning_rate: Scalar learning rate.
  - beta1: Decay rate for moving average of first moment of gradient.
  - beta2: Decay rate for moving average of second moment of gradient.
  - epsilon: Small scalar used for smoothing to avoid dividing by zero.
  - m: Moving average of gradient.
  - v: Moving average of squared gradient.
  - t: Iteration number; incremented by one on every call.

  Returns:
  - next_x: The next point after the update.
  - config: The config dictionary with updated m, v and t.
  """
  if config is None: config = {}
  config.setdefault('learning_rate', 1e-3)
  config.setdefault('beta1', 0.9)
  config.setdefault('beta2', 0.999)
  config.setdefault('epsilon', 1e-8)
  config.setdefault('m', np.zeros_like(x))
  config.setdefault('v', np.zeros_like(x))
  config.setdefault('t', 0)

  beta1, beta2 = config['beta1'], config['beta2']

  # Advance the step counter first so the first call uses t = 1 and the
  # bias-correction denominators below are never zero.
  config['t'] += 1
  t = config['t']

  config['m'] = beta1 * config['m'] + (1 - beta1) * dx
  config['v'] = beta2 * config['v'] + (1 - beta2) * (dx ** 2)

  # Bias correction: m and v start at zero, so early averages are biased
  # towards zero; dividing by (1 - beta ** t) compensates for that.
  m_hat = config['m'] / (1 - beta1 ** t)
  v_hat = config['v'] / (1 - beta2 ** t)

  next_x = x - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon'])

  return next_x, config
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | from distutils.extension import Extension
 3 | from Cython.Build import cythonize
 4 | import numpy
 5 | 
# Single extension module: compile im2col_cython.pyx into `im2col_cython`,
# adding NumPy's header directory to the include path.
extensions = [
  Extension('im2col_cython', ['im2col_cython.pyx'],
            include_dirs = [numpy.get_include()]
  ),
]

# Run the extension list through cythonize and hand the result to setup().
setup(
    ext_modules = cythonize(extensions),
)
15 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/solver.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from cs231n import optim
  4 | 
  5 | 
  6 | class Solver(object):
  7 |   """
  8 |   A Solver encapsulates all the logic necessary for training classification
  9 |   models. The Solver performs stochastic gradient descent using different
 10 |   update rules defined in optim.py.
 11 | 
 12 |   The solver accepts both training and validataion data and labels so it can
 13 |   periodically check classification accuracy on both training and validation
 14 |   data to watch out for overfitting.
 15 | 
 16 |   To train a model, you will first construct a Solver instance, passing the
 17 |   model, dataset, and various optoins (learning rate, batch size, etc) to the
 18 |   constructor. You will then call the train() method to run the optimization
 19 |   procedure and train the model.
 20 |   
 21 |   After the train() method returns, model.params will contain the parameters
 22 |   that performed best on the validation set over the course of training.
 23 |   In addition, the instance variable solver.loss_history will contain a list
 24 |   of all losses encountered during training and the instance variables
 25 |   solver.train_acc_history and solver.val_acc_history will be lists containing
 26 |   the accuracies of the model on the training and validation set at each epoch.
 27 |   
 28 |   Example usage might look something like this:
 29 |   
 30 |   data = {
 31 |     'X_train': # training data
 32 |     'y_train': # training labels
 33 |     'X_val': # validation data
 34 |     'X_train': # validation labels
 35 |   }
 36 |   model = MyAwesomeModel(hidden_size=100, reg=10)
 37 |   solver = Solver(model, data,
 38 |                   update_rule='sgd',
 39 |                   optim_config={
 40 |                     'learning_rate': 1e-3,
 41 |                   },
 42 |                   lr_decay=0.95,
 43 |                   num_epochs=10, batch_size=100,
 44 |                   print_every=100)
 45 |   solver.train()
 46 | 
 47 | 
 48 |   A Solver works on a model object that must conform to the following API:
 49 | 
 50 |   - model.params must be a dictionary mapping string parameter names to numpy
 51 |     arrays containing parameter values.
 52 | 
 53 |   - model.loss(X, y) must be a function that computes training-time loss and
 54 |     gradients, and test-time classification scores, with the following inputs
 55 |     and outputs:
 56 | 
 57 |     Inputs:
 58 |     - X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k)
 59 |     - y: Array of labels, of shape (N,) giving labels for X where y[i] is the
 60 |       label for X[i].
 61 | 
 62 |     Returns:
 63 |     If y is None, run a test-time forward pass and return:
 64 |     - scores: Array of shape (N, C) giving classification scores for X where
 65 |       scores[i, c] gives the score of class c for X[i].
 66 | 
 67 |     If y is not None, run a training time forward and backward pass and return
 68 |     a tuple of:
 69 |     - loss: Scalar giving the loss
 70 |     - grads: Dictionary with the same keys as self.params mapping parameter
 71 |       names to gradients of the loss with respect to those parameters.
 72 |   """
 73 | 
 74 |   def __init__(self, model, data, **kwargs):
 75 |     """
 76 |     Construct a new Solver instance.
 77 |     
 78 |     Required arguments:
 79 |     - model: A model object conforming to the API described above
 80 |     - data: A dictionary of training and validation data with the following:
 81 |       'X_train': Array of shape (N_train, d_1, ..., d_k) giving training images
 82 |       'X_val': Array of shape (N_val, d_1, ..., d_k) giving validation images
 83 |       'y_train': Array of shape (N_train,) giving labels for training images
 84 |       'y_val': Array of shape (N_val,) giving labels for validation images
 85 |       
 86 |     Optional arguments:
 87 |     - update_rule: A string giving the name of an update rule in optim.py.
 88 |       Default is 'sgd'.
 89 |     - optim_config: A dictionary containing hyperparameters that will be
 90 |       passed to the chosen update rule. Each update rule requires different
 91 |       hyperparameters (see optim.py) but all update rules require a
 92 |       'learning_rate' parameter so that should always be present.
 93 |     - lr_decay: A scalar for learning rate decay; after each epoch the learning
 94 |       rate is multiplied by this value.
 95 |     - batch_size: Size of minibatches used to compute loss and gradient during
 96 |       training.
 97 |     - num_epochs: The number of epochs to run for during training.
 98 |     - print_every: Integer; training losses will be printed every print_every
 99 |       iterations.
100 |     - verbose: Boolean; if set to false then no output will be printed during
101 |       training.
102 |     """
103 |     self.model = model
104 |     self.X_train = data['X_train']
105 |     self.y_train = data['y_train']
106 |     self.X_val = data['X_val']
107 |     self.y_val = data['y_val']
108 |     
109 |     # Unpack keyword arguments
110 |     self.update_rule = kwargs.pop('update_rule', 'sgd')
111 |     self.optim_config = kwargs.pop('optim_config', {})
112 |     self.lr_decay = kwargs.pop('lr_decay', 1.0)
113 |     self.batch_size = kwargs.pop('batch_size', 100)
114 |     self.num_epochs = kwargs.pop('num_epochs', 10)
115 | 
116 |     self.print_every = kwargs.pop('print_every', 10)
117 |     self.verbose = kwargs.pop('verbose', True)
118 | 
119 |     # Throw an error if there are extra keyword arguments
120 |     if len(kwargs) > 0:
121 |       extra = ', '.join('"%s"' % k for k in kwargs.keys())
122 |       raise ValueError('Unrecognized arguments %s' % extra)
123 | 
124 |     # Make sure the update rule exists, then replace the string
125 |     # name with the actual function
126 |     if not hasattr(optim, self.update_rule):
127 |       raise ValueError('Invalid update_rule "%s"' % self.update_rule)
128 |     self.update_rule = getattr(optim, self.update_rule)
129 | 
130 |     self._reset()
131 | 
132 | 
133 |   def _reset(self):
134 |     """
135 |     Set up some book-keeping variables for optimization. Don't call this
136 |     manually.
137 |     """
138 |     # Set up some variables for book-keeping
139 |     self.epoch = 0
140 |     self.best_val_acc = 0
141 |     self.best_params = {}
142 |     self.loss_history = []
143 |     self.train_acc_history = []
144 |     self.val_acc_history = []
145 | 
146 |     # Make a deep copy of the optim_config for each parameter
147 |     self.optim_configs = {}
148 |     for p in self.model.params:
149 |       d = {k: v for k, v in self.optim_config.iteritems()}
150 |       self.optim_configs[p] = d
151 | 
152 | 
153 |   def _step(self):
154 |     """
155 |     Make a single gradient update. This is called by train() and should not
156 |     be called manually.
157 |     """
158 |     # Make a minibatch of training data
159 |     num_train = self.X_train.shape[0]
160 |     batch_mask = np.random.choice(num_train, self.batch_size)
161 |     X_batch = self.X_train[batch_mask]
162 |     y_batch = self.y_train[batch_mask]
163 | 
164 |     # Compute loss and gradient
165 |     loss, grads = self.model.loss(X_batch, y_batch)
166 |     self.loss_history.append(loss)
167 | 
168 |     # Perform a parameter update
169 |     for p, w in self.model.params.iteritems():
170 |       dw = grads[p]
171 |       config = self.optim_configs[p]
172 |       next_w, next_config = self.update_rule(w, dw, config)
173 |       self.model.params[p] = next_w
174 |       self.optim_configs[p] = next_config
175 | 
176 | 
177 |   def check_accuracy(self, X, y, num_samples=None, batch_size=100):
178 |     """
179 |     Check accuracy of the model on the provided data.
180 |     
181 |     Inputs:
182 |     - X: Array of data, of shape (N, d_1, ..., d_k)
183 |     - y: Array of labels, of shape (N,)
184 |     - num_samples: If not None, subsample the data and only test the model
185 |       on num_samples datapoints.
186 |     - batch_size: Split X and y into batches of this size to avoid using too
187 |       much memory.
188 |       
189 |     Returns:
190 |     - acc: Scalar giving the fraction of instances that were correctly
191 |       classified by the model.
192 |     """
193 |     
194 |     # Maybe subsample the data
195 |     N = X.shape[0]
196 |     if num_samples is not None and N > num_samples:
197 |       mask = np.random.choice(N, num_samples)
198 |       N = num_samples
199 |       X = X[mask]
200 |       y = y[mask]
201 | 
202 |     # Compute predictions in batches
203 |     num_batches = N / batch_size
204 |     if N % batch_size != 0:
205 |       num_batches += 1
206 |     y_pred = []
207 |     for i in xrange(num_batches):
208 |       start = i * batch_size
209 |       end = (i + 1) * batch_size
210 |       scores = self.model.loss(X[start:end])
211 |       y_pred.append(np.argmax(scores, axis=1))
212 |     y_pred = np.hstack(y_pred)
213 |     acc = np.mean(y_pred == y)
214 | 
215 |     return acc
216 | 
217 | 
218 |   def train(self):
219 |     """
220 |     Run optimization to train the model.
221 |     """
222 |     num_train = self.X_train.shape[0]
223 |     iterations_per_epoch = max(num_train / self.batch_size, 1)
224 |     num_iterations = self.num_epochs * iterations_per_epoch
225 | 
226 |     for t in xrange(num_iterations):
227 |       self._step()
228 | 
229 |       # Maybe print training loss
230 |       if self.verbose and t % self.print_every == 0:
231 |         print '(Iteration %d / %d) loss: %f' % (
232 |                t + 1, num_iterations, self.loss_history[-1])
233 | 
234 |       # At the end of every epoch, increment the epoch counter and decay the
235 |       # learning rate.
236 |       epoch_end = (t + 1) % iterations_per_epoch == 0
237 |       if epoch_end:
238 |         self.epoch += 1
239 |         for k in self.optim_configs:
240 |           self.optim_configs[k]['learning_rate'] *= self.lr_decay
241 | 
242 |       # Check train and val accuracy on the first iteration, the last
243 |       # iteration, and at the end of each epoch.
244 |       first_it = (t == 0)
245 |       last_it = (t == num_iterations + 1)
246 |       if first_it or last_it or epoch_end:
247 |         train_acc = self.check_accuracy(self.X_train, self.y_train,
248 |                                         num_samples=1000)
249 |         val_acc = self.check_accuracy(self.X_val, self.y_val)
250 |         self.train_acc_history.append(train_acc)
251 |         self.val_acc_history.append(val_acc)
252 | 
253 |         if self.verbose:
254 |           print '(Epoch %d / %d) train acc: %f; val_acc: %f' % (
255 |                  self.epoch, self.num_epochs, train_acc, val_acc)
256 | 
257 |         # Keep track of the best model
258 |         if val_acc > self.best_val_acc:
259 |           self.best_val_acc = val_acc
260 |           self.best_params = {}
261 |           for k, v in self.model.params.iteritems():
262 |             self.best_params[k] = v.copy()
263 | 
264 |     # At the end of training swap the best params into the model
265 |     self.model.params = self.best_params
266 | 
267 | 


--------------------------------------------------------------------------------
/assignment2/cs231n/vis_utils.py:
--------------------------------------------------------------------------------
 1 | from math import sqrt, ceil
 2 | import numpy as np
 3 | 
 4 | def visualize_grid(Xs, ubound=255.0, padding=1):
 5 |   """
 6 |   Reshape a 4D tensor of image data to a grid for easy visualization.
 7 | 
 8 |   Inputs:
 9 |   - Xs: Data of shape (N, H, W, C)
10 |   - ubound: Output grid will have values scaled to the range [0, ubound]
11 |   - padding: The number of blank pixels between elements of the grid
12 |   """
13 |   (N, H, W, C) = Xs.shape
14 |   grid_size = int(ceil(sqrt(N)))
15 |   grid_height = H * grid_size + padding * (grid_size - 1)
16 |   grid_width = W * grid_size + padding * (grid_size - 1)
17 |   grid = np.zeros((grid_height, grid_width, C))
18 |   next_idx = 0
19 |   y0, y1 = 0, H
20 |   for y in xrange(grid_size):
21 |     x0, x1 = 0, W
22 |     for x in xrange(grid_size):
23 |       if next_idx < N:
24 |         img = Xs[next_idx]
25 |         low, high = np.min(img), np.max(img)
26 |         grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
27 |         # grid[y0:y1, x0:x1] = Xs[next_idx]
28 |         next_idx += 1
29 |       x0 += W + padding
30 |       x1 += W + padding
31 |     y0 += H + padding
32 |     y1 += H + padding
33 |   # grid_max = np.max(grid)
34 |   # grid_min = np.min(grid)
35 |   # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
36 |   return grid
37 | 
def vis_grid(Xs):
  """
  Tile a batch of images of shape (N, H, W, C) onto a square canvas.

  Each tile is followed by a one-pixel gap; the gaps and any unused tiles are
  filled with the minimum value of Xs. The canvas is then rescaled so that
  its smallest value becomes 0 and its largest becomes 1.
  """
  count, height, width, channels = Xs.shape
  side = int(ceil(sqrt(count)))
  canvas = np.ones((side * height + side, side * width + side, channels),
                   Xs.dtype)
  canvas *= np.min(Xs)
  # side * side >= count, so every image gets a tile in row-major order.
  for idx in range(count):
    row, col = divmod(idx, side)
    top = row * height + row
    left = col * width + col
    canvas[top:top + height, left:left + width, :] = Xs[idx, :, :, :]
  # normalize to [0,1]
  lowest = canvas.min()
  highest = canvas.max()
  return (canvas - lowest) / (highest - lowest)
55 |   
def vis_nn(rows):
  """
  Tile a nested sequence of images onto one canvas.

  rows[y][x] is placed at grid row y and grid column x. The tile size
  (H, W, C), the dtype, and the number of columns are all taken from the
  first row / first image. Each tile is followed by a one-pixel gap that
  keeps the canvas' initial value of 1. The result is rescaled so that its
  minimum becomes 0 and its maximum becomes 1.
  """
  first = rows[0][0]
  n_rows = len(rows)
  n_cols = len(rows[0])
  height, width, channels = first.shape
  canvas = np.ones((n_rows * height + n_rows, n_cols * width + n_cols,
                    channels), first.dtype)
  for r in range(n_rows):
    top = r * height + r
    for c in range(n_cols):
      left = c * width + c
      canvas[top:top + height, left:left + width, :] = rows[r][c]
  # normalize to [0,1]
  lowest = canvas.min()
  highest = canvas.max()
  return (canvas - lowest) / (highest - lowest)
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/assignment2/frameworkpython:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # what real Python executable to use
 4 | PYVER=2.7
 5 | PATHTOPYTHON=/usr/local/bin/
 6 | PYTHON=${PATHTOPYTHON}python${PYVER}
 7 | 
 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in
 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"`
10 | 
11 | # now run Python with the virtualenv set as Python's HOME
12 | export PYTHONHOME=$ENV
13 | exec $PYTHON "$@"
14 | 


--------------------------------------------------------------------------------
/assignment2/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment2/kitten.jpg


--------------------------------------------------------------------------------
/assignment2/puppy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment2/puppy.jpg


--------------------------------------------------------------------------------
/assignment2/requirements.txt:
--------------------------------------------------------------------------------
 1 | Cython==0.23.4
 2 | Jinja2==2.8
 3 | MarkupSafe==0.23
 4 | Pillow==3.0.0
 5 | Pygments==2.0.2
 6 | appnope==0.1.0
 7 | argparse==1.2.1
 8 | backports-abc==0.4
 9 | backports.ssl-match-hostname==3.5.0.1
10 | certifi==2015.11.20.1
11 | cycler==0.9.0
12 | decorator==4.0.6
13 | functools32==3.2.3-2
14 | gnureadline==6.3.3
15 | ipykernel==4.2.2
16 | ipython==4.0.1
17 | ipython-genutils==0.1.0
18 | ipywidgets==4.1.1
19 | jsonschema==2.5.1
20 | jupyter==1.0.0
21 | jupyter-client==4.1.1
22 | jupyter-console==4.0.3
23 | jupyter-core==4.0.6
24 | matplotlib==1.5.0
25 | mistune==0.7.1
26 | nbconvert==4.1.0
27 | nbformat==4.0.1
28 | notebook==4.0.6
29 | numpy==1.10.4
30 | path.py==8.1.2
31 | pexpect==4.0.1
32 | pickleshare==0.5
33 | ptyprocess==0.5
34 | pyparsing==2.0.7
35 | python-dateutil==2.4.2
36 | pytz==2015.7
37 | pyzmq==15.1.0
38 | qtconsole==4.1.1
39 | scipy==0.16.1
40 | simplegeneric==0.8.1
41 | singledispatch==3.4.0.3
42 | six==1.10.0
43 | terminado==0.5
44 | tornado==4.3
45 | traitlets==4.0.0
46 | wsgiref==0.1.2
47 | 


--------------------------------------------------------------------------------
/assignment2/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 | 
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 | 


--------------------------------------------------------------------------------
/assignment3/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | .env/*
4 | 


--------------------------------------------------------------------------------
/assignment3/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | rm -f assignment3.zip
2 | zip -r assignment3.zip . -x "*.git" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*"
3 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/.gitignore:
--------------------------------------------------------------------------------
1 | build/*
2 | im2col_cython.c
3 | im2col_cython.so
4 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/cs231n/__init__.py


--------------------------------------------------------------------------------
/assignment3/cs231n/captioning_solver.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from cs231n import optim
  4 | from cs231n.coco_utils import sample_coco_minibatch
  5 | 
  6 | 
  7 | class CaptioningSolver(object):
  8 |   """
  9 |   A CaptioningSolver encapsulates all the logic necessary for training
 10 |   image captioning models. The CaptioningSolver performs stochastic gradient
 11 |   descent using different update rules defined in optim.py.
 12 | 
 13 |   The solver accepts both training and validataion data and labels so it can
 14 |   periodically check classification accuracy on both training and validation
 15 |   data to watch out for overfitting.
 16 | 
 17 |   To train a model, you will first construct a CaptioningSolver instance,
 18 |   passing the model, dataset, and various options (learning rate, batch size,
 19 |   etc) to the constructor. You will then call the train() method to run the 
 20 |   optimization procedure and train the model.
 21 |   
 22 |   After the train() method returns, model.params will contain the parameters
 23 |   that performed best on the validation set over the course of training.
 24 |   In addition, the instance variable solver.loss_history will contain a list
 25 |   of all losses encountered during training and the instance variables
 26 |   solver.train_acc_history and solver.val_acc_history will be lists containing
 27 |   the accuracies of the model on the training and validation set at each epoch.
 28 |   
 29 |   Example usage might look something like this:
 30 |   
 31 |   data = load_coco_data()
 32 |   model = MyAwesomeModel(hidden_dim=100)
 33 |   solver = CaptioningSolver(model, data,
 34 |                   update_rule='sgd',
 35 |                   optim_config={
 36 |                     'learning_rate': 1e-3,
 37 |                   },
 38 |                   lr_decay=0.95,
 39 |                   num_epochs=10, batch_size=100,
 40 |                   print_every=100)
 41 |   solver.train()
 42 | 
 43 | 
 44 |   A CaptioningSolver works on a model object that must conform to the following
 45 |   API:
 46 | 
 47 |   - model.params must be a dictionary mapping string parameter names to numpy
 48 |     arrays containing parameter values.
 49 | 
 50 |   - model.loss(features, captions) must be a function that computes
 51 |     training-time loss and gradients, with the following inputs and outputs:
 52 | 
 53 |     Inputs:
 54 |     - features: Array giving a minibatch of features for images, of shape (N, D
 55 |     - captions: Array of captions for those images, of shape (N, T) where
 56 |       each element is in the range (0, V].
 57 | 
 58 |     Returns:
 59 |     - loss: Scalar giving the loss
 60 |     - grads: Dictionary with the same keys as self.params mapping parameter
 61 |       names to gradients of the loss with respect to those parameters.
 62 |   """
 63 | 
 64 |   def __init__(self, model, data, **kwargs):
 65 |     """
 66 |     Construct a new CaptioningSolver instance.
 67 |     
 68 |     Required arguments:
 69 |     - model: A model object conforming to the API described above
 70 |     - data: A dictionary of training and validation data from load_coco_data
 71 | 
 72 |     Optional arguments:
 73 |     - update_rule: A string giving the name of an update rule in optim.py.
 74 |       Default is 'sgd'.
 75 |     - optim_config: A dictionary containing hyperparameters that will be
 76 |       passed to the chosen update rule. Each update rule requires different
 77 |       hyperparameters (see optim.py) but all update rules require a
 78 |       'learning_rate' parameter so that should always be present.
 79 |     - lr_decay: A scalar for learning rate decay; after each epoch the learning
 80 |       rate is multiplied by this value.
 81 |     - batch_size: Size of minibatches used to compute loss and gradient during
 82 |       training.
 83 |     - num_epochs: The number of epochs to run for during training.
 84 |     - print_every: Integer; training losses will be printed every print_every
 85 |       iterations.
 86 |     - verbose: Boolean; if set to false then no output will be printed during
 87 |       training.
 88 |     """
 89 |     self.model = model
 90 |     self.data = data
 91 |     
 92 |     # Unpack keyword arguments
 93 |     self.update_rule = kwargs.pop('update_rule', 'sgd')
 94 |     self.optim_config = kwargs.pop('optim_config', {})
 95 |     self.lr_decay = kwargs.pop('lr_decay', 1.0)
 96 |     self.batch_size = kwargs.pop('batch_size', 100)
 97 |     self.num_epochs = kwargs.pop('num_epochs', 10)
 98 | 
 99 |     self.print_every = kwargs.pop('print_every', 10)
100 |     self.verbose = kwargs.pop('verbose', True)
101 | 
102 |     # Throw an error if there are extra keyword arguments
103 |     if len(kwargs) > 0:
104 |       extra = ', '.join('"%s"' % k for k in kwargs.keys())
105 |       raise ValueError('Unrecognized arguments %s' % extra)
106 | 
107 |     # Make sure the update rule exists, then replace the string
108 |     # name with the actual function
109 |     if not hasattr(optim, self.update_rule):
110 |       raise ValueError('Invalid update_rule "%s"' % self.update_rule)
111 |     self.update_rule = getattr(optim, self.update_rule)
112 | 
113 |     self._reset()
114 | 
115 | 
116 |   def _reset(self):
117 |     """
118 |     Set up some book-keeping variables for optimization. Don't call this
119 |     manually.
120 |     """
121 |     # Set up some variables for book-keeping
122 |     self.epoch = 0
123 |     self.best_val_acc = 0
124 |     self.best_params = {}
125 |     self.loss_history = []
126 |     self.train_acc_history = []
127 |     self.val_acc_history = []
128 | 
129 |     # Make a deep copy of the optim_config for each parameter
130 |     self.optim_configs = {}
131 |     for p in self.model.params:
132 |       d = {k: v for k, v in self.optim_config.iteritems()}
133 |       self.optim_configs[p] = d
134 | 
135 | 
136 |   def _step(self):
137 |     """
138 |     Make a single gradient update. This is called by train() and should not
139 |     be called manually.
140 |     """
141 |     # Make a minibatch of training data
142 |     minibatch = sample_coco_minibatch(self.data,
143 |                   batch_size=self.batch_size,
144 |                   split='train')
145 |     captions, features, urls = minibatch
146 | 
147 |     # Compute loss and gradient
148 |     loss, grads = self.model.loss(features, captions)
149 |     self.loss_history.append(loss)
150 | 
151 |     # Perform a parameter update
152 |     for p, w in self.model.params.iteritems():
153 |       dw = grads[p]
154 |       config = self.optim_configs[p]
155 |       next_w, next_config = self.update_rule(w, dw, config)
156 |       self.model.params[p] = next_w
157 |       self.optim_configs[p] = next_config
158 | 
159 |   
160 |   # TODO: This does nothing right now; maybe implement BLEU?
161 |   def check_accuracy(self, X, y, num_samples=None, batch_size=100):
162 |     """
163 |     Check accuracy of the model on the provided data.
164 |     
165 |     Inputs:
166 |     - X: Array of data, of shape (N, d_1, ..., d_k)
167 |     - y: Array of labels, of shape (N,)
168 |     - num_samples: If not None, subsample the data and only test the model
169 |       on num_samples datapoints.
170 |     - batch_size: Split X and y into batches of this size to avoid using too
171 |       much memory.
172 |       
173 |     Returns:
174 |     - acc: Scalar giving the fraction of instances that were correctly
175 |       classified by the model.
176 |     """
177 |     return 0.0
178 |     
179 |     # Maybe subsample the data
180 |     N = X.shape[0]
181 |     if num_samples is not None and N > num_samples:
182 |       mask = np.random.choice(N, num_samples)
183 |       N = num_samples
184 |       X = X[mask]
185 |       y = y[mask]
186 | 
187 |     # Compute predictions in batches
188 |     num_batches = N / batch_size
189 |     if N % batch_size != 0:
190 |       num_batches += 1
191 |     y_pred = []
192 |     for i in xrange(num_batches):
193 |       start = i * batch_size
194 |       end = (i + 1) * batch_size
195 |       scores = self.model.loss(X[start:end])
196 |       y_pred.append(np.argmax(scores, axis=1))
197 |     y_pred = np.hstack(y_pred)
198 |     acc = np.mean(y_pred == y)
199 | 
200 |     return acc
201 | 
202 | 
203 |   def train(self):
204 |     """
205 |     Run optimization to train the model.
206 |     """
207 |     num_train = self.data['train_captions'].shape[0]
208 |     iterations_per_epoch = max(num_train / self.batch_size, 1)
209 |     num_iterations = self.num_epochs * iterations_per_epoch
210 | 
211 |     for t in xrange(num_iterations):
212 |       self._step()
213 | 
214 |       # Maybe print training loss
215 |       if self.verbose and t % self.print_every == 0:
216 |         print '(Iteration %d / %d) loss: %f' % (
217 |                t + 1, num_iterations, self.loss_history[-1])
218 | 
219 |       # At the end of every epoch, increment the epoch counter and decay the
220 |       # learning rate.
221 |       epoch_end = (t + 1) % iterations_per_epoch == 0
222 |       if epoch_end:
223 |         self.epoch += 1
224 |         for k in self.optim_configs:
225 |           self.optim_configs[k]['learning_rate'] *= self.lr_decay
226 | 
227 |       # Check train and val accuracy on the first iteration, the last
228 |       # iteration, and at the end of each epoch.
229 |       # TODO: Implement some logic to check Bleu on validation set periodically
230 | 
231 |     # At the end of training swap the best params into the model
232 |     # self.model.params = self.best_params
233 | 
234 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/cs231n/classifiers/__init__.py


--------------------------------------------------------------------------------
/assignment3/cs231n/classifiers/pretrained_cnn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import h5py
  3 | 
  4 | from cs231n.layers import *
  5 | from cs231n.fast_layers import *
  6 | from cs231n.layer_utils import *
  7 | 
  8 | 
  9 | class PretrainedCNN(object):
 10 |   def __init__(self, dtype=np.float32, num_classes=100, input_size=64, h5_file=None):
 11 |     self.dtype = dtype
 12 |     self.conv_params = []
 13 |     self.input_size = input_size
 14 |     self.num_classes = num_classes
 15 |     
 16 |     # TODO: In the future it would be nice if the architecture could be loaded from
 17 |     # the HDF5 file rather than being hardcoded. For now this will have to do.
 18 |     self.conv_params.append({'stride': 2, 'pad': 2})
 19 |     self.conv_params.append({'stride': 1, 'pad': 1})
 20 |     self.conv_params.append({'stride': 2, 'pad': 1})
 21 |     self.conv_params.append({'stride': 1, 'pad': 1})
 22 |     self.conv_params.append({'stride': 2, 'pad': 1})
 23 |     self.conv_params.append({'stride': 1, 'pad': 1})
 24 |     self.conv_params.append({'stride': 2, 'pad': 1})
 25 |     self.conv_params.append({'stride': 1, 'pad': 1})
 26 |     self.conv_params.append({'stride': 2, 'pad': 1})
 27 | 
 28 |     self.filter_sizes = [5, 3, 3, 3, 3, 3, 3, 3, 3]
 29 |     self.num_filters = [64, 64, 128, 128, 256, 256, 512, 512, 1024]
 30 |     hidden_dim = 512
 31 | 
 32 |     self.bn_params = []
 33 |     
 34 |     cur_size = input_size
 35 |     prev_dim = 3
 36 |     self.params = {}
 37 |     for i, (f, next_dim) in enumerate(zip(self.filter_sizes, self.num_filters)):
 38 |       fan_in = f * f * prev_dim
 39 |       self.params['W%d' % (i + 1)] = np.sqrt(2.0 / fan_in) * np.random.randn(next_dim, prev_dim, f, f)
 40 |       self.params['b%d' % (i + 1)] = np.zeros(next_dim)
 41 |       self.params['gamma%d' % (i + 1)] = np.ones(next_dim)
 42 |       self.params['beta%d' % (i + 1)] = np.zeros(next_dim)
 43 |       self.bn_params.append({'mode': 'train'})
 44 |       prev_dim = next_dim
 45 |       if self.conv_params[i]['stride'] == 2: cur_size /= 2
 46 |     
 47 |     # Add a fully-connected layers
 48 |     fan_in = cur_size * cur_size * self.num_filters[-1]
 49 |     self.params['W%d' % (i + 2)] = np.sqrt(2.0 / fan_in) * np.random.randn(fan_in, hidden_dim)
 50 |     self.params['b%d' % (i + 2)] = np.zeros(hidden_dim)
 51 |     self.params['gamma%d' % (i + 2)] = np.ones(hidden_dim)
 52 |     self.params['beta%d' % (i + 2)] = np.zeros(hidden_dim)
 53 |     self.bn_params.append({'mode': 'train'})
 54 |     self.params['W%d' % (i + 3)] = np.sqrt(2.0 / hidden_dim) * np.random.randn(hidden_dim, num_classes)
 55 |     self.params['b%d' % (i + 3)] = np.zeros(num_classes)
 56 |     
 57 |     for k, v in self.params.iteritems():
 58 |       self.params[k] = v.astype(dtype)
 59 | 
 60 |     if h5_file is not None:
 61 |       self.load_weights(h5_file)
 62 | 
 63 |   
 64 |   def load_weights(self, h5_file, verbose=False):
 65 |     """
 66 |     Load pretrained weights from an HDF5 file.
 67 | 
 68 |     Inputs:
 69 |     - h5_file: Path to the HDF5 file where pretrained weights are stored.
 70 |     - verbose: Whether to print debugging info
 71 |     """
 72 | 
 73 |     # Before loading weights we need to make a dummy forward pass to initialize
 74 |     # the running averages in the bn_pararams
 75 |     x = np.random.randn(1, 3, self.input_size, self.input_size)
 76 |     y = np.random.randint(self.num_classes, size=1)
 77 |     loss, grads = self.loss(x, y)
 78 | 
 79 |     with h5py.File(h5_file, 'r') as f:
 80 |       for k, v in f.iteritems():
 81 |         v = np.asarray(v)
 82 |         if k in self.params:
 83 |           if verbose: print k, v.shape, self.params[k].shape
 84 |           if v.shape == self.params[k].shape:
 85 |             self.params[k] = v.copy()
 86 |           elif v.T.shape == self.params[k].shape:
 87 |             self.params[k] = v.T.copy()
 88 |           else:
 89 |             raise ValueError('shapes for %s do not match' % k)
 90 |         if k.startswith('running_mean'):
 91 |           i = int(k[12:]) - 1
 92 |           assert self.bn_params[i]['running_mean'].shape == v.shape
 93 |           self.bn_params[i]['running_mean'] = v.copy()
 94 |           if verbose: print k, v.shape
 95 |         if k.startswith('running_var'):
 96 |           i = int(k[11:]) - 1
 97 |           assert v.shape == self.bn_params[i]['running_var'].shape
 98 |           self.bn_params[i]['running_var'] = v.copy()
 99 |           if verbose: print k, v.shape
100 |         
101 |     for k, v in self.params.iteritems():
102 |       self.params[k] = v.astype(self.dtype)
103 | 
104 |   
105 |   def forward(self, X, start=None, end=None, mode='test'):
106 |     """
107 |     Run part of the model forward, starting and ending at an arbitrary layer,
108 |     in either training mode or testing mode.
109 | 
110 |     You can pass arbitrary input to the starting layer, and you will receive
111 |     output from the ending layer and a cache object that can be used to run
112 |     the model backward over the same set of layers.
113 | 
114 |     For the purposes of this function, a "layer" is one of the following blocks:
115 | 
116 |     [conv - spatial batchnorm - relu] (There are 9 of these)
117 |     [affine - batchnorm - relu] (There is one of these)
118 |     [affine] (There is one of these)
119 | 
120 |     Inputs:
121 |     - X: The input to the starting layer. If start=0, then this should be an
122 |       array of shape (N, C, 64, 64).
123 |     - start: The index of the layer to start from. start=0 starts from the first
124 |       convolutional layer. Default is 0.
125 |     - end: The index of the layer to end at. start=11 ends at the last
126 |       fully-connected layer, returning class scores. Default is 11.
127 |     - mode: The mode to use, either 'test' or 'train'. We need this because
128 |       batch normalization behaves differently at training time and test time.
129 | 
130 |     Returns:
131 |     - out: Output from the end layer.
132 |     - cache: A cache object that can be passed to the backward method to run the
133 |       network backward over the same range of layers.
134 |     """
135 |     X = X.astype(self.dtype)
136 |     if start is None: start = 0
137 |     if end is None: end = len(self.conv_params) + 1
138 |     layer_caches = []
139 | 
140 |     prev_a = X
141 |     for i in xrange(start, end + 1):
142 |       i1 = i + 1
143 |       if 0 <= i < len(self.conv_params):
144 |         # This is a conv layer
145 |         w, b = self.params['W%d' % i1], self.params['b%d' % i1]
146 |         gamma, beta = self.params['gamma%d' % i1], self.params['beta%d' % i1]
147 |         conv_param = self.conv_params[i]
148 |         bn_param = self.bn_params[i]
149 |         bn_param['mode'] = mode
150 | 
151 |         next_a, cache = conv_bn_relu_forward(prev_a, w, b, gamma, beta, conv_param, bn_param)
152 |       elif i == len(self.conv_params):
153 |         # This is the fully-connected hidden layer
154 |         w, b = self.params['W%d' % i1], self.params['b%d' % i1]
155 |         gamma, beta = self.params['gamma%d' % i1], self.params['beta%d' % i1]
156 |         bn_param = self.bn_params[i]
157 |         bn_param['mode'] = mode
158 |         next_a, cache = affine_bn_relu_forward(prev_a, w, b, gamma, beta, bn_param)
159 |       elif i == len(self.conv_params) + 1:
160 |         # This is the last fully-connected layer that produces scores
161 |         w, b = self.params['W%d' % i1], self.params['b%d' % i1]
162 |         next_a, cache = affine_forward(prev_a, w, b)
163 |       else:
164 |         raise ValueError('Invalid layer index %d' % i)
165 | 
166 |       layer_caches.append(cache)
167 |       prev_a = next_a
168 | 
169 |     out = prev_a
170 |     cache = (start, end, layer_caches)
171 |     return out, cache
172 | 
173 | 
174 |   def backward(self, dout, cache):
175 |     """
176 |     Run the model backward over a sequence of layers that were previously run
177 |     forward using the self.forward method.
178 | 
179 |     Inputs:
180 |     - dout: Gradient with respect to the ending layer; this should have the same
181 |       shape as the out variable returned from the corresponding call to forward.
182 |     - cache: A cache object returned from self.forward.
183 | 
184 |     Returns:
185 |     - dX: Gradient with respect to the start layer. This will have the same
186 |       shape as the input X passed to self.forward.
187 |     - grads: Gradient of all parameters in the layers. For example if you run
188 |       forward through two convolutional layers, then on the corresponding call
189 |       to backward grads will contain the gradients with respect to the weights,
190 |       biases, and spatial batchnorm parameters of those two convolutional
191 |       layers. The grads dictionary will therefore contain a subset of the keys
192 |       of self.params, and grads[k] and self.params[k] will have the same shape.
193 |     """
194 |     start, end, layer_caches = cache
195 |     dnext_a = dout
196 |     grads = {}
197 |     for i in reversed(range(start, end + 1)):
198 |       i1 = i + 1
199 |       if i == len(self.conv_params) + 1:
200 |         # This is the last fully-connected layer
201 |         dprev_a, dw, db = affine_backward(dnext_a, layer_caches.pop())
202 |         grads['W%d' % i1] = dw
203 |         grads['b%d' % i1] = db
204 |       elif i == len(self.conv_params):
205 |         # This is the fully-connected hidden layer
206 |         temp = affine_bn_relu_backward(dnext_a, layer_caches.pop())
207 |         dprev_a, dw, db, dgamma, dbeta = temp
208 |         grads['W%d' % i1] = dw
209 |         grads['b%d' % i1] = db
210 |         grads['gamma%d' % i1] = dgamma
211 |         grads['beta%d' % i1] = dbeta
212 |       elif 0 <= i < len(self.conv_params):
213 |         # This is a conv layer
214 |         temp = conv_bn_relu_backward(dnext_a, layer_caches.pop())
215 |         dprev_a, dw, db, dgamma, dbeta = temp
216 |         grads['W%d' % i1] = dw
217 |         grads['b%d' % i1] = db
218 |         grads['gamma%d' % i1] = dgamma
219 |         grads['beta%d' % i1] = dbeta
220 |       else:
221 |         raise ValueError('Invalid layer index %d' % i)
222 |       dnext_a = dprev_a
223 | 
224 |     dX = dnext_a
225 |     return dX, grads
226 | 
227 | 
228 |   def loss(self, X, y=None):
229 |     """
230 |     Classification loss used to train the network.
231 | 
232 |     Inputs:
233 |     - X: Array of data, of shape (N, 3, 64, 64)
234 |     - y: Array of labels, of shape (N,)
235 | 
236 |     If y is None, then run a test-time forward pass and return:
237 |     - scores: Array of shape (N, 100) giving class scores.
238 | 
239 |     If y is not None, then run a training-time forward and backward pass and
240 |     return a tuple of:
241 |     - loss: Scalar giving loss
242 |     - grads: Dictionary of gradients, with the same keys as self.params.
243 |     """
244 |     # Note that we implement this by just caling self.forward and self.backward
245 |     mode = 'test' if y is None else 'train'
246 |     scores, cache = self.forward(X, mode=mode)
247 |     if mode == 'test':
248 |       return scores
249 |     loss, dscores = softmax_loss(scores, y)
250 |     dX, grads = self.backward(dscores, cache)
251 |     return loss, grads
252 | 
253 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/classifiers/rnn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from cs231n.layers import *
  4 | from cs231n.rnn_layers import *
  5 | 
  6 | 
  7 | class CaptioningRNN(object):
  8 |   """
  9 |   A CaptioningRNN produces captions from image features using a recurrent
 10 |   neural network.
 11 | 
 12 |   The RNN receives input vectors of size D, has a vocab size of V, works on
 13 |   sequences of length T, has an RNN hidden dimension of H, uses word vectors
 14 |   of dimension W, and operates on minibatches of size N.
 15 | 
 16 |   Note that we don't use any regularization for the CaptioningRNN.
 17 |   """
 18 | 
 19 |   def __init__(self, word_to_idx, input_dim=512, wordvec_dim=128,
 20 |                hidden_dim=128, cell_type='rnn', dtype=np.float32):
 21 |     """
 22 |     Construct a new CaptioningRNN instance.
 23 | 
 24 |     Inputs:
 25 |     - word_to_idx: A dictionary giving the vocabulary. It contains V entries,
 26 |       and maps each string to a unique integer in the range [0, V).
 27 |     - input_dim: Dimension D of input image feature vectors.
 28 |     - wordvec_dim: Dimension W of word vectors.
 29 |     - hidden_dim: Dimension H for the hidden state of the RNN.
 30 |     - cell_type: What type of RNN to use; either 'rnn' or 'lstm'.
 31 |     - dtype: numpy datatype to use; use float32 for training and float64 for
 32 |       numeric gradient checking.
 33 |     """
 34 |     if cell_type not in {'rnn', 'lstm'}:
 35 |       raise ValueError('Invalid cell_type "%s"' % cell_type)
 36 | 
 37 |     self.cell_type = cell_type
 38 |     self.dtype = dtype
 39 |     self.word_to_idx = word_to_idx
 40 |     self.idx_to_word = {i: w for w, i in word_to_idx.iteritems()}
 41 |     self.params = {}
 42 | 
 43 |     vocab_size = len(word_to_idx)
 44 | 
 45 |     self._null = word_to_idx['<NULL>']
 46 |     self._start = word_to_idx.get('<START>', None)
 47 |     self._end = word_to_idx.get('<END>', None)
 48 | 
 49 |     # Initialize word vectors
 50 |     self.params['W_embed'] = np.random.randn(vocab_size, wordvec_dim)
 51 |     self.params['W_embed'] /= 100
 52 | 
 53 |     # Initialize CNN -> hidden state projection parameters
 54 |     self.params['W_proj'] = np.random.randn(input_dim, hidden_dim)
 55 |     self.params['W_proj'] /= np.sqrt(input_dim)
 56 |     self.params['b_proj'] = np.zeros(hidden_dim)
 57 | 
 58 |     # Initialize parameters for the RNN
 59 |     dim_mul = {'lstm': 4, 'rnn': 1}[cell_type]
 60 |     self.params['Wx'] = np.random.randn(wordvec_dim, dim_mul * hidden_dim)
 61 |     self.params['Wx'] /= np.sqrt(wordvec_dim)
 62 |     self.params['Wh'] = np.random.randn(hidden_dim, dim_mul * hidden_dim)
 63 |     self.params['Wh'] /= np.sqrt(hidden_dim)
 64 |     self.params['b'] = np.zeros(dim_mul * hidden_dim)
 65 | 
 66 |     # Initialize output to vocab weights
 67 |     self.params['W_vocab'] = np.random.randn(hidden_dim, vocab_size)
 68 |     self.params['W_vocab'] /= np.sqrt(hidden_dim)
 69 |     self.params['b_vocab'] = np.zeros(vocab_size)
 70 | 
 71 |     # Cast parameters to correct dtype
 72 |     for k, v in self.params.iteritems():
 73 |       self.params[k] = v.astype(self.dtype)
 74 | 
 75 | 
 76 |   def loss(self, features, captions):
 77 |     """
 78 |     Compute training-time loss for the RNN. We input image features and
 79 |     ground-truth captions for those images, and use an RNN (or LSTM) to compute
 80 |     loss and gradients on all parameters.
 81 | 
 82 |     Inputs:
 83 |     - features: Input image features, of shape (N, D)
 84 |     - captions: Ground-truth captions; an integer array of shape (N, T) where
 85 |       each element is in the range 0 <= y[i, t] < V
 86 | 
 87 |     Returns a tuple of:
 88 |     - loss: Scalar loss
 89 |     - grads: Dictionary of gradients parallel to self.params
 90 |     """
 91 |     # Cut captions into two pieces: captions_in has everything but the last word
 92 |     # and will be input to the RNN; captions_out has everything but the first
 93 |     # word and this is what we will expect the RNN to generate. These are offset
 94 |     # by one relative to each other because the RNN should produce word (t+1)
 95 |     # after receiving word t. The first element of captions_in will be the START
 96 |     # token, and the first element of captions_out will be the first word.
 97 |     captions_in = captions[:, :-1]
 98 |     captions_out = captions[:, 1:]
 99 | 
100 |     # You'll need this
101 |     mask = (captions_out != self._null)
102 | 
103 |     # Weight and bias for the affine transform from image features to initial
104 |     # hidden state
105 |     W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
106 | 
107 |     # Word embedding matrix
108 |     W_embed = self.params['W_embed']
109 | 
110 |     # Input-to-hidden, hidden-to-hidden, and biases for the RNN
111 |     Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']
112 | 
113 |     # Weight and bias for the hidden-to-vocab transformation.
114 |     W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']
115 | 
116 |     loss, grads = 0.0, {}
117 |     ############################################################################
118 |     # TODO: Implement the forward and backward passes for the CaptioningRNN.   #
119 |     # In the forward pass you will need to do the following:                   #
120 |     # (1) Use an affine transformation to compute the initial hidden state     #
121 |     #     from the image features. This should produce an array of shape (N, H)#
122 |     # (2) Use a word embedding layer to transform the words in captions_in     #
123 |     #     from indices to vectors, giving an array of shape (N, T, W).         #
124 |     # (3) Use either a vanilla RNN or LSTM (depending on self.cell_type) to    #
125 |     #     process the sequence of input word vectors and produce hidden state  #
126 |     #     vectors for all timesteps, producing an array of shape (N, T, H).    #
127 |     # (4) Use a (temporal) affine transformation to compute scores over the    #
128 |     #     vocabulary at every timestep using the hidden states, giving an      #
129 |     #     array of shape (N, T, V).                                            #
130 |     # (5) Use (temporal) softmax to compute loss using captions_out, ignoring  #
131 |     #     the points where the output word is <NULL> using the mask above.     #
132 |     #                                                                          #
133 |     # In the backward pass you will need to compute the gradient of the loss   #
134 |     # with respect to all model parameters. Use the loss and grads variables   #
135 |     # defined above to store loss and gradients; grads[k] should give the      #
136 |     # gradients for self.params[k].                                            #
137 |     ############################################################################
138 |     pass
139 |     # forward and loss
140 |     # (1) image feature to rnn hidden state
141 |     h0, features_cache = affine_forward(features, W_proj, b_proj)
142 |     # (2) words indices to word embedding vectors
143 |     captions_in_emb, emb_in_cache = word_embedding_forward(captions_in, W_embed)
144 |     # (3) rnn forward
145 |     if self.cell_type == 'rnn':
146 |         h, rnn_cache = rnn_forward(captions_in_emb, h0, Wx, Wh, b)
147 |     elif self.cell_type == 'lstm':
148 |         h, lstm_cache = lstm_forward(captions_in_emb, h0, Wx, Wh, b)
149 |     # (4) hidden state to words
150 |     temporal_out, temporal_cache = temporal_affine_forward(h, W_vocab, b_vocab)
151 |     # (5) prediction and label, loss and gradients
152 |     loss, dout = temporal_softmax_loss(temporal_out, captions_out, mask)
153 | 
154 |     # backward and grads
155 |     dtemp, grads['W_vocab'], grads['b_vocab'] = temporal_affine_backward(dout, temporal_cache)
156 |     if self.cell_type == 'rnn':
157 |         drnn, dh0, grads['Wx'], grads['Wh'], grads['b'] = rnn_backward(dtemp, rnn_cache)
158 |     elif self.cell_type == 'lstm':
159 |         drnn, dh0, grads['Wx'], grads['Wh'], grads['b'] = lstm_backward(dtemp, lstm_cache)
160 |     grads['W_embed'] = word_embedding_backward(drnn, emb_in_cache)
161 |     dfeatures, grads['W_proj'], grads['b_proj'] = affine_backward(dh0, features_cache)
162 |     ############################################################################
163 |     #                             END OF YOUR CODE                             #
164 |     ############################################################################
165 | 
166 |     return loss, grads
167 | 
168 | 
169 |   def sample(self, features, max_length=30):
170 |     """
171 |     Run a test-time forward pass for the model, sampling captions for input
172 |     feature vectors.
173 | 
174 |     At each timestep, we embed the current word, pass it and the previous hidden
175 |     state to the RNN to get the next hidden state, use the hidden state to get
176 |     scores for all vocab words, and choose the word with the highest score as
177 |     the next word. The initial hidden state is computed by applying an affine
178 |     transform to the input image features, and the initial word is the <START>
179 |     token.
180 | 
181 |     For LSTMs you will also have to keep track of the cell state; in that case
182 |     the initial cell state should be zero.
183 | 
184 |     Inputs:
185 |     - features: Array of input image features of shape (N, D).
186 |     - max_length: Maximum length T of generated captions.
187 | 
188 |     Returns:
189 |     - captions: Array of shape (N, max_length) giving sampled captions,
190 |       where each element is an integer in the range [0, V). The first element
191 |       of captions should be the first sampled word, not the <START> token.
192 |     """
193 |     N = features.shape[0]
194 |     captions = self._null * np.ones((N, max_length), dtype=np.int32)
195 | 
196 |     # Unpack parameters
197 |     W_proj, b_proj = self.params['W_proj'], self.params['b_proj']
198 |     W_embed = self.params['W_embed']
199 |     Wx, Wh, b = self.params['Wx'], self.params['Wh'], self.params['b']
200 |     W_vocab, b_vocab = self.params['W_vocab'], self.params['b_vocab']
201 | 
202 |     ###########################################################################
203 |     # TODO: Implement test-time sampling for the model. You will need to      #
204 |     # initialize the hidden state of the RNN by applying the learned affine   #
205 |     # transform to the input image features. The first word that you feed to  #
206 |     # the RNN should be the <START> token; its value is stored in the         #
207 |     # variable self._start. At each timestep you will need to do to:          #
208 |     # (1) Embed the previous word using the learned word embeddings           #
209 |     # (2) Make an RNN step using the previous hidden state and the embedded   #
210 |     #     current word to get the next hidden state.                          #
211 |     # (3) Apply the learned affine transformation to the next hidden state to #
212 |     #     get scores for all words in the vocabulary                          #
213 |     # (4) Select the word with the highest score as the next word, writing it #
214 |     #     to the appropriate slot in the captions variable                    #
215 |     #                                                                         #
216 |     # For simplicity, you do not need to stop generating after an <END> token #
217 |     # is sampled, but you can if you want to.                                 #
218 |     #                                                                         #
219 |     # HINT: You will not be able to use the rnn_forward or lstm_forward       #
220 |     # functions; you'll need to call rnn_step_forward or lstm_step_forward in #
221 |     # a loop.                                                                 #
222 |     ###########################################################################
223 |     pass
224 |     prev_h, _ = affine_forward(features, W_proj, b_proj) # using image features as h0
225 |     if self.cell_type == 'lstm':
226 |         prev_c = np.zeros_like(prev_h)
227 |     x = np.array([self._start for i in range(N)])
228 |     captions[:, 0] = self._start
229 |     for t in range(1, max_length):
230 |         x_emb, _ = word_embedding_forward(x, W_embed)
231 |         if self.cell_type == 'rnn':
232 |             next_h, cache = rnn_step_forward(x_emb, prev_h, Wx, Wh, b)
233 |             prev_h = next_h
234 |         elif self.cell_type == 'lstm':
235 |             next_h, next_c, cache = lstm_step_forward(x_emb, prev_h, prev_c, Wx, Wh, b)
236 |             prev_h, prev_c = next_h, next_c
237 |         vocab_out, vocab_cache = affine_forward(next_h, W_vocab, b_vocab)
238 |         x = vocab_out.argmax(1)
239 |         captions[:, t] = x
240 |     ############################################################################
241 |     #                             END OF YOUR CODE                             #
242 |     ############################################################################
243 |     return captions
244 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/coco_utils.py:
--------------------------------------------------------------------------------
 1 | import os, json
 2 | import numpy as np
 3 | import h5py
 4 | 
 5 | 
 6 | def load_coco_data(base_dir='cs231n/datasets/coco_captioning',
 7 |                    max_train=None,
 8 |                    pca_features=True):
 9 |   data = {}
10 |   caption_file = os.path.join(base_dir, 'coco2014_captions.h5')
11 |   with h5py.File(caption_file, 'r') as f:
12 |     for k, v in f.iteritems():
13 |       data[k] = np.asarray(v)
14 | 
15 |   if pca_features:
16 |     train_feat_file = os.path.join(base_dir, 'train2014_vgg16_fc7_pca.h5')
17 |   else:
18 |     train_feat_file = os.path.join(base_dir, 'train2014_vgg16_fc7.h5')
19 |   with h5py.File(train_feat_file, 'r') as f:
20 |     data['train_features'] = np.asarray(f['features'])
21 | 
22 |   if pca_features:
23 |     val_feat_file = os.path.join(base_dir, 'val2014_vgg16_fc7_pca.h5')
24 |   else:
25 |     val_feat_file = os.path.join(base_dir, 'val2014_vgg16_fc7.h5')
26 |   with h5py.File(val_feat_file, 'r') as f:
27 |     data['val_features'] = np.asarray(f['features'])
28 | 
29 |   dict_file = os.path.join(base_dir, 'coco2014_vocab.json')
30 |   with open(dict_file, 'r') as f:
31 |     dict_data = json.load(f)
32 |     for k, v in dict_data.iteritems():
33 |       data[k] = v
34 | 
35 |   train_url_file = os.path.join(base_dir, 'train2014_urls.txt')
36 |   with open(train_url_file, 'r') as f:
37 |     train_urls = np.asarray([line.strip() for line in f])
38 |   data['train_urls'] = train_urls
39 | 
40 |   val_url_file = os.path.join(base_dir, 'val2014_urls.txt')
41 |   with open(val_url_file, 'r') as f:
42 |     val_urls = np.asarray([line.strip() for line in f])
43 |   data['val_urls'] = val_urls
44 | 
45 |   # Maybe subsample the training data
46 |   if max_train is not None:
47 |     num_train = data['train_captions'].shape[0]
48 |     mask = np.random.randint(num_train, size=max_train)
49 |     data['train_captions'] = data['train_captions'][mask]
50 |     data['train_image_idxs'] = data['train_image_idxs'][mask]
51 | 
52 |   return data
53 | 
54 | 
def decode_captions(captions, idx_to_word):
  """
  Turn arrays of word indices into caption strings.

  Inputs:
  - captions: Integer array of shape (T,) for a single caption or (N, T) for
    a batch of captions.
  - idx_to_word: Mapping (or sequence) from word index to word string.

  Returns:
  - A single string if captions was 1-dimensional, otherwise a list of N
    strings. '<NULL>' words are dropped, and each caption is cut off after
    its first '<END>' word (which is kept).
  """
  singleton = False
  if captions.ndim == 1:
    singleton = True
    # Promote a single caption to a batch of one.
    captions = captions[None]
  decoded = []
  # Iterating directly over rows and entries avoids the Python-2-only xrange.
  for caption in captions:
    words = []
    for idx in caption:
      word = idx_to_word[idx]
      if word != '<NULL>':
        words.append(word)
      if word == '<END>':
        break
    decoded.append(' '.join(words))
  if singleton:
    decoded = decoded[0]
  return decoded
74 | 
75 | 
def sample_coco_minibatch(data, batch_size=100, split='train'):
  """
  Draw a random minibatch of captions with their image features and URLs.

  Inputs:
  - data: Dictionary holding '<split>_captions', '<split>_image_idxs',
    '<split>_features' and '<split>_urls' arrays.
  - batch_size: Number of captions to draw (np.random.choice with its default
    settings, i.e. with replacement).
  - split: Name prefix of the split to sample from.

  Returns a tuple (captions, image_features, urls) for the drawn captions.
  """
  all_captions = data['%s_captions' % split]
  all_image_idxs = data['%s_image_idxs' % split]

  picked = np.random.choice(all_captions.shape[0], batch_size)
  # Each caption points at the image it describes; features and URLs are
  # looked up through that image index.
  picked_images = all_image_idxs[picked]
  return (all_captions[picked],
          data['%s_features' % split][picked_images],
          data['%s_urls' % split][picked_images])
84 | 
85 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/data_utils.py:
--------------------------------------------------------------------------------
  1 | import cPickle as pickle
  2 | import numpy as np
  3 | import os
  4 | from scipy.misc import imread
  5 | 
  6 | def load_CIFAR_batch(filename):
  7 |   """ load single batch of cifar """
  8 |   with open(filename, 'rb') as f:
  9 |     datadict = pickle.load(f)
 10 |     X = datadict['data']
 11 |     Y = datadict['labels']
 12 |     X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
 13 |     Y = np.array(Y)
 14 |     return X, Y
 15 | 
 16 | def load_CIFAR10(ROOT):
 17 |   """ load all of cifar """
 18 |   xs = []
 19 |   ys = []
 20 |   for b in range(1,6):
 21 |     f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
 22 |     X, Y = load_CIFAR_batch(f)
 23 |     xs.append(X)
 24 |     ys.append(Y)    
 25 |   Xtr = np.concatenate(xs)
 26 |   Ytr = np.concatenate(ys)
 27 |   del X, Y
 28 |   Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
 29 |   return Xtr, Ytr, Xte, Yte
 30 | 
 31 | 
 32 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
 33 |                      subtract_mean=True):
 34 |     """
 35 |     Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
 36 |     it for classifiers. These are the same steps as we used for the SVM, but
 37 |     condensed to a single function.
 38 |     """
 39 |     # Load the raw CIFAR-10 data
 40 |     cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
 41 |     X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
 42 |         
 43 |     # Subsample the data
 44 |     mask = range(num_training, num_training + num_validation)
 45 |     X_val = X_train[mask]
 46 |     y_val = y_train[mask]
 47 |     mask = range(num_training)
 48 |     X_train = X_train[mask]
 49 |     y_train = y_train[mask]
 50 |     mask = range(num_test)
 51 |     X_test = X_test[mask]
 52 |     y_test = y_test[mask]
 53 | 
 54 |     # Normalize the data: subtract the mean image
 55 |     if subtract_mean:
 56 |       mean_image = np.mean(X_train, axis=0)
 57 |       X_train -= mean_image
 58 |       X_val -= mean_image
 59 |       X_test -= mean_image
 60 |     
 61 |     # Transpose so that channels come first
 62 |     X_train = X_train.transpose(0, 3, 1, 2).copy()
 63 |     X_val = X_val.transpose(0, 3, 1, 2).copy()
 64 |     X_test = X_test.transpose(0, 3, 1, 2).copy()
 65 | 
 66 |     # Package data into a dictionary
 67 |     return {
 68 |       'X_train': X_train, 'y_train': y_train,
 69 |       'X_val': X_val, 'y_val': y_val,
 70 |       'X_test': X_test, 'y_test': y_test,
 71 |     }
 72 |     
 73 | 
 74 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
 75 |   """
 76 |   Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
 77 |   TinyImageNet-200 have the same directory structure, so this can be used
 78 |   to load any of them.
 79 | 
 80 |   Inputs:
 81 |   - path: String giving path to the directory to load.
 82 |   - dtype: numpy datatype used to load the data.
 83 |   - subtract_mean: Whether to subtract the mean training image.
 84 | 
 85 |   Returns: A dictionary with the following entries:
 86 |   - class_names: A list where class_names[i] is a list of strings giving the
 87 |     WordNet names for class i in the loaded dataset.
 88 |   - X_train: (N_tr, 3, 64, 64) array of training images
 89 |   - y_train: (N_tr,) array of training labels
 90 |   - X_val: (N_val, 3, 64, 64) array of validation images
 91 |   - y_val: (N_val,) array of validation labels
 92 |   - X_test: (N_test, 3, 64, 64) array of testing images.
 93 |   - y_test: (N_test,) array of test labels; if test labels are not available
 94 |     (such as in student code) then y_test will be None.
 95 |   - mean_image: (3, 64, 64) array giving mean training image
 96 |   """
 97 |   # First load wnids
 98 |   with open(os.path.join(path, 'wnids.txt'), 'r') as f:
 99 |     wnids = [x.strip() for x in f]
100 | 
101 |   # Map wnids to integer labels
102 |   wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
103 | 
104 |   # Use words.txt to get names for each class
105 |   with open(os.path.join(path, 'words.txt'), 'r') as f:
106 |     wnid_to_words = dict(line.split('\t') for line in f)
107 |     for wnid, words in wnid_to_words.iteritems():
108 |       wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
109 |   class_names = [wnid_to_words[wnid] for wnid in wnids]
110 | 
111 |   # Next load training data.
112 |   X_train = []
113 |   y_train = []
114 |   for i, wnid in enumerate(wnids):
115 |     if (i + 1) % 20 == 0:
116 |       print 'loading training data for synset %d / %d' % (i + 1, len(wnids))
117 |     # To figure out the filenames we need to open the boxes file
118 |     boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
119 |     with open(boxes_file, 'r') as f:
120 |       filenames = [x.split('\t')[0] for x in f]
121 |     num_images = len(filenames)
122 |     
123 |     X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
124 |     y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
125 |     for j, img_file in enumerate(filenames):
126 |       img_file = os.path.join(path, 'train', wnid, 'images', img_file)
127 |       img = imread(img_file)
128 |       if img.ndim == 2:
129 |         ## grayscale file
130 |         img.shape = (64, 64, 1)
131 |       X_train_block[j] = img.transpose(2, 0, 1)
132 |     X_train.append(X_train_block)
133 |     y_train.append(y_train_block)
134 |       
135 |   # We need to concatenate all training data
136 |   X_train = np.concatenate(X_train, axis=0)
137 |   y_train = np.concatenate(y_train, axis=0)
138 |   
139 |   # Next load validation data
140 |   with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
141 |     img_files = []
142 |     val_wnids = []
143 |     for line in f:
144 |       img_file, wnid = line.split('\t')[:2]
145 |       img_files.append(img_file)
146 |       val_wnids.append(wnid)
147 |     num_val = len(img_files)
148 |     y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
149 |     X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
150 |     for i, img_file in enumerate(img_files):
151 |       img_file = os.path.join(path, 'val', 'images', img_file)
152 |       img = imread(img_file)
153 |       if img.ndim == 2:
154 |         img.shape = (64, 64, 1)
155 |       X_val[i] = img.transpose(2, 0, 1)
156 | 
157 |   # Next load test images
158 |   # Students won't have test labels, so we need to iterate over files in the
159 |   # images directory.
160 |   img_files = os.listdir(os.path.join(path, 'test', 'images'))
161 |   X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
162 |   for i, img_file in enumerate(img_files):
163 |     img_file = os.path.join(path, 'test', 'images', img_file)
164 |     img = imread(img_file)
165 |     if img.ndim == 2:
166 |       img.shape = (64, 64, 1)
167 |     X_test[i] = img.transpose(2, 0, 1)
168 | 
169 |   y_test = None
170 |   y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
171 |   if os.path.isfile(y_test_file):
172 |     with open(y_test_file, 'r') as f:
173 |       img_file_to_wnid = {}
174 |       for line in f:
175 |         line = line.split('\t')
176 |         img_file_to_wnid[line[0]] = line[1]
177 |     y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
178 |     y_test = np.array(y_test)
179 |   
180 |   mean_image = X_train.mean(axis=0)
181 |   if subtract_mean:
182 |     X_train -= mean_image[None]
183 |     X_val -= mean_image[None]
184 |     X_test -= mean_image[None]
185 | 
186 |   return {
187 |     'class_names': class_names,
188 |     'X_train': X_train,
189 |     'y_train': y_train,
190 |     'X_val': X_val,
191 |     'y_val': y_val,
192 |     'X_test': X_test,
193 |     'y_test': y_test,
194 |     'class_names': class_names,
195 |     'mean_image': mean_image,
196 |   }
197 | 
198 | 
def load_models(models_dir):
  """
  Unpickle every file in a directory and collect the models found.

  Files that raise pickle.UnpicklingError while loading (such as README.txt)
  are skipped; any other error propagates to the caller.

  Inputs:
  - models_dir: String giving the path to a directory containing model files.
    Each model file is a pickled dictionary with a 'model' field.

  Returns:
  A dictionary mapping model file names to models.
  """
  # NOTE(review): pickle can execute arbitrary code while loading; only point
  # this at directories with trusted files.
  found = {}
  for file_name in os.listdir(models_dir):
    file_path = os.path.join(models_dir, file_name)
    with open(file_path, 'rb') as handle:
      try:
        payload = pickle.load(handle)
      except pickle.UnpicklingError:
        continue
    found[file_name] = payload['model']
  return found
220 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/datasets/get_coco_captioning.sh:
--------------------------------------------------------------------------------
# Download and unpack the COCO captioning archive into the current directory.
# The steps are chained with && so that a failed download or extraction stops
# the script instead of running the remaining commands on a missing or broken
# archive (and so that the archive is only removed after a successful unzip).
wget "http://cs231n.stanford.edu/coco_captioning.zip" &&
unzip coco_captioning.zip &&
rm coco_captioning.zip
4 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/datasets/get_pretrained_model.sh:
--------------------------------------------------------------------------------
# Download the pretrained model file into the current directory.
wget "http://cs231n.stanford.edu/pretrained_model.h5"
2 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/datasets/get_tiny_imagenet_a.sh:
--------------------------------------------------------------------------------
# Download the tiny-imagenet-100-A archive, unpack it into the current
# directory and delete the archive afterwards.
# The steps are chained with && so that a failed download is never unzipped and
# the archive is only removed after it has been extracted successfully.
wget "http://cs231n.stanford.edu/tiny-imagenet-100-A.zip" &&
unzip tiny-imagenet-100-A.zip &&
rm tiny-imagenet-100-A.zip
4 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/fast_layers.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | try:
  3 |   from cs231n.im2col_cython import col2im_cython, im2col_cython
  4 |   from cs231n.im2col_cython import col2im_6d_cython
  5 | except ImportError:
  6 |   print 'run the following from the cs231n directory and try again:'
  7 |   print 'python setup.py build_ext --inplace'
  8 |   print 'You may also need to restart your iPython kernel'
  9 | 
 10 | from cs231n.im2col import *
 11 | 
 12 | 
 13 | def conv_forward_im2col(x, w, b, conv_param):
 14 |   """
 15 |   A fast implementation of the forward pass for a convolutional layer
 16 |   based on im2col and col2im.
 17 |   """
 18 |   N, C, H, W = x.shape
 19 |   num_filters, _, filter_height, filter_width = w.shape
 20 |   stride, pad = conv_param['stride'], conv_param['pad']
 21 | 
 22 |   # Check dimensions
 23 |   assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
 24 |   assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'
 25 | 
 26 |   # Create output
 27 |   out_height = (H + 2 * pad - filter_height) / stride + 1
 28 |   out_width = (W + 2 * pad - filter_width) / stride + 1
 29 |   out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)
 30 | 
 31 |   # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
 32 |   x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
 33 |   res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)
 34 | 
 35 |   out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
 36 |   out = out.transpose(3, 0, 1, 2)
 37 | 
 38 |   cache = (x, w, b, conv_param, x_cols)
 39 |   return out, cache
 40 | 
 41 | 
 42 | def conv_forward_strides(x, w, b, conv_param):
 43 |   N, C, H, W = x.shape
 44 |   F, _, HH, WW = w.shape
 45 |   stride, pad = conv_param['stride'], conv_param['pad']
 46 | 
 47 |   # Check dimensions
 48 |   #assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
 49 |   #assert (H + 2 * pad - HH) % stride == 0, 'height does not work'
 50 | 
 51 |   # Pad the input
 52 |   p = pad
 53 |   x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 54 |   
 55 |   # Figure out output dimensions
 56 |   H += 2 * pad
 57 |   W += 2 * pad
 58 |   out_h = (H - HH) / stride + 1
 59 |   out_w = (W - WW) / stride + 1
 60 | 
 61 |   # Perform an im2col operation by picking clever strides
 62 |   shape = (C, HH, WW, N, out_h, out_w)
 63 |   strides = (H * W, W, 1, C * H * W, stride * W, stride)
 64 |   strides = x.itemsize * np.array(strides)
 65 |   x_stride = np.lib.stride_tricks.as_strided(x_padded,
 66 |                 shape=shape, strides=strides)
 67 |   x_cols = np.ascontiguousarray(x_stride)
 68 |   x_cols.shape = (C * HH * WW, N * out_h * out_w)
 69 | 
 70 |   # Now all our convolutions are a big matrix multiply
 71 |   res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)
 72 | 
 73 |   # Reshape the output
 74 |   res.shape = (F, N, out_h, out_w)
 75 |   out = res.transpose(1, 0, 2, 3)
 76 | 
 77 |   # Be nice and return a contiguous array
 78 |   # The old version of conv_forward_fast doesn't do this, so for a fair
 79 |   # comparison we won't either
 80 |   out = np.ascontiguousarray(out)
 81 | 
 82 |   cache = (x, w, b, conv_param, x_cols)
 83 |   return out, cache
 84 |   
 85 | 
 86 | def conv_backward_strides(dout, cache):
 87 |   x, w, b, conv_param, x_cols = cache
 88 |   stride, pad = conv_param['stride'], conv_param['pad']
 89 | 
 90 |   N, C, H, W = x.shape
 91 |   F, _, HH, WW = w.shape
 92 |   _, _, out_h, out_w = dout.shape
 93 | 
 94 |   db = np.sum(dout, axis=(0, 2, 3))
 95 | 
 96 |   dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
 97 |   dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)
 98 | 
 99 |   dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
100 |   dx_cols.shape = (C, HH, WW, N, out_h, out_w)
101 |   dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)
102 | 
103 |   return dx, dw, db
104 | 
105 | 
def conv_backward_im2col(dout, cache):
  """
  Backward pass for a convolutional layer, built on im2col / col2im; the
  counterpart of conv_forward_im2col.

  Inputs:
  - dout: Upstream derivatives, a 4-d array
  - cache: (x, w, b, conv_param, x_cols) tuple from the forward pass

  Returns a tuple (dx, dw, db) of gradients with respect to x, w and b.
  """
  x, w, b, conv_param, x_cols = cache
  stride = conv_param['stride']
  pad = conv_param['pad']

  db = np.sum(dout, axis=(0, 2, 3))

  n_filters, _, k_h, k_w = w.shape
  # Filter axis first; remaining axes flattened in (height, width, N) order
  dout_cols = dout.transpose(1, 2, 3, 0).reshape(n_filters, -1)
  dw = dout_cols.dot(x_cols.T).reshape(w.shape)

  dx_cols = w.reshape(n_filters, -1).T.dot(dout_cols)
  # dx = col2im_indices(dx_cols, x.shape, k_h, k_w, pad, stride)
  n, c, h_in, w_in = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
  dx = col2im_cython(dx_cols, n, c, h_in, w_in, k_h, k_w, pad, stride)

  return dx, dw, db
126 | 
127 | 
# Public "fast" entry points: both directions are bound to the strides-based
# implementations defined in this module.
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides
130 | 
131 | 
def max_pool_forward_fast(x, pool_param):
  """
  Forward pass for max pooling that dispatches to the quickest usable method.

  When the pooling window is square, the stride equals the window size and the
  windows tile the input exactly, the reshape method is used. Every other
  configuration goes through the im2col method.

  Returns a tuple (out, cache) where cache is (method_name, method_cache) so
  the backward pass knows which implementation produced it.
  """
  _, _, in_h, in_w = x.shape
  pool_h = pool_param['pool_height']
  pool_w = pool_param['pool_width']
  step = pool_param['stride']

  square_window = pool_h == pool_w == step
  exact_tiling = in_h % pool_h == 0 and in_w % pool_w == 0
  if not (square_window and exact_tiling):
    out, inner_cache = max_pool_forward_im2col(x, pool_param)
    return out, ('im2col', inner_cache)

  out, inner_cache = max_pool_forward_reshape(x, pool_param)
  return out, ('reshape', inner_cache)
154 | 
155 | 
def max_pool_backward_fast(dout, cache):
  """
  Backward pass for max pooling.

  The cache is a (method, method_cache) pair; the method name recorded by the
  forward pass selects between the reshape and the im2col implementation.
  Raises ValueError for any other method name.
  """
  kind, payload = cache
  if kind == 'reshape':
    backward = max_pool_backward_reshape
  elif kind == 'im2col':
    backward = max_pool_backward_im2col
  else:
    raise ValueError('Unrecognized method "%s"' % kind)
  return backward(dout, payload)
170 | 
171 | 
def max_pool_forward_reshape(x, pool_param):
  """
  A fast implementation of the forward pass for the max pooling layer that uses
  some clever reshaping.

  This can only be used for square pooling regions that tile the input.

  Inputs:
  - x: Input array of shape (N, C, H, W)
  - pool_param: Dictionary with 'pool_height', 'pool_width' and 'stride'

  Returns a tuple of:
  - out: Pooled output of shape (N, C, H / pool_height, W / pool_width)
  - cache: (x, x_reshaped, out) for max_pool_backward_reshape
  """
  N, C, H, W = x.shape
  pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']
  assert pool_height == pool_width == stride, 'Invalid pool params'
  assert H % pool_height == 0
  assert W % pool_width == 0
  # Floor division keeps the new dimensions integers even when "/" means true
  # division (Python 3); the asserts above guarantee it is exact.
  x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                         W // pool_width, pool_width)
  # Reduce over the two within-window axes (axis 5 becomes axis 4 after the
  # first reduction).
  out = x_reshaped.max(axis=3).max(axis=4)

  cache = (x, x_reshaped, out)
  return out, cache
191 | 
192 | 
def max_pool_backward_reshape(dout, cache):
  """
  Backward pass for max pooling built on broadcasting and reshaping.

  This can only be used with a cache produced by max_pool_forward_reshape.

  NOTE: When a pooling window contains several elements equal to its maximum,
  every one of them is marked as an argmax and the upstream gradient for that
  window is split equally among them (the division by the per-window count of
  argmax elements below).
  """
  x, x_reshaped, out = cache

  # Insert singleton axes so the pooled arrays line up with the 6-d windowed
  # view of the input.
  expand = (slice(None), slice(None), slice(None), np.newaxis,
            slice(None), np.newaxis)
  is_max = (x_reshaped == out[expand])

  grad_windows = np.zeros_like(x_reshaped)
  upstream, _ = np.broadcast_arrays(dout[expand], grad_windows)
  grad_windows[is_max] = upstream[is_max]
  # Share each window's gradient between all of its argmax elements
  grad_windows /= np.sum(is_max, axis=(3, 5), keepdims=True)

  return grad_windows.reshape(x.shape)
222 | 
223 | 
def max_pool_forward_im2col(x, pool_param):
  """
  An implementation of the forward pass for max pooling based on im2col.

  This isn't much faster than the naive version, so it should be avoided if
  possible.

  Inputs:
  - x: Input array of shape (N, C, H, W)
  - pool_param: Dictionary with 'pool_height', 'pool_width' and 'stride'

  Returns a tuple of:
  - out: Pooled output of shape (N, C, out_height, out_width)
  - cache: (x, x_cols, x_cols_argmax, pool_param) for
    max_pool_backward_im2col
  """
  N, C, H, W = x.shape
  pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']

  assert (H - pool_height) % stride == 0, 'Invalid height'
  assert (W - pool_width) % stride == 0, 'Invalid width'

  # Floor division keeps the sizes integers even when "/" means true division
  out_height = (H - pool_height) // stride + 1
  out_width = (W - pool_width) // stride + 1

  # Treat every channel of every image as its own single-channel image so
  # that each column holds exactly one pooling window.
  x_split = x.reshape(N * C, 1, H, W)
  # The helper exported by cs231n.im2col is im2col_indices; no function named
  # "im2col" is defined or imported here.
  x_cols = im2col_indices(x_split, pool_height, pool_width, padding=0,
                          stride=stride)
  x_cols_argmax = np.argmax(x_cols, axis=0)
  x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
  out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

  cache = (x, x_cols, x_cols_argmax, pool_param)
  return out, cache
249 | 
250 | 
def max_pool_backward_im2col(dout, cache):
  """
  Backward pass for max pooling based on im2col; the counterpart of
  max_pool_forward_im2col.

  This isn't much faster than the naive version, so it should be avoided if
  possible.
  """
  x, x_cols, x_cols_argmax, pool_param = cache
  n, c, in_h, in_w = x.shape
  pool_h = pool_param['pool_height']
  pool_w = pool_param['pool_width']
  step = pool_param['stride']

  # Flatten the upstream gradient in the same column order as x_cols
  dout_flat = dout.transpose(2, 3, 0, 1).flatten()

  # Route each gradient value to the row that held the window's maximum
  dx_cols = np.zeros_like(x_cols)
  col_ids = np.arange(dx_cols.shape[1])
  dx_cols[x_cols_argmax, col_ids] = dout_flat

  dx = col2im_indices(dx_cols, (n * c, 1, in_h, in_w), pool_h, pool_w,
              padding=0, stride=step)
  return dx.reshape(x.shape)
271 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from random import randrange
  3 | 
  4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
  5 |   """ 
  6 |   a naive implementation of numerical gradient of f at x 
  7 |   - f should be a function that takes a single argument
  8 |   - x is the point (numpy array) to evaluate the gradient at
  9 |   """ 
 10 | 
 11 |   fx = f(x) # evaluate function value at original point
 12 |   grad = np.zeros_like(x)
 13 |   # iterate over all indexes in x
 14 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 15 |   while not it.finished:
 16 | 
 17 |     # evaluate function at x+h
 18 |     ix = it.multi_index
 19 |     oldval = x[ix]
 20 |     x[ix] = oldval + h # increment by h
 21 |     fxph = f(x) # evalute f(x + h)
 22 |     x[ix] = oldval - h
 23 |     fxmh = f(x) # evaluate f(x - h)
 24 |     x[ix] = oldval # restore
 25 | 
 26 |     # compute the partial derivative with centered formula
 27 |     grad[ix] = (fxph - fxmh) / (2 * h) # the slope
 28 |     if verbose:
 29 |       print ix, grad[ix]
 30 |     it.iternext() # step to next dimension
 31 | 
 32 |   return grad
 33 | 
 34 | 
 35 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
 36 |   """
 37 |   Evaluate a numeric gradient for a function that accepts a numpy
 38 |   array and returns a numpy array.
 39 |   """
 40 |   grad = np.zeros_like(x)
 41 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 42 |   while not it.finished:
 43 |     ix = it.multi_index
 44 |     
 45 |     oldval = x[ix]
 46 |     x[ix] = oldval + h
 47 |     pos = f(x).copy()
 48 |     x[ix] = oldval - h
 49 |     neg = f(x).copy()
 50 |     x[ix] = oldval
 51 |     
 52 |     grad[ix] = np.sum((pos - neg) * df) / (2 * h)
 53 |     it.iternext()
 54 |   return grad
 55 | 
 56 | 
 57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
 58 |   """
 59 |   Compute numeric gradients for a function that operates on input
 60 |   and output blobs.
 61 |   
 62 |   We assume that f accepts several input blobs as arguments, followed by a blob
 63 |   into which outputs will be written. For example, f might be called like this:
 64 | 
 65 |   f(x, w, out)
 66 |   
 67 |   where x and w are input Blobs, and the result of f will be written to out.
 68 | 
 69 |   Inputs: 
 70 |   - f: function
 71 |   - inputs: tuple of input blobs
 72 |   - output: output blob
 73 |   - h: step size
 74 |   """
 75 |   numeric_diffs = []
 76 |   for input_blob in inputs:
 77 |     diff = np.zeros_like(input_blob.diffs)
 78 |     it = np.nditer(input_blob.vals, flags=['multi_index'],
 79 |                    op_flags=['readwrite'])
 80 |     while not it.finished:
 81 |       idx = it.multi_index
 82 |       orig = input_blob.vals[idx]
 83 | 
 84 |       input_blob.vals[idx] = orig + h
 85 |       f(*(inputs + (output,)))
 86 |       pos = np.copy(output.vals)
 87 |       input_blob.vals[idx] = orig - h
 88 |       f(*(inputs + (output,)))
 89 |       neg = np.copy(output.vals)
 90 |       input_blob.vals[idx] = orig
 91 |       
 92 |       diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
 93 | 
 94 |       it.iternext()
 95 |     numeric_diffs.append(diff)
 96 |   return numeric_diffs
 97 | 
 98 | 
 99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
100 |   return eval_numerical_gradient_blobs(lambda *args: net.forward(),
101 |               inputs, output, h=h)
102 | 
103 | 
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
  """
  Sample a few random elements of x and, for each of them, print the numerical
  gradient next to the analytic gradient together with their relative error.

  - f: function taking x and returning a scalar
  - x: numpy array; perturbed in place and restored after each check
  - analytic_grad: array indexed like x holding the gradient to verify
  - num_checks: number of random positions to compare
  - h: step size of the centered difference
  """

  # range / single-argument print() behave the same on Python 2 and 3
  for i in range(num_checks):
    ix = tuple([randrange(m) for m in x.shape])

    oldval = x[ix]
    x[ix] = oldval + h # increment by h
    fxph = f(x) # evaluate f(x + h)
    x[ix] = oldval - h # decrement by h
    fxmh = f(x) # evaluate f(x - h)
    x[ix] = oldval # reset

    grad_numerical = (fxph - fxmh) / (2 * h)
    grad_analytic = analytic_grad[ix]
    rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic))
    print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))
124 | 
125 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/im2col.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
 5 |   # First figure out what the size of the output should be
 6 |   N, C, H, W = x_shape
 7 |   assert (H + 2 * padding - field_height) % stride == 0
 8 |   assert (W + 2 * padding - field_height) % stride == 0
 9 |   out_height = (H + 2 * padding - field_height) / stride + 1
10 |   out_width = (W + 2 * padding - field_width) / stride + 1
11 | 
12 |   i0 = np.repeat(np.arange(field_height), field_width)
13 |   i0 = np.tile(i0, C)
14 |   i1 = stride * np.repeat(np.arange(out_height), out_width)
15 |   j0 = np.tile(np.arange(field_width), field_height * C)
16 |   j1 = stride * np.tile(np.arange(out_width), out_height)
17 |   i = i0.reshape(-1, 1) + i1.reshape(1, -1)
18 |   j = j0.reshape(-1, 1) + j1.reshape(1, -1)
19 | 
20 |   k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
21 | 
22 |   return (k, i, j)
23 | 
24 | 
def im2col_indices(x, field_height, field_width, padding=1, stride=1):
  """
  im2col via fancy indexing: gathers every sliding window of the zero-padded
  input into one column of a 2-d array with
  field_height * field_width * C rows.
  """
  # Zero-pad the two trailing (spatial) axes only
  pad_spec = ((0, 0), (0, 0), (padding, padding), (padding, padding))
  padded = np.pad(x, pad_spec, mode='constant')

  chan_idx, row_idx, col_idx = get_im2col_indices(
      x.shape, field_height, field_width, padding, stride)

  gathered = padded[:, chan_idx, row_idx, col_idx]
  n_rows = field_height * field_width * x.shape[1]
  # Move the batch axis last so it varies fastest within the columns
  return gathered.transpose(1, 2, 0).reshape(n_rows, -1)
38 | 
39 | 
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
  """
  col2im via fancy indexing and np.add.at: scatters the columns back into an
  array of shape x_shape, summing values that land on the same position, and
  strips the padding again.
  """
  n, c, h, w = x_shape
  padded_shape = (n, c, h + 2 * padding, w + 2 * padding)
  accum = np.zeros(padded_shape, dtype=cols.dtype)

  chan_idx, row_idx, col_idx = get_im2col_indices(
      x_shape, field_height, field_width, padding, stride)

  # Columns are laid out with the batch axis last; bring it to the front
  per_image = cols.reshape(c * field_height * field_width, -1, n)
  per_image = per_image.transpose(2, 0, 1)
  np.add.at(accum, (slice(None), chan_idx, row_idx, col_idx), per_image)

  if padding != 0:
    return accum[:, :, padding:-padding, padding:-padding]
  return accum
54 | 
55 | pass
56 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/im2col_cython.pyx:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | cimport numpy as np
  3 | cimport cython
  4 | 
  5 | # DTYPE = np.float64
  6 | # ctypedef np.float64_t DTYPE_t
  7 | 
  8 | ctypedef fused DTYPE_t:
  9 |     np.float32_t
 10 |     np.float64_t
 11 | 
 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
 13 |                   int field_width, int padding, int stride):
 14 |     cdef int N = x.shape[0]
 15 |     cdef int C = x.shape[1]
 16 |     cdef int H = x.shape[2]
 17 |     cdef int W = x.shape[3]
 18 |     
 19 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 20 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 21 | 
 22 |     cdef int p = padding
 23 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
 24 |             ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 25 | 
 26 |     cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
 27 |             (C * field_height * field_width, N * HH * WW),
 28 |             dtype=x.dtype)
 29 | 
 30 |     # Moving the inner loop to a C function with no bounds checking works, but does
 31 |     # not seem to help performance in any measurable way.
 32 | 
 33 |     im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
 34 |                         field_height, field_width, padding, stride)
 35 |     return cols
 36 | 
 37 | 
 38 | @cython.boundscheck(False)
 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
 40 |                              np.ndarray[DTYPE_t, ndim=4] x_padded,
 41 |                              int N, int C, int H, int W, int HH, int WW,
 42 |                              int field_height, int field_width, int padding, int stride) except? -1:
 43 |     cdef int c, ii, jj, row, yy, xx, i, col
 44 | 
 45 |     for c in range(C):
 46 |         for yy in range(HH):
 47 |             for xx in range(WW):
 48 |                 for ii in range(field_height):
 49 |                     for jj in range(field_width):
 50 |                         row = c * field_width * field_height + ii * field_height + jj
 51 |                         for i in range(N):
 52 |                             col = yy * WW * N + xx * N + i
 53 |                             cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
 54 | 
 55 | 
 56 | 
 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
 58 |                   int field_height, int field_width, int padding, int stride):
 59 |     cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
 60 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 61 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 62 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
 63 |                                         dtype=cols.dtype)
 64 | 
 65 |     # Moving the inner loop to a C-function with no bounds checking improves
 66 |     # performance quite a bit for col2im.
 67 |     col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 
 68 |                         field_height, field_width, padding, stride)
 69 |     if padding > 0:
 70 |         return x_padded[:, :, padding:-padding, padding:-padding]
 71 |     return x_padded
 72 | 
 73 | 
 74 | @cython.boundscheck(False)
 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
 76 |                              np.ndarray[DTYPE_t, ndim=4] x_padded,
 77 |                              int N, int C, int H, int W, int HH, int WW,
 78 |                              int field_height, int field_width, int padding, int stride) except? -1:
 79 |     cdef int c, ii, jj, row, yy, xx, i, col
 80 | 
 81 |     for c in range(C):
 82 |         for ii in range(field_height):
 83 |             for jj in range(field_width):
 84 |                 row = c * field_width * field_height + ii * field_height + jj
 85 |                 for yy in range(HH):
 86 |                     for xx in range(WW):
 87 |                         for i in range(N):
 88 |                             col = yy * WW * N + xx * N + i
 89 |                             x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
 90 | 
 91 | 
 92 | @cython.boundscheck(False)
 93 | @cython.wraparound(False)
 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
 95 |                             np.ndarray[DTYPE_t, ndim=4] x_padded,
 96 |                             int N, int C, int H, int W, int HH, int WW,
 97 |                             int out_h, int out_w, int pad, int stride):
 98 | 
 99 |     cdef int c, hh, ww, n, h, w
100 |     for n in range(N):
101 |         for c in range(C):
102 |             for hh in range(HH):
103 |                 for ww in range(WW):
104 |                     for h in range(out_h):
105 |                         for w in range(out_w):
106 |                             x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
107 |     
108 | 
def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
        int HH, int WW, int pad, int stride):
    """
    Scatter-add a 6-d column array indexed as (C, HH, WW, N, out_h, out_w)
    back into image layout.

    Values are accumulated into a zero array of shape
    (N, C, H + 2 * pad, W + 2 * pad); the padding border is cut off before
    returning, so the result has shape (N, C, H, W).
    """
    # Explicit floor division: "/" becomes true division (a double) when the
    # module is compiled with language_level=3.
    cdef int out_h = (H + 2 * pad - HH) // stride + 1
    cdef int out_w = (W + 2 * pad - WW) // stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                  dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded 
122 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/image_utils.py:
--------------------------------------------------------------------------------
 1 | import urllib2, os, tempfile
 2 | 
 3 | import numpy as np
 4 | from scipy.misc import imread
 5 | 
 6 | from cs231n.fast_layers import conv_forward_fast
 7 | 
 8 | 
 9 | """
10 | Utility functions used for viewing and processing images.
11 | """
12 | 
13 | 
def blur_image(X):
  """
  Apply a very mild blur to a batch of images; intended as a regularizer for
  image generation.

  Each of the three channels is convolved on its own with a 3x3 kernel whose
  weights sum to one (center 188/200), using stride 1 and padding 1.
  
  Inputs:
  - X: Image data of shape (N, 3, H, W)
  
  Returns:
  - X_blur: Blurred version of X, of shape (N, 3, H, W)
  """
  kernel = np.asarray([[1, 2, 1], [2, 188, 2], [1, 2, 1]], dtype=np.float32)
  w_blur = np.zeros((3, 3, 3, 3))
  # Only the "diagonal" filters are set, so channels are never mixed
  for channel in range(3):
    w_blur[channel, channel] = kernel
  w_blur /= 200.0

  b_blur = np.zeros(3)
  blur_param = {'stride': 1, 'pad': 1}
  result = conv_forward_fast(X, w_blur, b_blur, blur_param)
  return result[0]
32 | 
33 | 
def preprocess_image(img, mean_img, mean='image'):
  """
  Convert to float32, transpose to channels-first, add a leading batch axis
  and subtract the selected mean.
  
  Input:
  - img: (H, W, 3)
  - mean_img: mean image; used as is for mean='image', averaged over its last
    two axes for mean='pixel'
  - mean: 'image', 'pixel' or 'none'
  
  Returns:
  - (1, 3, H, W)
  """
  if mean == 'image':
    offset = mean_img
  elif mean == 'pixel':
    offset = mean_img.mean(axis=(1, 2), keepdims=True)
  elif mean == 'none':
    offset = 0
  else:
    raise ValueError('mean must be image or pixel or none')

  as_float = img.astype(np.float32)
  batched = as_float.transpose(2, 0, 1)[None]
  return batched - offset
53 | 
54 | 
def deprocess_image(img, mean_img, mean='image', renorm=False):
  """
  Undo preprocess_image: add the selected mean back, move channels last and
  convert to uint8. With renorm=True the values are first rescaled linearly so
  that they span 0..255.
  
  Input:
  - (1, 3, H, W) or (3, H, W)
  
  Returns:
  - (H, W, 3)
  """
  if mean == 'image':
    offset = mean_img
  elif mean == 'pixel':
    offset = mean_img.mean(axis=(1, 2), keepdims=True)
  elif mean == 'none':
    offset = 0
  else:
    raise ValueError('mean must be image or pixel or none')

  # Promote a single (3, H, W) image to a batch of one
  batch = img[None] if img.ndim == 3 else img
  hwc = (batch + offset)[0].transpose(1, 2, 0)
  if renorm:
    low, high = hwc.min(), hwc.max()
    hwc = 255.0 * (hwc - low) / (high - low)
  return hwc.astype(np.uint8)
80 | 
81 | 
def image_from_url(url):
  """
  Read an image from a URL. Returns a numpy array with the pixel data.
  We write the image to a temporary file then read it back. Kinda gross.

  If the download fails with an HTTP or URL error, the error is printed and
  None is returned.
  """
  try:
    f = urllib2.urlopen(url)
    try:
      data = f.read()
    finally:
      f.close()

    fd, fname = tempfile.mkstemp()
    try:
      # mkstemp returns an already-open descriptor; wrap it so that it gets
      # closed instead of leaking.
      with os.fdopen(fd, 'wb') as ff:
        ff.write(data)
      img = imread(fname)
    finally:
      # Remove the temporary file even when decoding the image fails
      os.remove(fname)
    return img
  except urllib2.HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be handled first;
    # otherwise this branch can never run.
    print('HTTP Error:  %s %s' % (e.code, url))
  except urllib2.URLError as e:
    print('URL Error:  %s %s' % (e.reason, url))
99 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/layer_utils.py:
--------------------------------------------------------------------------------
  1 | from cs231n.layers import *
  2 | from cs231n.fast_layers import *
  3 | 
  4 | 
  5 | def affine_relu_forward(x, w, b):
  6 |   """
  7 |   Convenience layer that perorms an affine transform followed by a ReLU
  8 | 
  9 |   Inputs:
 10 |   - x: Input to the affine layer
 11 |   - w, b: Weights for the affine layer
 12 | 
 13 |   Returns a tuple of:
 14 |   - out: Output from the ReLU
 15 |   - cache: Object to give to the backward pass
 16 |   """
 17 |   a, fc_cache = affine_forward(x, w, b)
 18 |   out, relu_cache = relu_forward(a)
 19 |   cache = (fc_cache, relu_cache)
 20 |   return out, cache
 21 | 
 22 | 
 23 | def affine_relu_backward(dout, cache):
 24 |   """
 25 |   Backward pass for the affine-relu convenience layer
 26 |   """
 27 |   fc_cache, relu_cache = cache
 28 |   da = relu_backward(dout, relu_cache)
 29 |   dx, dw, db = affine_backward(da, fc_cache)
 30 |   return dx, dw, db
 31 | 
 32 | 
 33 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
 34 |   """
 35 |   Convenience layer that performs an affine transform, batch normalization,
 36 |   and ReLU.
 37 | 
 38 |   Inputs:
 39 |   - x: Array of shape (N, D1); input to the affine layer
 40 |   - w, b: Arrays of shape (D2, D2) and (D2,) giving the weight and bias for
 41 |     the affine transform.
 42 |   - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
 43 |     parameters for batch normalization.
 44 |   - bn_param: Dictionary of parameters for batch normalization.
 45 | 
 46 |   Returns:
 47 |   - out: Output from ReLU, of shape (N, D2)
 48 |   - cache: Object to give to the backward pass.
 49 |   """
 50 |   a, fc_cache = affine_forward(x, w, b)
 51 |   a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
 52 |   out, relu_cache = relu_forward(a_bn)
 53 |   cache = (fc_cache, bn_cache, relu_cache)
 54 |   return out, cache
 55 | 
 56 | 
 57 | def affine_bn_relu_backward(dout, cache):
 58 |   """
 59 |   Backward pass for the affine-batchnorm-relu convenience layer.
 60 |   """
 61 |   fc_cache, bn_cache, relu_cache = cache
 62 |   da_bn = relu_backward(dout, relu_cache)
 63 |   da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)
 64 |   dx, dw, db = affine_backward(da, fc_cache)
 65 |   return dx, dw, db, dgamma, dbeta  
 66 | 
 67 | 
 68 | def conv_relu_forward(x, w, b, conv_param):
 69 |   """
 70 |   A convenience layer that performs a convolution followed by a ReLU.
 71 | 
 72 |   Inputs:
 73 |   - x: Input to the convolutional layer
 74 |   - w, b, conv_param: Weights and parameters for the convolutional layer
 75 |   
 76 |   Returns a tuple of:
 77 |   - out: Output from the ReLU
 78 |   - cache: Object to give to the backward pass
 79 |   """
 80 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 81 |   out, relu_cache = relu_forward(a)
 82 |   cache = (conv_cache, relu_cache)
 83 |   return out, cache
 84 | 
 85 | 
 86 | def conv_relu_backward(dout, cache):
 87 |   """
 88 |   Backward pass for the conv-relu convenience layer.
 89 |   """
 90 |   conv_cache, relu_cache = cache
 91 |   da = relu_backward(dout, relu_cache)
 92 |   dx, dw, db = conv_backward_fast(da, conv_cache)
 93 |   return dx, dw, db
 94 | 
 95 | 
 96 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
 97 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 98 |   an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
 99 |   out, relu_cache = relu_forward(an)
100 |   cache = (conv_cache, bn_cache, relu_cache)
101 |   return out, cache
102 | 
103 | 
def conv_bn_relu_backward(dout, cache):
  """
  Backward pass for the conv-batchnorm-relu convenience layer; walks the three
  stages in reverse. Returns (dx, dw, db, dgamma, dbeta).
  """
  conv_cache, bn_cache, relu_cache = cache
  d_normed = relu_backward(dout, relu_cache)
  d_conv_out, dgamma, dbeta = spatial_batchnorm_backward(d_normed, bn_cache)
  dx, dw, db = conv_backward_fast(d_conv_out, conv_cache)
  return dx, dw, db, dgamma, dbeta
110 | 
111 | 
def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
  """
  Convenience layer chaining a convolution, a ReLU and a max pool.

  Inputs:
  - x: Input to the convolutional layer
  - w, b, conv_param: Weights and parameters for the convolutional layer
  - pool_param: Parameters for the pooling layer

  Returns a tuple of:
  - out: Output from the pooling layer
  - cache: (conv_cache, relu_cache, pool_cache) to give to the backward pass
  """
  conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
  relu_out, relu_cache = relu_forward(conv_out)
  pooled, pool_cache = max_pool_forward_fast(relu_out, pool_param)
  return pooled, (conv_cache, relu_cache, pool_cache)
130 | 
131 | 
def conv_relu_pool_backward(dout, cache):
  """
  Backward pass for the conv-relu-pool convenience layer.

  Inputs:
  - dout: Upstream derivatives with respect to the pooling output
  - cache: Tuple (conv_cache, relu_cache, pool_cache) returned by
    conv_relu_pool_forward

  Returns a tuple (dx, dw, db) as produced by conv_backward_fast.
  """
  conv_cache, relu_cache, pool_cache = cache
  # Walk the three layers in reverse: pool, ReLU, convolution.
  d_activated = max_pool_backward_fast(dout, pool_cache)
  d_conv_out = relu_backward(d_activated, relu_cache)
  dx, dw, db = conv_backward_fast(d_conv_out, conv_cache)
  return dx, dw, db
141 | 
142 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/layers.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | def affine_forward(x, w, b):
  5 |   """
  6 |   Computes the forward pass for an affine (fully-connected) layer.
  7 | 
  8 |   The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input.
  9 |   We multiply this against a weight matrix of shape (D, M) where
 10 |   D = \prod_i d_i
 11 | 
 12 |   Inputs:
 13 |   x - Input data, of shape (N, d_1, ..., d_k)
 14 |   w - Weights, of shape (D, M)
 15 |   b - Biases, of shape (M,)
 16 |   
 17 |   Returns a tuple of:
 18 |   - out: output, of shape (N, M)
 19 |   - cache: (x, w, b)
 20 |   """
 21 |   out = x.reshape(x.shape[0], -1).dot(w) + b
 22 |   cache = (x, w, b)
 23 |   return out, cache
 24 | 
 25 | 
 26 | def affine_backward(dout, cache):
 27 |   """
 28 |   Computes the backward pass for an affine layer.
 29 | 
 30 |   Inputs:
 31 |   - dout: Upstream derivative, of shape (N, M)
 32 |   - cache: Tuple of:
 33 |     - x: Input data, of shape (N, d_1, ... d_k)
 34 |     - w: Weights, of shape (D, M)
 35 | 
 36 |   Returns a tuple of:
 37 |   - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
 38 |   - dw: Gradient with respect to w, of shape (D, M)
 39 |   - db: Gradient with respect to b, of shape (M,)
 40 |   """
 41 |   x, w, b = cache
 42 |   dx = dout.dot(w.T).reshape(x.shape)
 43 |   dw = x.reshape(x.shape[0], -1).T.dot(dout)
 44 |   db = np.sum(dout, axis=0)
 45 |   return dx, dw, db
 46 | 
 47 | 
 48 | def relu_forward(x):
 49 |   """
 50 |   Computes the forward pass for a layer of rectified linear units (ReLUs).
 51 | 
 52 |   Input:
 53 |   - x: Inputs, of any shape
 54 | 
 55 |   Returns a tuple of:
 56 |   - out: Output, of the same shape as x
 57 |   - cache: x
 58 |   """
 59 |   out = np.maximum(0, x)
 60 |   cache = x
 61 |   return out, cache
 62 | 
 63 | 
 64 | def relu_backward(dout, cache):
 65 |   """
 66 |   Computes the backward pass for a layer of rectified linear units (ReLUs).
 67 | 
 68 |   Input:
 69 |   - dout: Upstream derivatives, of any shape
 70 |   - cache: Input x, of same shape as dout
 71 | 
 72 |   Returns:
 73 |   - dx: Gradient with respect to x
 74 |   """
 75 |   x = cache
 76 |   dx = np.where(x > 0, dout, 0)
 77 |   return dx
 78 | 
 79 | 
 80 | def batchnorm_forward(x, gamma, beta, bn_param):
 81 |   """
 82 |   Forward pass for batch normalization.
 83 |   
 84 |   During training the sample mean and (uncorrected) sample variance are
 85 |   computed from minibatch statistics and used to normalize the incoming data.
 86 |   During training we also keep an exponentially decaying running mean of the mean
 87 |   and variance of each feature, and these averages are used to normalize data
 88 |   at test-time.
 89 | 
 90 |   At each timestep we update the running averages for mean and variance using
 91 |   an exponential decay based on the momentum parameter:
 92 | 
 93 |   running_mean = momentum * running_mean + (1 - momentum) * sample_mean
 94 |   running_var = momentum * running_var + (1 - momentum) * sample_var
 95 | 
 96 |   Note that the batch normalization paper suggests a different test-time
 97 |   behavior: they compute sample mean and variance for each feature using a
 98 |   large number of training images rather than using a running average. For
 99 |   this implementation we have chosen to use running averages instead since
100 |   they do not require an additional estimation step; the torch7 implementation
101 |   of batch normalization also uses running averages.
102 | 
103 |   Input:
104 |   - x: Data of shape (N, D)
105 |   - gamma: Scale parameter of shape (D,)
106 |   - beta: Shift paremeter of shape (D,)
107 |   - bn_param: Dictionary with the following keys:
108 |     - mode: 'train' or 'test'; required
109 |     - eps: Constant for numeric stability
110 |     - momentum: Constant for running mean / variance.
111 |     - running_mean: Array of shape (D,) giving running mean of features
112 |     - running_var Array of shape (D,) giving running variance of features
113 | 
114 |   Returns a tuple of:
115 |   - out: of shape (N, D)
116 |   - cache: A tuple of values needed in the backward pass
117 |   """
118 |   mode = bn_param['mode']
119 |   eps = bn_param.get('eps', 1e-5)
120 |   momentum = bn_param.get('momentum', 0.9)
121 | 
122 |   N, D = x.shape
123 |   running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
124 |   running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))
125 | 
126 |   out, cache = None, None
127 |   if mode == 'train':
128 |     # Compute output
129 |     mu = x.mean(axis=0)
130 |     xc = x - mu
131 |     var = np.mean(xc ** 2, axis=0)
132 |     std = np.sqrt(var + eps)
133 |     xn = xc / std
134 |     out = gamma * xn + beta
135 | 
136 |     cache = (mode, x, gamma, xc, std, xn, out)
137 | 
138 |     # Update running average of mean
139 |     running_mean *= momentum
140 |     running_mean += (1 - momentum) * mu
141 | 
142 |     # Update running average of variance
143 |     running_var *= momentum
144 |     running_var += (1 - momentum) * var
145 |   elif mode == 'test':
146 |     # Using running mean and variance to normalize
147 |     std = np.sqrt(running_var + eps)
148 |     xn = (x - running_mean) / std
149 |     out = gamma * xn + beta
150 |     cache = (mode, x, xn, gamma, beta, std)
151 |   else:
152 |     raise ValueError('Invalid forward batchnorm mode "%s"' % mode)
153 | 
154 |   # Store the updated running means back into bn_param
155 |   bn_param['running_mean'] = running_mean
156 |   bn_param['running_var'] = running_var
157 | 
158 |   return out, cache
159 | 
160 | 
def batchnorm_backward(dout, cache):
  """
  Backward pass for batch normalization.

  Gradients are propagated step by step through the intermediate nodes of
  the forward computation graph (normalized value, centered value,
  standard deviation, variance and mean). For a 'test' mode cache the
  running statistics are constants, so only the scaling by gamma / std is
  differentiated.

  Inputs:
  - dout: Upstream derivatives, of shape (N, D)
  - cache: Tuple of intermediates from batchnorm_forward; its first entry
    is the mode ('train' or 'test') and selects the layout of the rest.

  Returns a tuple of:
  - dx: Gradient with respect to inputs x, of shape (N, D)
  - dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
  - dbeta: Gradient with respect to shift parameter beta, of shape (D,)

  Raises ValueError (carrying the mode) for any other mode.
  """
  mode = cache[0]
  if mode == 'train':
    _, x, gamma, centered, std, normalized, _ = cache
  elif mode == 'test':
    _, _, normalized, gamma, _, std = cache
  else:
    raise ValueError(mode)

  # These three terms are the same in both modes.
  dbeta = dout.sum(axis=0)
  dgamma = np.sum(normalized * dout, axis=0)
  d_normalized = gamma * dout

  if mode == 'test':
    return d_normalized / std, dgamma, dbeta

  N = x.shape[0]
  # Direct path through the division by std ...
  d_centered = d_normalized / std
  # ... plus the indirect path std -> var -> centered.
  d_std = -np.sum((d_normalized * centered) / (std * std), axis=0)
  d_var = 0.5 * d_std / std
  d_centered += (2.0 / N) * centered * d_var
  # Finally account for the dependence of the mean on every input row.
  d_mean = np.sum(d_centered, axis=0)
  dx = d_centered - d_mean / N
  return dx, dgamma, dbeta
202 | 
203 | 
def spatial_batchnorm_forward(x, gamma, beta, bn_param):
  """
  Forward pass for spatial batch normalization.

  The channel axis is moved last and the input is flattened to
  (N * H * W, C), so that batchnorm_forward normalizes each channel over
  the batch and both spatial axes; the result is restored to
  (N, C, H, W).

  Inputs:
  - x: Input data of shape (N, C, H, W)
  - gamma: Scale parameter, of shape (C,)
  - beta: Shift parameter, of shape (C,)
  - bn_param: Dictionary forwarded unchanged to batchnorm_forward, with
    the following keys:
    - mode: 'train' or 'test'; required
    - eps: Constant for numeric stability
    - momentum: Constant for running mean / variance. momentum=0 means that
      old information is discarded completely at every time step, while
      momentum=1 means that new information is never incorporated. The
      default of momentum=0.9 should work well in most situations.
    - running_mean: Array of shape (C,) giving running mean of features
    - running_var: Array of shape (C,) giving running variance of features

  Returns a tuple of:
  - out: Output data, of shape (N, C, H, W)
  - cache: Values needed for the backward pass (the batchnorm_forward cache)
  """
  N, C, H, W = x.shape
  channels_last = x.transpose(0, 2, 3, 1)
  rows = channels_last.reshape(-1, C)
  out_rows, cache = batchnorm_forward(rows, gamma, beta, bn_param)
  out = out_rows.reshape(N, H, W, C).transpose(0, 3, 1, 2)
  return out, cache
231 | 
232 | 
def spatial_batchnorm_backward(dout, cache):
  """
  Backward pass for spatial batch normalization.

  Mirrors the forward pass: dout is rearranged to (N * H * W, C), handed
  to batchnorm_backward, and the input gradient is restored to
  (N, C, H, W).

  Inputs:
  - dout: Upstream derivatives, of shape (N, C, H, W)
  - cache: Values from the forward pass

  Returns a tuple of:
  - dx: Gradient with respect to inputs, of shape (N, C, H, W)
  - dgamma: Gradient with respect to scale parameter, of shape (C,)
  - dbeta: Gradient with respect to shift parameter, of shape (C,)
  """
  N, C, H, W = dout.shape
  channels_last = dout.transpose(0, 2, 3, 1)
  dout_rows = channels_last.reshape(-1, C)
  dx_rows, dgamma, dbeta = batchnorm_backward(dout_rows, cache)
  dx = dx_rows.reshape(N, H, W, C).transpose(0, 3, 1, 2)
  return dx, dgamma, dbeta
251 | 
252 | 
def svm_loss(x, y):
  """
  Loss and gradient for multiclass SVM (hinge) classification with a
  margin of 1, averaged over the batch.

  Inputs:
  - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
  - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

  Returns a tuple of:
  - loss: Scalar giving the loss
  - dx: Gradient of the loss with respect to x
  """
  N = x.shape[0]
  rows = np.arange(N)
  true_scores = x[rows, y]
  margins = np.maximum(0, x - true_scores[:, np.newaxis] + 1.0)
  # The correct class never contributes to its own loss.
  margins[rows, y] = 0
  loss = np.sum(margins) / N

  violated = margins > 0
  dx = np.zeros_like(x)
  dx[violated] = 1
  # Each violating class pushes the correct-class score down by one.
  dx[rows, y] -= np.sum(violated, axis=1)
  dx /= N
  return loss, dx
278 | 
279 | 
def softmax_loss(x, y):
  """
  Loss and gradient for softmax (cross-entropy) classification, averaged
  over the batch.

  Inputs:
  - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
  - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

  Returns a tuple of:
  - loss: Scalar giving the loss
  - dx: Gradient of the loss with respect to x
  """
  N = x.shape[0]
  rows = np.arange(N)
  # Subtract the row maximum before exponentiating for numeric stability.
  shifted = x - np.max(x, axis=1, keepdims=True)
  probs = np.exp(shifted)
  probs /= np.sum(probs, axis=1, keepdims=True)
  loss = -np.sum(np.log(probs[rows, y])) / N
  dx = probs.copy()
  dx[rows, y] -= 1
  dx /= N
  return loss, dx
302 | 
303 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/optim.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | """
 4 | This file implements various first-order update rules that are commonly used for
 5 | training neural networks. Each update rule accepts current weights and the
 6 | gradient of the loss with respect to those weights and produces the next set of
 7 | weights. Each update rule has the same interface:
 8 | 
 9 | def update(w, dw, config=None):
10 | 
11 | Inputs:
12 |   - w: A numpy array giving the current weights.
13 |   - dw: A numpy array of the same shape as w giving the gradient of the
14 |     loss with respect to w.
15 |   - config: A dictionary containing hyperparameter values such as learning rate,
16 |     momentum, etc. If the update rule requires caching values over many
17 |     iterations, then config will also hold these cached values.
18 | 
19 | Returns:
20 |   - next_w: The next point after the update.
21 |   - config: The config dictionary to be passed to the next iteration of the
22 |     update rule.
23 | 
24 | NOTE: For most update rules, the default learning rate will probably not perform
25 | well; however the default values of the other hyperparameters should work well
26 | for a variety of different problems.
27 | 
28 | For efficiency, update rules may perform in-place updates, mutating w and
29 | setting next_w equal to w.
30 | """
31 | 
32 | 
def sgd(w, dw, config=None):
  """
  Performs vanilla stochastic gradient descent.

  The weights are updated in place and the same array is returned.

  config format:
  - learning_rate: Scalar learning rate (default 1e-2).
  """
  if config is None:
    config = {}
  learning_rate = config.setdefault('learning_rate', 1e-2)

  w -= learning_rate * dw
  return w, config
45 | 
46 | 
def adam(x, dx, config=None):
  """
  Uses the Adam update rule, which incorporates moving averages of both the
  gradient and its square and a bias correction term.

  x is updated in place and the same array is returned; the moving
  averages and the iteration counter are stored back into config.

  config format:
  - learning_rate: Scalar learning rate.
  - beta1: Decay rate for moving average of first moment of gradient.
  - beta2: Decay rate for moving average of second moment of gradient.
  - epsilon: Small scalar used for smoothing to avoid dividing by zero.
  - m: Moving average of gradient.
  - v: Moving average of squared gradient.
  - t: Iteration number.
  """
  if config is None:
    config = {}
  defaults = (
    ('learning_rate', 1e-3),
    ('beta1', 0.9),
    ('beta2', 0.999),
    ('epsilon', 1e-8),
    ('m', np.zeros_like(x)),
    ('v', np.zeros_like(x)),
    ('t', 0),
  )
  for key, value in defaults:
    config.setdefault(key, value)

  beta1, beta2 = config['beta1'], config['beta2']
  step = config['t'] + 1
  first_moment = beta1 * config['m'] + (1 - beta1) * dx
  second_moment = beta2 * config['v'] + (1 - beta2) * (dx * dx)
  # Bias correction for both moments is folded into the step size.
  step_size = config['learning_rate'] * np.sqrt(1 - beta2 ** step) / (1 - beta1 ** step)
  x -= step_size * (first_moment / (np.sqrt(second_moment) + config['epsilon']))

  config['t'] = step
  config['m'] = first_moment
  config['v'] = second_moment
  return x, config
84 | 
85 |   
86 | 


--------------------------------------------------------------------------------
/assignment3/cs231n/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | from distutils.extension import Extension
 3 | from Cython.Build import cythonize
 4 | import numpy
 5 | 
# Build script for the Cython extension module `im2col_cython`.
# The extension is compiled from im2col_cython.pyx; numpy's header directory
# is added to the include path (presumably because the .pyx file uses numpy's
# C API -- confirm against im2col_cython.pyx).
extensions = [
  Extension('im2col_cython', ['im2col_cython.pyx'],
            include_dirs = [numpy.get_include()]
  ),
]

# cythonize() processes the Extension list before it is handed to distutils.
setup(
    ext_modules = cythonize(extensions),
)
15 | 


--------------------------------------------------------------------------------
/assignment3/frameworkpython:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # what real Python executable to use
 4 | PYVER=2.7
 5 | PATHTOPYTHON=/usr/local/bin/
 6 | PYTHON=${PATHTOPYTHON}python${PYVER}
 7 | 
 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in
 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"`
10 | 
11 | # now run Python with the virtualenv set as Python's HOME
12 | export PYTHONHOME=$ENV
13 | exec $PYTHON "$@"
14 | 


--------------------------------------------------------------------------------
/assignment3/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/kitten.jpg


--------------------------------------------------------------------------------
/assignment3/requirements.txt:
--------------------------------------------------------------------------------
 1 | Cython==0.23.4
 2 | Jinja2==2.8
 3 | MarkupSafe==0.23
 4 | Pillow==3.0.0
 5 | Pygments==2.0.2
 6 | appnope==0.1.0
 7 | argparse==1.2.1
 8 | backports-abc==0.4
 9 | backports.ssl-match-hostname==3.5.0.1
10 | certifi==2015.11.20.1
11 | cycler==0.9.0
12 | decorator==4.0.6
13 | functools32==3.2.3-2
14 | gnureadline==6.3.3
15 | ipykernel==4.2.2
16 | ipython==4.0.1
17 | ipython-genutils==0.1.0
18 | ipywidgets==4.1.1
19 | jsonschema==2.5.1
20 | jupyter==1.0.0
21 | jupyter-client==4.1.1
22 | jupyter-console==4.0.3
23 | jupyter-core==4.0.6
24 | matplotlib==1.5.0
25 | mistune==0.7.1
26 | nbconvert==4.1.0
27 | nbformat==4.0.1
28 | notebook==4.0.6
29 | numpy==1.10.4
30 | path.py==8.1.2
31 | pexpect==4.0.1
32 | pickleshare==0.5
33 | ptyprocess==0.5
34 | pyparsing==2.0.7
35 | python-dateutil==2.4.2
36 | pytz==2015.7
37 | pyzmq==15.1.0
38 | qtconsole==4.1.1
39 | scipy==0.16.1
40 | simplegeneric==0.8.1
41 | singledispatch==3.4.0.3
42 | six==1.10.0
43 | terminado==0.5
44 | tornado==4.3
45 | traitlets==4.0.0
46 | wsgiref==0.1.2
47 | 


--------------------------------------------------------------------------------
/assignment3/sky.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Halfish/cs231n/d355c0c61296d80c7709907271c548d2994e9990/assignment3/sky.jpg


--------------------------------------------------------------------------------
/assignment3/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 | 
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 | 


--------------------------------------------------------------------------------