├── .gitattributes
├── .gitignore
├── README.md
├── assignment1
│   ├── README.md
│   ├── collectSubmission.sh
│   ├── cs231n
│   │   ├── __init__.py
│   │   ├── classifiers
│   │   │   ├── __init__.py
│   │   │   ├── k_nearest_neighbor.py
│   │   │   ├── linear_classifier.py
│   │   │   ├── linear_svm.py
│   │   │   ├── neural_net.py
│   │   │   └── softmax.py
│   │   ├── data_utils.py
│   │   ├── datasets
│   │   │   └── get_datasets.sh
│   │   ├── features.py
│   │   ├── gradient_check.py
│   │   └── vis_utils.py
│   ├── features.ipynb
│   ├── frameworkpython
│   ├── knn.ipynb
│   ├── makepdf.py
│   ├── requirements.txt
│   ├── softmax.ipynb
│   ├── svm.ipynb
│   └── two_layer_net.ipynb
├── assignment2
│   ├── BatchNormalization.ipynb
│   ├── ConvolutionalNetworks.ipynb
│   ├── Dropout.ipynb
│   ├── FullyConnectedNets.ipynb
│   ├── PyTorch.ipynb
│   ├── TensorFlow.ipynb
│   ├── collectSubmission.sh
│   ├── cs231n
│   │   ├── __init__.py
│   │   ├── classifiers
│   │   │   ├── __init__.py
│   │   │   ├── cnn.py
│   │   │   └── fc_net.py
│   │   ├── data_utils.py
│   │   ├── datasets
│   │   │   └── get_datasets.sh
│   │   ├── fast_layers.py
│   │   ├── gradient_check.py
│   │   ├── im2col.py
│   │   ├── im2col_cython.pyx
│   │   ├── layer_utils.py
│   │   ├── layers.py
│   │   ├── notebook_images
│   │   │   ├── batchnorm_graph.png
│   │   │   ├── kitten.jpg
│   │   │   ├── normalization.png
│   │   │   └── puppy.jpg
│   │   ├── optim.py
│   │   ├── setup.py
│   │   ├── solver.py
│   │   └── vis_utils.py
│   ├── frameworkpython
│   ├── makepdf.py
│   ├── requirements.txt
│   └── start_ipython_osx.sh
└── assignment3
    ├── Generative_Adversarial_Networks_PyTorch.ipynb
    ├── Generative_Adversarial_Networks_TF.ipynb
    ├── LSTM_Captioning.ipynb
    ├── NetworkVisualization-PyTorch.ipynb
    ├── NetworkVisualization-TensorFlow.ipynb
    ├── RNN_Captioning.ipynb
    ├── StyleTransfer-PyTorch.ipynb
    ├── StyleTransfer-TensorFlow.ipynb
    ├── collectSubmission.sh
    ├── cs231n
    │   ├── __init__.py
    │   ├── captioning_solver.py
    │   ├── classifiers
    │   │   ├── __init__.py
    │   │   ├── rnn.py
    │   │   └── squeezenet.py
    │   ├── coco_utils.py
    │   ├── data_utils.py
    │   ├── fast_layers.py
    │   ├── gan_pytorch.py
    │   ├── gan_tf.py
    │   ├── gradient_check.py
    │   ├── im2col.py
    │   ├── im2col_cython.pyx
    │   ├── image_utils.py
    │   ├── layer_utils.py
    │   ├── layers.py
    │   ├── net_visualization_pytorch.py
    │   ├── net_visualization_tensorflow.py
    │   ├── optim.py
    │   ├── rnn_layers.py
    │   ├── setup.py
    │   ├── style_transfer_pytorch.py
    │   └── style_transfer_tensorflow.py
    ├── example_styletransfer.png
    ├── frameworkpython
    ├── gan-checks-tf.npz
    ├── gan_outputs_pytorch.png
    ├── gan_outputs_tf.png
    ├── kitten.jpg
    ├── makepdf.py
    ├── requirements.txt
    ├── sky.jpg
    ├── start_ipython_osx.sh
    ├── style-transfer-checks-tf.npz
    ├── style-transfer-checks.npz
    ├── style_stransfer.gif
    ├── style_stransfer2.gif
    └── styles
        ├── composition_vii.jpg
        ├── muse.jpg
        ├── starry_night.jpg
        ├── the_scream.jpg
        └── tubingen.jpg

/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Pyre type checker 125 | .pyre/ 126 | Slides/ 127 | Slides2020/ 128 | cs231n.github.io/ 129 | CS231n-all-in-one.pdf 130 | datasets/ 131 | .virtual_documents/ 132 | assignment2/cs231n/im2col_cython.c 133 | assignment3/StyleTransfer-PyTorch-Copy1.ipynb 134 | assignment3/powers.gif 135 | assignment3/未命名.ipynb 136 | assignment1/two_layer_net-Copy1.ipynb 137 | CS231n-2017-Summary/ 138 | assignment1/cs231n/classifiers/neural_net_b.py 139 | HyperQuest/pickle/ 140 | HyperQuest/old/ 141 | HyperQuest/制作指南.md 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | bingcheng.openmc.cn 2 | en.sjtu.edu.cn/ 3 | www.ji.sjtu.edu.cn/ 4 | 5 | # CS231n-2020-spring-assignment-solution 6 | 7 | ~~TODO:~~ All Finished 8 | 9 | - [x] Assignment [#1](https://cs231n.github.io/assignments2020/assignment1/) (Finished 2020/9/12) 10 | - [x] Assignment [#2](https://cs231n.github.io/assignments2020/assignment2/) (Finished 2020/9/27) 11 | - [x] Assignment [#3](https://cs231n.github.io/assignments2020/assignment3/) (Finished 2020/10/8) 12 | - [x] Notes, scanned with CamScanner: [link](https://www.camscanner.com/share/show?encrypt_id=MHgzZGQ1NzU2NA%3D%3D&sid=689CA9CN&pid=dsa&style=1&share_link_style=2) (Chinese only) 13 | - [x] HyperQuest (try it [HERE](https://bingcheng.openmc.cn/HyperQuest/)) 14 | 15 | --- 16 | 17 | ## HyperQuest 18 | 19 | **HyperQuest** mimics the hyperparameter tuning app from Stanford's CS231n. It is a web app designed to give beginners in machine learning an intuition for choosing sensible hyperparameters, a task that is daunting at first because poorly chosen hyperparameters can keep a model from learning at all.
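For a taste of what the app trains, here is a minimal sketch of a random hyperparameter search of the kind used throughout the assignments; `train_and_eval` is a hypothetical stand-in for training a model (e.g. the two-layer net) and returning its validation accuracy:

```python
import numpy as np

def train_and_eval(lr, reg):
    # Hypothetical placeholder: train a model with these hyperparameters
    # and return its validation accuracy.
    return np.random.rand()

best_params, best_acc = None, -1.0
for _ in range(20):
    # Sample on a log scale: useful learning rates and regularization
    # strengths span several orders of magnitude.
    lr = 10 ** np.random.uniform(-5, -1)
    reg = 10 ** np.random.uniform(-4, 1)
    acc = train_and_eval(lr, reg)
    if acc > best_acc:
        best_params, best_acc = (lr, reg), acc
```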
20 | 21 | Try HyperQuest [HERE](https://bingcheng.openmc.cn/HyperQuest/)! 22 | 23 | ![](https://img.vim-cn.com/58/16771e2f97c0468052b4120ca2c68062b42b74.png) 24 | 25 | --- 26 | 27 | ## Interesting Examples 28 | 29 | ### KNN 30 | 31 | > Visualize the distance matrix: each row is a single test example and its distances to training examples: 32 | > 33 | > ![](https://tva2.sinaimg.cn/large/007S8ZIlgy1gjs5v76fxtj30gm02j3yj.jpg) 34 | > 35 | > Explanation: 36 | > 37 | > ![](https://tva1.sinaimg.cn/large/007S8ZIlgy1gjs5rwc1u8j30fw01pq2u.jpg) 38 | > 39 | > ![](https://tva1.sinaimg.cn/large/007S8ZIlgy1gjs5sbgp2cj30fw01p745.jpg) 40 | > 41 | > Most of the matrix stays dark because most images share a black background around their margins, so their mutual distances are small. A bright white row or column comes from an image whose appearance is rarely seen in the rest of the data: its distance to almost every other image is large, which produces the white bar. 42 | > 43 | > Find the best `k` for kNN: 44 | > 45 | > 46 | 47 | 48 | 49 | ### SVM vs. Softmax 50 | 51 | > **SVM** 52 | > 53 | > 54 | 55 | > **Softmax** 56 | > 57 | > 58 | 59 | The softmax loss curve is noticeably smoother than the SVM's, which suggests that it generalizes better. 60 | 61 | ### Two-layer net with different dtype 62 | 63 | 64 | 65 | > With dtype `np.single`, visualize the weights of the first layer: 66 | > 67 | > ![](https://tva1.sinaimg.cn/large/007S8ZIlgy1gjt9y8lqtdj30ch0ch759.jpg) 68 | 69 | > With dtype `np.float64`, visualize the weights of the first layer: 70 | > 71 | > ![](https://tva1.sinaimg.cn/large/007S8ZIlgy1gjt9zuf3d7j30ch0chab1.jpg) 72 | 73 | As you can see, the two visualizations are indistinguishable: the extra precision of `np.float64` makes no visible difference here. 74 | 75 | 76 | 77 | ### Style Transfer GIFs 78 | 79 |
80 | *(Two style-transfer GIFs belong here: `style_stransfer.gif` and `style_stransfer2.gif`, found under `assignment3/`, one run starting from a random image and one starting from the original content image.)* 81 |
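For context, here is a minimal PyTorch-style sketch of the optimization loop behind these GIFs; `run_style_transfer` and `loss_fn` are hypothetical names (the assignment's actual code lives in `style_transfer_pytorch.py`), and the learning rate is illustrative:

```python
import torch

def run_style_transfer(content_img, loss_fn, num_iters=200, init_random=True):
    # The two GIFs differ only in this initialization choice.
    img = torch.rand_like(content_img) if init_random else content_img.clone()
    img = img.detach().requires_grad_(True)
    optimizer = torch.optim.Adam([img], lr=3.0)  # illustrative value
    for _ in range(num_iters):
        optimizer.zero_grad()
        loss_fn(img).backward()  # content + style (+ total variation) terms
        optimizer.step()
    return img.detach()
```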
82 | 83 | Watching the first iterations shows that starting from a random image and starting from the original content image lead to essentially the same result. 84 | -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2020. 2 | 3 | -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS 3 | set -euo pipefail 4 | 5 | CODE=( 6 | "cs231n/classifiers/k_nearest_neighbor.py" 7 | "cs231n/classifiers/linear_classifier.py" 8 | "cs231n/classifiers/linear_svm.py" 9 | "cs231n/classifiers/softmax.py" 10 | "cs231n/classifiers/neural_net.py" 11 | ) 12 | 13 | # these notebooks should ideally 14 | # be in order of questions so 15 | # that the generated pdf is 16 | # in order of questions 17 | NOTEBOOKS=( 18 | "knn.ipynb" 19 | "svm.ipynb" 20 | "softmax.ipynb" 21 | "two_layer_net.ipynb" 22 | "features.ipynb" 23 | ) 24 | 25 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" ) 26 | 27 | LOCAL_DIR=`pwd` 28 | ASSIGNMENT_NO=1 29 | ZIP_FILENAME="a1.zip" 30 | 31 | C_R="\e[31m" 32 | C_G="\e[32m" 33 | C_BLD="\e[1m" 34 | C_E="\e[0m" 35 | 36 | for FILE in "${FILES[@]}" 37 | do 38 | if [ ! -f ${FILE} ]; then 39 | echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}" 40 | exit 0 41 | fi 42 | done 43 | 44 | echo -e "### Zipping file ###" 45 | rm -f ${ZIP_FILENAME} 46 | zip -q "${ZIP_FILENAME}" -r ${NOTEBOOKS[@]} $(find . -name "*.py") -x "makepdf.py" 47 | 48 | echo -e "### Creating PDFs ###" 49 | python makepdf.py --notebooks "${NOTEBOOKS[@]}" 50 | 51 | echo -e "### Done! Please submit a1.zip and the pdfs to Gradescope. ###" 52 | -------------------------------------------------------------------------------- /assignment1/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment1/cs231n/__init__.py -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from cs231n.classifiers.k_nearest_neighbor import * 2 | from cs231n.classifiers.linear_classifier import * 3 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/k_nearest_neighbor.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | from builtins import object 3 | import numpy as np 4 | from past.builtins import xrange 5 | 6 | 7 | class KNearestNeighbor(object): 8 | """ a kNN classifier with L2 distance """ 9 | 10 | def __init__(self): 11 | pass 12 | 13 | def train(self, X, y): 14 | """ 15 | Train the classifier. For k-nearest neighbors this is just 16 | memorizing the training data. 17 | 18 | Inputs: 19 | - X: A numpy array of shape (num_train, D) containing the training data 20 | consisting of num_train samples each of dimension D.
21 | - y: A numpy array of shape (N,) containing the training labels, where 22 | y[i] is the label for X[i]. 23 | """ 24 | self.X_train = X 25 | self.y_train = y 26 | 27 | def predict(self, X, k=1, num_loops=0): 28 | """ 29 | Predict labels for test data using this classifier. 30 | 31 | Inputs: 32 | - X: A numpy array of shape (num_test, D) containing test data consisting 33 | of num_test samples each of dimension D. 34 | - k: The number of nearest neighbors that vote for the predicted labels. 35 | - num_loops: Determines which implementation to use to compute distances 36 | between training points and testing points. 37 | 38 | Returns: 39 | - y: A numpy array of shape (num_test,) containing predicted labels for the 40 | test data, where y[i] is the predicted label for the test point X[i]. 41 | """ 42 | if num_loops == 0: 43 | dists = self.compute_distances_no_loops(X) 44 | elif num_loops == 1: 45 | dists = self.compute_distances_one_loop(X) 46 | elif num_loops == 2: 47 | dists = self.compute_distances_two_loops(X) 48 | else: 49 | raise ValueError('Invalid value %d for num_loops' % num_loops) 50 | 51 | return self.predict_labels(dists, k=k) 52 | 53 | 54 | 55 | 56 | def compute_distances_two_loops(self, X): 57 | """ 58 | Compute the distance between each test point in X and each training point 59 | in self.X_train using a nested loop over both the training data and the 60 | test data. 61 | 62 | Inputs: 63 | - X: A numpy array of shape (num_test, D) containing test data. 64 | 65 | Returns: 66 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j] 67 | is the Euclidean distance between the ith test point and the jth training 68 | point. 69 | """ 70 | num_test = X.shape[0] 71 | num_train = self.X_train.shape[0] 72 | dists = np.zeros((num_test, num_train)) 73 | for i in range(num_test): 74 | for j in range(num_train): 75 | ##################################################################### 76 | # TODO: # 77 | # Compute the l2 distance between the ith test point and the jth # 78 | # training point, and store the result in dists[i, j]. You should # 79 | # not use a loop over dimension, nor use np.linalg.norm(). # 80 | ##################################################################### 81 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 82 | dists[i,j] = np.sqrt(np.sum(np.square(self.X_train[j] - X[i]))) 83 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 84 | return dists 85 | 86 | def compute_distances_one_loop(self, X): 87 | """ 88 | Compute the distance between each test point in X and each training point 89 | in self.X_train using a single loop over the test data. 90 | 91 | Input / Output: Same as compute_distances_two_loops 92 | """ 93 | num_test = X.shape[0] 94 | num_train = self.X_train.shape[0] 95 | dists = np.zeros((num_test, num_train)) 96 | # num_test = 5 97 | for i in range(num_test): 98 | ####################################################################### 99 | # TODO: # 100 | # Compute the l2 distance between the ith test point and all training # 101 | # points, and store the result in dists[i, :]. # 102 | # Do not use np.linalg.norm(). 
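# (Broadcasting makes this a one-liner: self.X_train - X[i] subtracts
# the (D,) test row from every row of the (num_train, D) training
# matrix, so squaring and summing over axis=1 yields all num_train
# distances at once.)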
# 103 | ####################################################################### 104 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 105 | dists[i, :] = np.sqrt(np.sum(np.square(self.X_train - X[i]), axis = 1)) 106 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 107 | return dists 108 | 109 | def compute_distances_no_loops(self, X): 110 | """ 111 | Compute the distance between each test point in X and each training point 112 | in self.X_train using no explicit loops. 113 | 114 | Input / Output: Same as compute_distances_two_loops 115 | """ 116 | num_test = X.shape[0] 117 | num_train = self.X_train.shape[0] 118 | dists = np.zeros((num_test, num_train)) 119 | ######################################################################### 120 | # TODO: # 121 | # Compute the l2 distance between all test points and all training # 122 | # points without using any explicit loops, and store the result in # 123 | # dists. # 124 | # # 125 | # You should implement this function using only basic array operations; # 126 | # in particular you should not use functions from scipy, # 127 | # nor use np.linalg.norm(). # 128 | # # 129 | # HINT: Try to formulate the l2 distance using matrix multiplication # 130 | # and two broadcast sums. # 131 | ######################################################################### 132 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 133 | # def D3Broadcast(a, b): 134 | # # a is Axn, b is Bxn 135 | # # Return BxA 136 | # assert b.shape[1] == a.shape[1] 137 | # b_new = np.reshape(b,(b.shape[0],1,b.shape[1])) 138 | # a_new = np.tile(a,(b.shape[0],1)) 139 | # a_new = np.reshape(a_new,(b.shape[0],a.shape[0],b.shape[1])) 140 | # f = a_new-b_new 141 | # return np.sum(np.abs(f),axis = 2) 142 | 143 | # dists = D3Broadcast(X, self.X_train) 144 | # dists = D3Broadcast(self.X_train, X) 145 | # a = X 146 | # b = self.X_train 147 | # dists = np.sum(np.abs(a.reshape(a.shape[0],1,a.shape[1])-b),axis = 2) 148 | dists = np.sqrt(-2*np.dot(X, self.X_train.T) + np.sum(np.square(self.X_train), axis = 1) + np.transpose([np.sum(np.square(X), axis = 1)])) 149 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 150 | return dists 151 | 152 | def predict_labels(self, dists, k=1): 153 | """ 154 | Given a matrix of distances between test points and training points, 155 | predict a label for each test point. 156 | 157 | Inputs: 158 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j] 159 | gives the distance betwen the ith test point and the jth training point. 160 | 161 | Returns: 162 | - y: A numpy array of shape (num_test,) containing predicted labels for the 163 | test data, where y[i] is the predicted label for the test point X[i]. 164 | """ 165 | num_test = dists.shape[0] 166 | y_pred = np.zeros(num_test) 167 | # num_test = 4 168 | for i in range(num_test): 169 | # A list of length k storing the labels of the k nearest neighbors to 170 | # the ith test point. 171 | closest_y = [] 172 | ######################################################################### 173 | # TODO: # 174 | # Use the distance matrix to find the k nearest neighbors of the ith # 175 | # testing point, and use self.y_train to find the labels of these # 176 | # neighbors. Store these labels in closest_y. # 177 | # Hint: Look up the function numpy.argsort. 
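# (For example, np.argsort([0.5, 0.1, 0.3]) returns array([1, 2, 0]),
# the indices that would sort the distances in ascending order, so the
# first k entries index the k nearest training points.)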
# 178 | ######################################################################### 179 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 180 | row_i = dists[i, :] 181 | sort_index = np.argsort(row_i) # ascending order: no need to negate, 182 | # because smaller distances are better 183 | kth_near = sort_index[:k] 184 | closest_y = [self.y_train[j] for j in kth_near] 185 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 186 | ######################################################################### 187 | # TODO: # 188 | # Now that you have found the labels of the k nearest neighbors, you # 189 | # need to find the most common label in the list closest_y of labels. # 190 | # Store this label in y_pred[i]. Break ties by choosing the smaller # 191 | # label. # 192 | ######################################################################### 193 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 194 | # Count votes with np.bincount; on ties np.argmax returns the first 195 | # (i.e. smallest) label, which implements the required tie-breaking. 196 | # (max(closest_y, key=closest_y.count) would instead pick whichever 197 | # tied label happens to come first in closest_y.) 198 | y_pred[i] = np.argmax(np.bincount(closest_y)) 199 | 200 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 201 | 202 | return y_pred 203 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/linear_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from builtins import object 5 | import numpy as np 6 | from cs231n.classifiers.linear_svm import * 7 | from cs231n.classifiers.softmax import * 8 | from past.builtins import xrange 9 | 10 | 11 | class LinearClassifier(object): 12 | 13 | def __init__(self): 14 | self.W = None 15 | 16 | def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, 17 | batch_size=200, verbose=False): 18 | """ 19 | Train this linear classifier using stochastic gradient descent. 20 | 21 | Inputs: 22 | - X: A numpy array of shape (N, D) containing training data; there are N 23 | training samples each of dimension D. 24 | - y: A numpy array of shape (N,) containing training labels; y[i] = c 25 | means that X[i] has label 0 <= c < C for C classes. 26 | - learning_rate: (float) learning rate for optimization. 27 | - reg: (float) regularization strength. 28 | - num_iters: (integer) number of steps to take when optimizing 29 | - batch_size: (integer) number of training examples to use at each step. 30 | - verbose: (boolean) If true, print progress during optimization. 31 | 32 | Outputs: 33 | A list containing the value of the loss function at each training iteration. 34 | """ 35 | num_train, dim = X.shape 36 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 37 | if self.W is None: 38 | # lazily initialize W 39 | self.W = 0.001 * np.random.randn(dim, num_classes) 40 | 41 | # Run stochastic gradient descent to optimize W 42 | loss_history = [] 43 | for it in range(num_iters): 44 | X_batch = None 45 | y_batch = None 46 | 47 | ######################################################################### 48 | # TODO: # 49 | # Sample batch_size elements from the training data and their # 50 | # corresponding labels to use in this round of gradient descent.
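# (With the defaults above, each of the num_iters=100 steps trains on
# a random batch_size=200 rows of X rather than the full training set.)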
# 51 | # Store the data in X_batch and their corresponding labels in # 52 | # y_batch; after sampling X_batch should have shape (batch_size, dim) # 53 | # and y_batch should have shape (batch_size,) # 54 | # # 55 | # Hint: Use np.random.choice to generate indices. Sampling with # 56 | # replacement is faster than sampling without replacement. # 57 | ######################################################################### 58 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | mask = np.random.choice(num_train, batch_size, replace=True) 60 | X_batch = X[mask, :] 61 | y_batch = y[mask] 62 | # assert (batch_size, dim == X_batch.shape), X_batch.shape 63 | # assert (batch_size == y_batch.shape[0]), y_batch.shape 64 | 65 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 66 | 67 | # evaluate loss and gradient 68 | loss, grad = self.loss(X_batch, y_batch, reg) 69 | loss_history.append(loss) 70 | 71 | # perform parameter update 72 | ######################################################################### 73 | # TODO: # 74 | # Update the weights using the gradient and the learning rate. # 75 | ######################################################################### 76 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 77 | 78 | self.W -= learning_rate * grad 79 | 80 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 81 | 82 | if verbose and it % 100 == 0: 83 | print('iteration %d / %d: loss %f' % (it, num_iters, loss)) 84 | 85 | return loss_history 86 | 87 | def predict(self, X): 88 | """ 89 | Use the trained weights of this linear classifier to predict labels for 90 | data points. 91 | 92 | Inputs: 93 | - X: A numpy array of shape (N, D) containing training data; there are N 94 | training samples each of dimension D. 95 | 96 | Returns: 97 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional 98 | array of length N, and each element is an integer giving the predicted 99 | class. 100 | """ 101 | y_pred = np.zeros(X.shape[0]) 102 | ########################################################################### 103 | # TODO: # 104 | # Implement this method. Store the predicted labels in y_pred. # 105 | ########################################################################### 106 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 107 | 108 | scores = X@self.W 109 | y_pred = np.argmax(scores, axis=1) 110 | 111 | 112 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 113 | return y_pred 114 | 115 | def loss(self, X_batch, y_batch, reg): 116 | """ 117 | Compute the loss function and its derivative. 118 | Subclasses will override this. 119 | 120 | Inputs: 121 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N 122 | data points; each point has dimension D. 123 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch. 124 | - reg: (float) regularization strength. 
125 | 126 | Returns: A tuple containing: 127 | - loss as a single float 128 | - gradient with respect to self.W; an array of the same shape as W 129 | """ 130 | pass 131 | 132 | 133 | class LinearSVM(LinearClassifier): 134 | """ A subclass that uses the Multiclass SVM loss function """ 135 | 136 | def loss(self, X_batch, y_batch, reg): 137 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg) 138 | 139 | 140 | class Softmax(LinearClassifier): 141 | """ A subclass that uses the Softmax + Cross-entropy loss function """ 142 | 143 | def loss(self, X_batch, y_batch, reg): 144 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) 145 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/linear_svm.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | from random import shuffle 4 | from past.builtins import xrange 5 | 6 | def svm_loss_naive(W, X, y, reg): 7 | """ 8 | Structured SVM loss function, naive implementation (with loops). 9 | 10 | Inputs have dimension D, there are C classes, and we operate on minibatches 11 | of N examples. 12 | 13 | Inputs: 14 | - W: A numpy array of shape (D, C) containing weights. 15 | - X: A numpy array of shape (N, D) containing a minibatch of data. 16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 17 | that X[i] has label c, where 0 <= c < C. 18 | - reg: (float) regularization strength 19 | 20 | Returns a tuple of: 21 | - loss as single float 22 | - gradient with respect to weights W; an array of same shape as W 23 | """ 24 | dW = np.zeros(W.shape) # initialize the gradient as zero 25 | 26 | # compute the loss and the gradient 27 | num_classes = W.shape[1] 28 | num_train = X.shape[0] 29 | loss = 0.0 30 | for i in range(num_train): 31 | scores = X[i].dot(W) 32 | correct_class_score = scores[y[i]] 33 | for j in range(num_classes): 34 | if j == y[i]: 35 | continue # loss += 0 36 | 37 | margin = scores[j] - correct_class_score + 1 # note delta = 1 38 | if margin > 0: 39 | loss += margin 40 | dW[:,j] += X[i].T 41 | dW[:,y[i]] -= X[i].T 42 | 43 | 44 | 45 | # Right now the loss is a sum over all training examples, but we want it 46 | # to be an average instead so we divide by num_train. 47 | loss /= num_train 48 | dW /= num_train 49 | 50 | # Add regularization to the loss. 51 | loss += reg * np.sum(W * W) 52 | dW += 2* reg*W 53 | ############################################################################# 54 | # TODO: # 55 | # Compute the gradient of the loss function and store it dW. # 56 | # Rather than first computing the loss and then computing the derivative, # 57 | # it may be simpler to compute the derivative at the same time that the # 58 | # loss is being computed. As a result you may need to modify some of the # 59 | # code above to compute the gradient. # 60 | ############################################################################# 61 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 62 | 63 | "Please check the code above" 64 | 65 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 66 | 67 | return loss, dW 68 | 69 | 70 | 71 | def svm_loss_vectorized(W, X, y, reg): 72 | """ 73 | Structured SVM loss function, vectorized implementation. 74 | 75 | Inputs and outputs are the same as svm_loss_naive. 
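    Per example i, the loss being vectorized is
    L_i = sum over j != y_i of max(0, s_j - s_{y_i} + 1),
    with the margin delta fixed at 1, as in the naive version.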
76 | """ 77 | loss = 0.0 78 | dW = np.zeros(W.shape) # initialize the gradient as zero 79 | 80 | ############################################################################# 81 | # TODO: # 82 | # Implement a vectorized version of the structured SVM loss, storing the # 83 | # result in loss. # 84 | ############################################################################# 85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 86 | 87 | scores = X.dot(W) 88 | correct_class_scores = scores[np.arange(len(y)), y] # select the correct items 89 | margins = np.maximum(0, (scores.T - correct_class_scores).T + 1) # note delta = 1 90 | margins[np.arange(X.shape[0]), y] = 0 # set correct items to 0 91 | loss = np.sum(margins)/X.shape[0] + reg*np.sum(W*W) 92 | 93 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 94 | 95 | ############################################################################# 96 | # TODO: # 97 | # Implement a vectorized version of the gradient for the structured SVM # 98 | # loss, storing the result in dW. # 99 | # # 100 | # Hint: Instead of computing the gradient from scratch, it may be easier # 101 | # to reuse some of the intermediate values that you used to compute the # 102 | # loss. # 103 | ############################################################################# 104 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 105 | 106 | # d_score = np.int64((scores.T - correct_class_scores).T + 1 >0) 107 | # # print('shape d_score', d_score.shape) 108 | # d_score[np.arange(scores.shape[0]), y] = 0 109 | # sum_by_row = np.sum(d_score, axis=1) 110 | # d_score[np.arange(scores.shape[0]), y] = -sum_by_row 111 | # dW = X.T@d_score 112 | # # print("dW.shape1", dW.shape) 113 | # dW = dW/X.shape[0] + 2*reg*W 114 | # # print("dW.shape2", dW.shape) 115 | d_score = (scores.T - correct_class_scores).T + 1 116 | d_score[d_score<0]=0 117 | d_score[d_score>0]=1 118 | d_score[np.arange(scores.shape[0]), y] = 0 119 | d_score[np.arange(scores.shape[0]), y] = -np.sum(d_score, axis=1) 120 | dW = X.T@d_score 121 | dW = dW/X.shape[0] + 2*reg*W 122 | 123 | 124 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 125 | 126 | return loss, dW 127 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/softmax.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | from random import shuffle 4 | from past.builtins import xrange 5 | 6 | def softmax_loss_naive(W, X, y, reg): 7 | """ 8 | Softmax loss function, naive implementation (with loops) 9 | 10 | Inputs have dimension D, there are C classes, and we operate on minibatches 11 | of N examples. 12 | 13 | Inputs: 14 | - W: A numpy array of shape (D, C) containing weights. 15 | - X: A numpy array of shape (N, D) containing a minibatch of data. 16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 17 | that X[i] has label c, where 0 <= c < C. 18 | - reg: (float) regularization strength 19 | 20 | Returns a tuple of: 21 | - loss as single float 22 | - gradient with respect to weights W; an array of same shape as W 23 | """ 24 | # Initialize the loss and gradient to zero. 25 | loss = 0.0 26 | dW = np.zeros_like(W) 27 | 28 | ############################################################################# 29 | # TODO: Compute the softmax loss and its gradient using explicit loops. # 30 | # Store the loss in loss and the gradient in dW. 
If you are not careful # 31 | # here, it is easy to run into numeric instability. Don't forget the # 32 | # regularization! # 33 | ############################################################################# 34 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 35 | 36 | num_classes = W.shape[1] 37 | num_train = X.shape[0] 38 | loss = 0.0 39 | for i in range(num_train): 40 | scores = X[i].dot(W) 41 | scores -= np.max(scores) # to avoid numeric instability 42 | # Otherwise: RuntimeWarning: invalid value encountered in true_divide 43 | correct_class_score = scores[y[i]] 44 | # loss += -np.log(np.exp(correct_class_score)/np.sum(np.exp(scores))) 45 | loss += -correct_class_score + np.log(np.sum(np.exp(scores))) 46 | 47 | 48 | for j in range(num_classes): 49 | p = np.exp(scores[j])/np.sum(np.exp(scores)) 50 | if j == y[i]: 51 | dW[:,j] += (p-1)*X[i].T 52 | else: 53 | dW[:,j] += p*X[i].T 54 | 55 | loss /= num_train 56 | dW /= num_train 57 | 58 | loss += reg * np.sum(W * W) 59 | dW += 2* reg*W 60 | 61 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 62 | 63 | return loss, dW 64 | 65 | 66 | def softmax_loss_vectorized(W, X, y, reg): 67 | """ 68 | Softmax loss function, vectorized version. 69 | 70 | Inputs and outputs are the same as softmax_loss_naive. 71 | """ 72 | # Initialize the loss and gradient to zero. 73 | loss = 0.0 74 | dW = np.zeros_like(W) 75 | 76 | ############################################################################# 77 | # TODO: Compute the softmax loss and its gradient using no explicit loops. # 78 | # Store the loss in loss and the gradient in dW. If you are not careful # 79 | # here, it is easy to run into numeric instability. Don't forget the # 80 | # regularization! # 81 | ############################################################################# 82 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 83 | num_train = X.shape[0] 84 | 85 | scores = X@W 86 | scores = (scores.T - np.max(scores, axis = 1)).T # to avoid numeric instability 87 | correct4rows = scores[range(len(y)),y] 88 | loss = np.sum(-correct4rows + np.log(np.sum(np.exp(scores), axis = 1))) 89 | dscore = np.exp(scores)/np.sum(np.exp(scores), axis = 1).reshape(-1,1) 90 | dscore[range(len(y)),y] -= 1 91 | dW = X.T@dscore 92 | 93 | loss /= num_train 94 | dW /= num_train 95 | 96 | loss += reg * np.sum(W * W) 97 | dW += 2* reg*W 98 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 99 | 100 | return loss, dW 101 | -------------------------------------------------------------------------------- /assignment1/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from six.moves import cPickle as pickle 5 | import numpy as np 6 | import os 7 | from imageio import imread 8 | import platform 9 | 10 | def load_pickle(f): 11 | version = platform.python_version_tuple() 12 | if version[0] == '2': 13 | return pickle.load(f) 14 | elif version[0] == '3': 15 | return pickle.load(f, encoding='latin1') 16 | raise ValueError("invalid python version: {}".format(version)) 17 | 18 | def load_CIFAR_batch(filename): 19 | """ load single batch of cifar """ 20 | with open(filename, 'rb') as f: 21 | datadict = load_pickle(f) 22 | X = datadict['data'] 23 | Y = datadict['labels'] 24 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float") 25 | Y = np.array(Y) 26 | return X, Y 27 | 28 | def load_CIFAR10(ROOT): 29 | """ load all of cifar """ 30 
| xs = [] 31 | ys = [] 32 | for b in range(1,6): 33 | f = os.path.join(ROOT, 'data_batch_%d' % (b, )) 34 | X, Y = load_CIFAR_batch(f) 35 | xs.append(X) 36 | ys.append(Y) 37 | Xtr = np.concatenate(xs) 38 | Ytr = np.concatenate(ys) 39 | del X, Y 40 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch')) 41 | return Xtr, Ytr, Xte, Yte 42 | 43 | 44 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, 45 | subtract_mean=True): 46 | """ 47 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 48 | it for classifiers. These are the same steps as we used for the SVM, but 49 | condensed to a single function. 50 | """ 51 | # Load the raw CIFAR-10 data 52 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 53 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 54 | 55 | # Subsample the data 56 | mask = list(range(num_training, num_training + num_validation)) 57 | X_val = X_train[mask] 58 | y_val = y_train[mask] 59 | mask = list(range(num_training)) 60 | X_train = X_train[mask] 61 | y_train = y_train[mask] 62 | mask = list(range(num_test)) 63 | X_test = X_test[mask] 64 | y_test = y_test[mask] 65 | 66 | # Normalize the data: subtract the mean image 67 | if subtract_mean: 68 | mean_image = np.mean(X_train, axis=0) 69 | X_train -= mean_image 70 | X_val -= mean_image 71 | X_test -= mean_image 72 | 73 | # Transpose so that channels come first 74 | X_train = X_train.transpose(0, 3, 1, 2).copy() 75 | X_val = X_val.transpose(0, 3, 1, 2).copy() 76 | X_test = X_test.transpose(0, 3, 1, 2).copy() 77 | 78 | # Package data into a dictionary 79 | return { 80 | 'X_train': X_train, 'y_train': y_train, 81 | 'X_val': X_val, 'y_val': y_val, 82 | 'X_test': X_test, 'y_test': y_test, 83 | } 84 | 85 | 86 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 87 | """ 88 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 89 | TinyImageNet-200 have the same directory structure, so this can be used 90 | to load any of them. 91 | 92 | Inputs: 93 | - path: String giving path to the directory to load. 94 | - dtype: numpy datatype used to load the data. 95 | - subtract_mean: Whether to subtract the mean training image. 96 | 97 | Returns: A dictionary with the following entries: 98 | - class_names: A list where class_names[i] is a list of strings giving the 99 | WordNet names for class i in the loaded dataset. 100 | - X_train: (N_tr, 3, 64, 64) array of training images 101 | - y_train: (N_tr,) array of training labels 102 | - X_val: (N_val, 3, 64, 64) array of validation images 103 | - y_val: (N_val,) array of validation labels 104 | - X_test: (N_test, 3, 64, 64) array of testing images. 105 | - y_test: (N_test,) array of test labels; if test labels are not available 106 | (such as in student code) then y_test will be None. 107 | - mean_image: (3, 64, 64) array giving mean training image 108 | """ 109 | # First load wnids 110 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 111 | wnids = [x.strip() for x in f] 112 | 113 | # Map wnids to integer labels 114 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 115 | 116 | # Use words.txt to get names for each class 117 | with open(os.path.join(path, 'words.txt'), 'r') as f: 118 | wnid_to_words = dict(line.split('\t') for line in f) 119 | for wnid, words in wnid_to_words.items(): 120 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 121 | class_names = [wnid_to_words[wnid] for wnid in wnids] 122 | 123 | # Next load training data. 
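# (Directory layout assumed by the loop below: train/<wnid>/<wnid>_boxes.txt
# lists the image filenames, and the images themselves live under
# train/<wnid>/images/.)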
124 | X_train = [] 125 | y_train = [] 126 | for i, wnid in enumerate(wnids): 127 | if (i + 1) % 20 == 0: 128 | print('loading training data for synset %d / %d' 129 | % (i + 1, len(wnids))) 130 | # To figure out the filenames we need to open the boxes file 131 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 132 | with open(boxes_file, 'r') as f: 133 | filenames = [x.split('\t')[0] for x in f] 134 | num_images = len(filenames) 135 | 136 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 137 | y_train_block = wnid_to_label[wnid] * \ 138 | np.ones(num_images, dtype=np.int64) 139 | for j, img_file in enumerate(filenames): 140 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 141 | img = imread(img_file) 142 | if img.ndim == 2: 143 | ## grayscale file 144 | img.shape = (64, 64, 1) 145 | X_train_block[j] = img.transpose(2, 0, 1) 146 | X_train.append(X_train_block) 147 | y_train.append(y_train_block) 148 | 149 | # We need to concatenate all training data 150 | X_train = np.concatenate(X_train, axis=0) 151 | y_train = np.concatenate(y_train, axis=0) 152 | 153 | # Next load validation data 154 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 155 | img_files = [] 156 | val_wnids = [] 157 | for line in f: 158 | img_file, wnid = line.split('\t')[:2] 159 | img_files.append(img_file) 160 | val_wnids.append(wnid) 161 | num_val = len(img_files) 162 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 163 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 164 | for i, img_file in enumerate(img_files): 165 | img_file = os.path.join(path, 'val', 'images', img_file) 166 | img = imread(img_file) 167 | if img.ndim == 2: 168 | img.shape = (64, 64, 1) 169 | X_val[i] = img.transpose(2, 0, 1) 170 | 171 | # Next load test images 172 | # Students won't have test labels, so we need to iterate over files in the 173 | # images directory. 174 | img_files = os.listdir(os.path.join(path, 'test', 'images')) 175 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 176 | for i, img_file in enumerate(img_files): 177 | img_file = os.path.join(path, 'test', 'images', img_file) 178 | img = imread(img_file) 179 | if img.ndim == 2: 180 | img.shape = (64, 64, 1) 181 | X_test[i] = img.transpose(2, 0, 1) 182 | 183 | y_test = None 184 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt') 185 | if os.path.isfile(y_test_file): 186 | with open(y_test_file, 'r') as f: 187 | img_file_to_wnid = {} 188 | for line in f: 189 | line = line.split('\t') 190 | img_file_to_wnid[line[0]] = line[1] 191 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] 192 | for img_file in img_files] 193 | y_test = np.array(y_test) 194 | 195 | mean_image = X_train.mean(axis=0) 196 | if subtract_mean: 197 | X_train -= mean_image[None] 198 | X_val -= mean_image[None] 199 | X_test -= mean_image[None] 200 | 201 | return { 202 | 'class_names': class_names, 203 | 'X_train': X_train, 204 | 'y_train': y_train, 205 | 'X_val': X_val, 206 | 'y_val': y_val, 207 | 'X_test': X_test, 208 | 'y_test': y_test, 209 | 210 | 'mean_image': mean_image, 211 | } 212 | 213 | 214 | def load_models(models_dir): 215 | """ 216 | Load saved models from disk. This will attempt to unpickle all files in a 217 | directory; any files that give errors on unpickling (such as README.txt) 218 | will be skipped. 219 | 220 | Inputs: 221 | - models_dir: String giving the path to a directory containing model files.
222 | Each model file is a pickled dictionary with a 'model' field. 223 | 224 | Returns: 225 | A dictionary mapping model file names to models. 226 | """ 227 | models = {} 228 | for model_file in os.listdir(models_dir): 229 | with open(os.path.join(models_dir, model_file), 'rb') as f: 230 | try: 231 | models[model_file] = load_pickle(f)['model'] 232 | except pickle.UnpicklingError: 233 | continue 234 | return models 235 | 236 | 237 | def load_imagenet_val(num=None): 238 | """Load a handful of validation images from ImageNet. 239 | 240 | Inputs: 241 | - num: Number of images to load (max of 25) 242 | 243 | Returns: 244 | - X: numpy array with shape [num, 224, 224, 3] 245 | - y: numpy array of integer image labels, shape [num] 246 | - class_names: dict mapping integer label to class name 247 | """ 248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz' 249 | if not os.path.isfile(imagenet_fn): 250 | print('file %s not found' % imagenet_fn) 251 | print('Run the following:') 252 | print('cd cs231n/datasets') 253 | print('bash get_imagenet_val.sh') 254 | assert False, 'Need to download imagenet_val_25.npz' 255 | f = np.load(imagenet_fn) 256 | X = f['X'] 257 | y = f['y'] 258 | class_names = f['label_map'].item() 259 | if num is not None: 260 | X = X[:num] 261 | y = y[:num] 262 | return X, y, class_names 263 | -------------------------------------------------------------------------------- /assignment1/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignment1/cs231n/features.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import zip 3 | from builtins import range 4 | from past.builtins import xrange 5 | 6 | import matplotlib 7 | import numpy as np 8 | from scipy.ndimage import uniform_filter 9 | 10 | 11 | def extract_features(imgs, feature_fns, verbose=False): 12 | """ 13 | Given pixel data for images and several feature functions that can operate on 14 | single images, apply all feature functions to all images, concatenating the 15 | feature vectors for each image and storing the features for all images in 16 | a single matrix. 17 | 18 | Inputs: 19 | - imgs: N x H X W X C array of pixel data for N images. 20 | - feature_fns: List of k feature functions. The ith feature function should 21 | take as input an H x W x D array and return a (one-dimensional) array of 22 | length F_i. 23 | - verbose: Boolean; if true, print progress. 24 | 25 | Returns: 26 | An array of shape (N, F_1 + ... + F_k) where each column is the concatenation 27 | of all features for a single image. 28 | """ 29 | num_images = imgs.shape[0] 30 | if num_images == 0: 31 | return np.array([]) 32 | 33 | # Use the first image to determine feature dimensions 34 | feature_dims = [] 35 | first_image_features = [] 36 | for feature_fn in feature_fns: 37 | feats = feature_fn(imgs[0].squeeze()) 38 | assert len(feats.shape) == 1, 'Feature functions must be one-dimensional' 39 | feature_dims.append(feats.size) 40 | first_image_features.append(feats) 41 | 42 | # Now that we know the dimensions of the features, we can allocate a single 43 | # big array to store all features as columns. 
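# (For example, with hog_feature on 32x32 inputs (4*4 cells * 9 bins =
# 144 dims) plus color_histogram_hsv with nbin=10, each image maps to a
# 154-dimensional feature row.)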
44 | total_feature_dim = sum(feature_dims) 45 | imgs_features = np.zeros((num_images, total_feature_dim)) 46 | imgs_features[0] = np.hstack(first_image_features).T 47 | 48 | # Extract features for the rest of the images. 49 | for i in range(1, num_images): 50 | idx = 0 51 | for feature_fn, feature_dim in zip(feature_fns, feature_dims): 52 | next_idx = idx + feature_dim 53 | imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze()) 54 | idx = next_idx 55 | if verbose and i % 1000 == 999: 56 | print('Done extracting features for %d / %d images' % (i+1, num_images)) 57 | 58 | return imgs_features 59 | 60 | 61 | def rgb2gray(rgb): 62 | """Convert RGB image to grayscale 63 | 64 | Parameters: 65 | rgb : RGB image 66 | 67 | Returns: 68 | gray : grayscale image 69 | 70 | """ 71 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) # ITU-R 601 luma weights 72 | 73 | 74 | def hog_feature(im): 75 | """Compute Histogram of Gradient (HOG) feature for an image 76 | 77 | Modified from skimage.feature.hog 78 | https://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.hog 79 | 80 | Reference: 81 | Histograms of Oriented Gradients for Human Detection 82 | Navneet Dalal and Bill Triggs, CVPR 2005 83 | 84 | Parameters: 85 | im : an input grayscale or rgb image 86 | 87 | Returns: 88 | feat: Histogram of Gradient (HOG) feature 89 | 90 | """ 91 | 92 | # convert rgb to grayscale if needed 93 | if im.ndim == 3: 94 | image = rgb2gray(im) 95 | else: 96 | image = np.atleast_2d(im) 97 | 98 | sx, sy = image.shape # image size 99 | orientations = 9 # number of gradient bins 100 | cx, cy = (8, 8) # pixels per cell 101 | 102 | gx = np.zeros(image.shape) 103 | gy = np.zeros(image.shape) 104 | gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction 105 | gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction 106 | grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude 107 | grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation 108 | 109 | n_cellsx = int(np.floor(sx / cx)) # number of cells in x 110 | n_cellsy = int(np.floor(sy / cy)) # number of cells in y 111 | # compute orientations integral images 112 | orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations)) 113 | for i in range(orientations): 114 | # create new integral image for this orientation 115 | # isolate orientations in this range 116 | temp_ori = np.where(grad_ori < 180 / orientations * (i + 1), 117 | grad_ori, 0) 118 | temp_ori = np.where(grad_ori >= 180 / orientations * i, 119 | temp_ori, 0) 120 | # select magnitudes for those orientations 121 | cond2 = temp_ori > 0 122 | temp_mag = np.where(cond2, grad_mag, 0) 123 | orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[round(cx/2)::cx, round(cy/2)::cy].T 124 | 125 | return orientation_histogram.ravel() 126 | 127 | 128 | def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True): 129 | """ 130 | Compute color histogram for an image using hue. 131 | 132 | Inputs: 133 | - im: H x W x C array of pixel data for an RGB image. 134 | - nbin: Number of histogram bins. (default: 10) 135 | - xmin: Minimum pixel value (default: 0) 136 | - xmax: Maximum pixel value (default: 255) 137 | - normalized: Whether to normalize the histogram (default: True) 138 | 139 | Returns: 140 | 1D vector of length nbin giving the color histogram over the hue of the 141 | input image.
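    For example, with the defaults a (32, 32, 3) image is converted to
    HSV and its hue channel is binned into nbin=10 buckets; with
    normalized=True the returned histogram sums to 1.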
142 | """ 143 | ndim = im.ndim 144 | bins = np.linspace(xmin, xmax, nbin+1) 145 | hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax 146 | imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized) 147 | imhist = imhist * np.diff(bin_edges) 148 | 149 | # return histogram 150 | return imhist 151 | 152 | 153 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 154 | 155 | pass 156 | 157 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 158 | -------------------------------------------------------------------------------- /assignment1/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 40 | """ 41 | Evaluate a numeric gradient for a function that accepts a numpy 42 | array and returns a numpy array. 43 | """ 44 | grad = np.zeros_like(x) 45 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 46 | while not it.finished: 47 | ix = it.multi_index 48 | 49 | oldval = x[ix] 50 | x[ix] = oldval + h 51 | pos = f(x).copy() 52 | x[ix] = oldval - h 53 | neg = f(x).copy() 54 | x[ix] = oldval 55 | 56 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 57 | it.iternext() 58 | return grad 59 | 60 | 61 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 62 | """ 63 | Compute numeric gradients for a function that operates on input 64 | and output blobs. 65 | 66 | We assume that f accepts several input blobs as arguments, followed by a 67 | blob where outputs will be written. For example, f might be called like: 68 | 69 | f(x, w, out) 70 | 71 | where x and w are input Blobs, and the result of f will be written to out. 
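    As in the other helpers in this file, each partial derivative is
    estimated with the centered difference (f(x + h) - f(x - h)) / (2 * h).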
72 | 73 | Inputs: 74 | - f: function 75 | - inputs: tuple of input blobs 76 | - output: output blob 77 | - h: step size 78 | """ 79 | numeric_diffs = [] 80 | for input_blob in inputs: 81 | diff = np.zeros_like(input_blob.diffs) 82 | it = np.nditer(input_blob.vals, flags=['multi_index'], 83 | op_flags=['readwrite']) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs(lambda *args: net.forward(), 105 | inputs, output, h=h) 106 | 107 | 108 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 109 | """ 110 | Sample a few random elements and compare numerical and analytic 111 | gradients only in those dimensions. 112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # decrement by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /assignment1/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | from past.builtins import xrange 3 | 4 | from math import sqrt, ceil 5 | import numpy as np 6 | 7 | def visualize_grid(Xs, ubound=255.0, padding=1): 8 | """ 9 | Reshape a 4D tensor of image data to a grid for easy visualization.
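    For example, N = 100 CIFAR-10 images of shape (100, 32, 32, 3) are
    tiled into a ceil(sqrt(N)) x ceil(sqrt(N)) = 10 x 10 grid, with
    `padding` blank pixels between neighboring tiles.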
10 | 11 | Inputs: 12 | - Xs: Data of shape (N, H, W, C) 13 | - ubound: Output grid will have values scaled to the range [0, ubound] 14 | - padding: The number of blank pixels between elements of the grid 15 | """ 16 | (N, H, W, C) = Xs.shape 17 | grid_size = int(ceil(sqrt(N))) 18 | grid_height = H * grid_size + padding * (grid_size - 1) 19 | grid_width = W * grid_size + padding * (grid_size - 1) 20 | grid = np.zeros((grid_height, grid_width, C)) 21 | next_idx = 0 22 | y0, y1 = 0, H 23 | for y in range(grid_size): 24 | x0, x1 = 0, W 25 | for x in range(grid_size): 26 | if next_idx < N: 27 | img = Xs[next_idx] 28 | low, high = np.min(img), np.max(img) 29 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 30 | # grid[y0:y1, x0:x1] = Xs[next_idx] 31 | next_idx += 1 32 | x0 += W + padding 33 | x1 += W + padding 34 | y0 += H + padding 35 | y1 += H + padding 36 | # grid_max = np.max(grid) 37 | # grid_min = np.min(grid) 38 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 39 | return grid 40 | 41 | def vis_grid(Xs): 42 | """ visualize a grid of images """ 43 | (N, H, W, C) = Xs.shape 44 | A = int(ceil(sqrt(N))) 45 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 46 | G *= np.min(Xs) 47 | n = 0 48 | for y in range(A): 49 | for x in range(A): 50 | if n < N: 51 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 52 | n += 1 53 | # normalize to [0,1] 54 | maxg = G.max() 55 | ming = G.min() 56 | G = (G - ming)/(maxg-ming) 57 | return G 58 | 59 | def vis_nn(rows): 60 | """ visualize array of arrays of images """ 61 | N = len(rows) 62 | D = len(rows[0]) 63 | H,W,C = rows[0][0].shape 64 | Xs = rows[0][0] 65 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 66 | for y in range(N): 67 | for x in range(D): 68 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 69 | # normalize to [0,1] 70 | maxg = G.max() 71 | ming = G.min() 72 | G = (G - ming)/(maxg-ming) 73 | return G 74 | -------------------------------------------------------------------------------- /assignment1/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment1/makepdf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | 5 | try: 6 | from PyPDF2 import PdfFileMerger 7 | MERGE = True 8 | except ImportError: 9 | print("Could not find PyPDF2. 
Leaving pdf files unmerged.") 10 | MERGE = False 11 | 12 | 13 | def main(files): 14 | os_args = [ 15 | 'jupyter', 16 | 'nbconvert', 17 | '--log-level', 18 | 'CRITICAL', 19 | '--to', 20 | 'pdf', 21 | ] 22 | for f in files: 23 | os_args.append(f) 24 | subprocess.run(os_args) 25 | print("Created PDF {}.".format(f)) 26 | if MERGE: 27 | pdfs = [f.split('.')[0] + ".pdf" for f in files] 28 | merger = PdfFileMerger() 29 | for pdf in pdfs: 30 | merger.append(pdf) 31 | merger.write("assignment.pdf") 32 | merger.close() 33 | for pdf in pdfs: 34 | os.remove(pdf) 35 | 36 | 37 | if __name__ == "__main__": 38 | parser = argparse.ArgumentParser() 39 | # we pass in explicit notebook arg so that we can provide 40 | # an ordered list and produce an ordered pdf 41 | parser.add_argument("--notebooks", type=str, nargs='+', required=True) 42 | args = parser.parse_args() 43 | main(args.notebooks) 44 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.0 4 | certifi==2019.3.9 5 | chardet==3.0.4 6 | colorama==0.4.1 7 | cycler==0.10.0 8 | decorator==4.4.0 9 | defusedxml==0.5.0 10 | entrypoints==0.3 11 | future==0.17.1 12 | gitdb2==2.0.5 13 | GitPython==2.1.11 14 | idna==2.8 15 | ipykernel==5.1.0 16 | ipython==7.4.0 17 | ipython-genutils==0.2.0 18 | ipywidgets==7.4.2 19 | imageio==2.8.0 20 | jedi==0.13.3 21 | Jinja2==2.10 22 | jsonschema==3.0.1 23 | jupyter==1.0.0 24 | jupyter-client==5.2.4 25 | jupyter-console==6.0.0 26 | jupyter-core==4.4.0 27 | jupyterlab==0.35.4 28 | jupyterlab-server==0.2.0 29 | kiwisolver==1.0.1 30 | MarkupSafe==1.1.1 31 | matplotlib==3.0.3 32 | mistune==0.8.4 33 | nbconvert==5.4.1 34 | nbdime==1.0.5 35 | nbformat==4.4.0 36 | notebook==5.7.8 37 | numpy==1.16.2 38 | pandocfilters==1.4.2 39 | parso==0.3.4 40 | pexpect==4.6.0 41 | pickleshare==0.7.5 42 | Pillow==6.0.0 43 | prometheus-client==0.6.0 44 | prompt-toolkit==2.0.9 45 | ptyprocess==0.6.0 46 | Pygments==2.3.1 47 | pyparsing==2.3.1 48 | pyrsistent==0.14.11 49 | python-dateutil==2.8.0 50 | pyzmq==18.0.1 51 | qtconsole==4.4.3 52 | requests==2.21.0 53 | scipy==1.2.1 54 | Send2Trash==1.5.0 55 | six==1.12.0 56 | smmap2==2.0.5 57 | terminado==0.8.2 58 | testpath==0.4.2 59 | tornado==6.0.2 60 | traitlets==4.3.2 61 | urllib3==1.24.1 62 | wcwidth==0.1.7 63 | webencodings==0.5.1 64 | widgetsnbextension==3.4.2 65 | -------------------------------------------------------------------------------- /assignment2/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS 3 | set -euo pipefail 4 | 5 | CODE=( 6 | "cs231n/layers.py" 7 | "cs231n/classifiers/fc_net.py" 8 | "cs231n/optim.py" 9 | "cs231n/solver.py" 10 | "cs231n/classifiers/cnn.py" 11 | ) 12 | 13 | # these notebooks should ideally 14 | # be in order of questions so 15 | # that the generated pdf is 16 | # in order of questions 17 | NOTEBOOKS=( 18 | "FullyConnectedNets.ipynb" 19 | "BatchNormalization.ipynb" 20 | "Dropout.ipynb" 21 | "ConvolutionalNetworks.ipynb" 22 | "PyTorch.ipynb" 23 | "TensorFlow.ipynb" 24 | ) 25 | 26 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" ) 27 | 28 | LOCAL_DIR=`pwd` 29 | ASSIGNMENT_NO=2 30 | ZIP_FILENAME="a2.zip" 31 | 32 | C_R="\e[31m" 33 | C_G="\e[32m" 34 | C_BLD="\e[1m" 35 | C_E="\e[0m" 36 | 37 | for FILE in "${FILES[@]}" 38 | do 39 | if [ ! 
-f ${FILE} ]; then 40 | echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}" 41 | exit 0 42 | fi 43 | done 44 | 45 | echo -e "### Zipping file ###" 46 | rm -f ${ZIP_FILENAME} 47 | zip -q "${ZIP_FILENAME}" -r ${NOTEBOOKS[@]} $(find . -name "*.py") $(find . -name "*.pyx") -x "makepdf.py" 48 | 49 | echo -e "### Creating PDFs ###" 50 | python makepdf.py --notebooks "${NOTEBOOKS[@]}" 51 | 52 | echo -e "### Done! Please submit a2.zip and the pdfs to Gradescope. ###" 53 | -------------------------------------------------------------------------------- /assignment2/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/__init__.py -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/cnn.py: -------------------------------------------------------------------------------- 1 | from builtins import object 2 | import numpy as np 3 | 4 | from ..layers import * 5 | from ..fast_layers import * 6 | from ..layer_utils import * 7 | 8 | 9 | class ThreeLayerConvNet(object): 10 | """ 11 | A three-layer convolutional network with the following architecture: 12 | 13 | conv - relu - 2x2 max pool - affine - relu - affine - softmax 14 | 15 | The network operates on minibatches of data that have shape (N, C, H, W) 16 | consisting of N images, each with height H and width W and with C input 17 | channels. 18 | """ 19 | 20 | def __init__( 21 | self, 22 | input_dim=(3, 32, 32), 23 | num_filters=32, 24 | filter_size=7, 25 | hidden_dim=100, 26 | num_classes=10, 27 | weight_scale=1e-3, 28 | reg=0.0, 29 | dtype=np.float32, 30 | ): 31 | """ 32 | Initialize a new network. 33 | 34 | Inputs: 35 | - input_dim: Tuple (C, H, W) giving size of input data 36 | - num_filters: Number of filters to use in the convolutional layer 37 | - filter_size: Width/height of filters to use in the convolutional layer 38 | - hidden_dim: Number of units to use in the fully-connected hidden layer 39 | - num_classes: Number of scores to produce from the final affine layer. 40 | - weight_scale: Scalar giving standard deviation for random initialization 41 | of weights. 42 | - reg: Scalar giving L2 regularization strength 43 | - dtype: numpy datatype to use for computation. 44 | """ 45 | self.params = {} 46 | self.reg = reg 47 | self.dtype = dtype 48 | 49 | ############################################################################ 50 | # TODO: Initialize weights and biases for the three-layer convolutional # 51 | # network. Weights should be initialized from a Gaussian centered at 0.0 # 52 | # with standard deviation equal to weight_scale; biases should be # 53 | # initialized to zero. All weights and biases should be stored in the # 54 | # dictionary self.params. 
Store weights and biases for the convolutional # 55 | # layer using the keys 'W1' and 'b1'; use keys 'W2' and 'b2' for the # 56 | # weights and biases of the hidden affine layer, and keys 'W3' and 'b3' # 57 | # for the weights and biases of the output affine layer. # 58 | # # 59 | # IMPORTANT: For this assignment, you can assume that the padding # 60 | # and stride of the first convolutional layer are chosen so that # 61 | # **the width and height of the input are preserved**. Take a look at # 62 | # the start of the loss() function to see how that happens. # 63 | ############################################################################ 64 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 65 | 66 | pad = 1 67 | stride = 2 68 | filter_size = pad*2 + 1 69 | W1 = weight_scale * np.random.randn(num_filters, input_dim[0], filter_size, filter_size) 70 | b1 = np.zeros(num_filters) 71 | 72 | out_h = (input_dim[1] - filter_size + 2*pad) // stride + 1 73 | out_w = (input_dim[2] - filter_size + 2*pad) // stride + 1 74 | 75 | W2 = weight_scale * np.random.randn(num_filters*out_h*out_w, hidden_dim) 76 | b2 = np.zeros(hidden_dim) 77 | 78 | W3 = weight_scale * np.random.randn(hidden_dim, num_classes) 79 | b3 = np.zeros(num_classes) 80 | 81 | self.params["W1"], self.params["b1"] = W1, b1 82 | self.params["W2"], self.params["b2"] = W2, b2 83 | self.params["W3"], self.params["b3"] = W3, b3 84 | 85 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 86 | ############################################################################ 87 | # END OF YOUR CODE # 88 | ############################################################################ 89 | 90 | for k, v in self.params.items(): 91 | self.params[k] = v.astype(dtype) 92 | 93 | def loss(self, X, y=None): 94 | """ 95 | Evaluate loss and gradient for the three-layer convolutional network. 96 | 97 | Input / output: Same API as TwoLayerNet in fc_net.py. 98 | """ 99 | W1, b1 = self.params["W1"], self.params["b1"] 100 | W2, b2 = self.params["W2"], self.params["b2"] 101 | W3, b3 = self.params["W3"], self.params["b3"] 102 | 103 | # pass conv_param to the forward pass for the convolutional layer 104 | # Padding and stride chosen to preserve the input spatial size 105 | filter_size = W1.shape[2] 106 | conv_param = {"stride": 1, "pad": (filter_size - 1) // 2} 107 | 108 | # pass pool_param to the forward pass for the max-pooling layer 109 | pool_param = {"pool_height": 2, "pool_width": 2, "stride": 2} 110 | 111 | scores = None 112 | ############################################################################ 113 | # TODO: Implement the forward pass for the three-layer convolutional net, # 114 | # computing the class scores for X and storing them in the scores # 115 | # variable. # 116 | # # 117 | # Remember you can use the functions defined in cs231n/fast_layers.py and # 118 | # cs231n/layer_utils.py in your implementation (already imported). 
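# (Aside on the __init__ solution above: it hard-codes filter_size = 3,   #
    # quietly overriding the constructor's filter_size argument. Since the   #
    # conv in this loss() preserves H and W and the 2x2 pool halves them, a  #
    # sketch that honors the argument would be:                              #
    #                                                                        #
    #     W1 = weight_scale * np.random.randn(num_filters, input_dim[0],    #
    #                                         filter_size, filter_size)     #
    #     out_h, out_w = input_dim[1] // 2, input_dim[2] // 2               #
    #     W2 = weight_scale * np.random.randn(num_filters * out_h * out_w,  #
    #                                         hidden_dim)                   #
    #                                                                        #
    # Both variants yield shape-consistent parameters; the sketch also      #
    # works for the default filter_size of 7.)                              #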
# 119 | ############################################################################ 120 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 121 | 122 | conv_relu_pool_out, conv_relu_pool_cache = conv_relu_pool_forward(X, W1, b1, conv_param, pool_param) 123 | conv_relu_pool_out_flat = conv_relu_pool_out.reshape(conv_relu_pool_out.shape[0], -1) 124 | affine_relu_out, affine_relu_cache = affine_relu_forward(conv_relu_pool_out_flat, W2, b2) 125 | affine_out, affine_cache = affine_forward(affine_relu_out, W3, b3) 126 | scores = affine_out 127 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 128 | ############################################################################ 129 | # END OF YOUR CODE # 130 | ############################################################################ 131 | 132 | if y is None: 133 | return scores 134 | 135 | loss, grads = 0, {} 136 | ############################################################################ 137 | # TODO: Implement the backward pass for the three-layer convolutional net, # 138 | # storing the loss and gradients in the loss and grads variables. Compute # 139 | # data loss using softmax, and make sure that grads[k] holds the gradients # 140 | # for self.params[k]. Don't forget to add L2 regularization! # 141 | # # 142 | # NOTE: To ensure that your implementation matches ours and you pass the # 143 | # automated tests, make sure that your L2 regularization includes a factor # 144 | # of 0.5 to simplify the expression for the gradient. # 145 | ############################################################################ 146 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 147 | 148 | reg = self.reg 149 | loss, dscore = softmax_loss(scores, y) 150 | loss += 0.5* reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2) + 0.5 * reg * np.sum(W3 * W3) 151 | 152 | daffine_out, dW3, db3 = affine_backward(dscore, affine_cache) 153 | daffine_relu_out, dW2, db2 = affine_relu_backward(daffine_out, affine_relu_cache) 154 | daffine_relu_out_build = daffine_relu_out.reshape(conv_relu_pool_out.shape) 155 | dconv_relu_pool_out, dW1, db1 = conv_relu_pool_backward(daffine_relu_out_build, conv_relu_pool_cache) 156 | 157 | grads["W1"], grads["b1"] = dW1 + reg * W1, db1 158 | grads["W2"], grads["b2"] = dW2 + reg * W2, db2 159 | grads["W3"], grads["b3"] = dW3 + reg * W3, db3 160 | 161 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 162 | ############################################################################ 163 | # END OF YOUR CODE # 164 | ############################################################################ 165 | 166 | return loss, grads 167 | -------------------------------------------------------------------------------- /assignment2/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from six.moves import cPickle as pickle 5 | import numpy as np 6 | import os 7 | from imageio import imread 8 | import platform 9 | 10 | 11 | def load_pickle(f): 12 | version = platform.python_version_tuple() 13 | if version[0] == "2": 14 | return pickle.load(f) 15 | elif version[0] == "3": 16 | return pickle.load(f, encoding="latin1") 17 | raise ValueError("invalid python version: {}".format(version)) 18 | 19 | 20 | def load_CIFAR_batch(filename): 21 | """ load single batch of cifar """ 22 | with open(filename, "rb") as f: 23 | datadict = load_pickle(f) 24 | X = datadict["data"] 25 | Y = 
datadict["labels"] 26 | X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float") 27 | Y = np.array(Y) 28 | return X, Y 29 | 30 | 31 | def load_CIFAR10(ROOT): 32 | """ load all of cifar """ 33 | xs = [] 34 | ys = [] 35 | for b in range(1, 6): 36 | f = os.path.join(ROOT, "data_batch_%d" % (b,)) 37 | X, Y = load_CIFAR_batch(f) 38 | xs.append(X) 39 | ys.append(Y) 40 | Xtr = np.concatenate(xs) 41 | Ytr = np.concatenate(ys) 42 | del X, Y 43 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, "test_batch")) 44 | return Xtr, Ytr, Xte, Yte 45 | 46 | 47 | def get_CIFAR10_data( 48 | num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True 49 | ): 50 | """ 51 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 52 | it for classifiers. These are the same steps as we used for the SVM, but 53 | condensed to a single function. 54 | """ 55 | # Load the raw CIFAR-10 data 56 | cifar10_dir = os.path.join( 57 | os.path.dirname(__file__), "datasets/cifar-10-batches-py" 58 | ) 59 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 60 | 61 | # Subsample the data 62 | mask = list(range(num_training, num_training + num_validation)) 63 | X_val = X_train[mask] 64 | y_val = y_train[mask] 65 | mask = list(range(num_training)) 66 | X_train = X_train[mask] 67 | y_train = y_train[mask] 68 | mask = list(range(num_test)) 69 | X_test = X_test[mask] 70 | y_test = y_test[mask] 71 | 72 | # Normalize the data: subtract the mean image 73 | if subtract_mean: 74 | mean_image = np.mean(X_train, axis=0) 75 | X_train -= mean_image 76 | X_val -= mean_image 77 | X_test -= mean_image 78 | 79 | # Transpose so that channels come first 80 | X_train = X_train.transpose(0, 3, 1, 2).copy() 81 | X_val = X_val.transpose(0, 3, 1, 2).copy() 82 | X_test = X_test.transpose(0, 3, 1, 2).copy() 83 | 84 | # Package data into a dictionary 85 | return { 86 | "X_train": X_train, 87 | "y_train": y_train, 88 | "X_val": X_val, 89 | "y_val": y_val, 90 | "X_test": X_test, 91 | "y_test": y_test, 92 | } 93 | 94 | 95 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 96 | """ 97 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 98 | TinyImageNet-200 have the same directory structure, so this can be used 99 | to load any of them. 100 | 101 | Inputs: 102 | - path: String giving path to the directory to load. 103 | - dtype: numpy datatype used to load the data. 104 | - subtract_mean: Whether to subtract the mean training image. 105 | 106 | Returns: A dictionary with the following entries: 107 | - class_names: A list where class_names[i] is a list of strings giving the 108 | WordNet names for class i in the loaded dataset. 109 | - X_train: (N_tr, 3, 64, 64) array of training images 110 | - y_train: (N_tr,) array of training labels 111 | - X_val: (N_val, 3, 64, 64) array of validation images 112 | - y_val: (N_val,) array of validation labels 113 | - X_test: (N_test, 3, 64, 64) array of testing images. 114 | - y_test: (N_test,) array of test labels; if test labels are not available 115 | (such as in student code) then y_test will be None. 
116 | - mean_image: (3, 64, 64) array giving mean training image 117 | """ 118 | # First load wnids 119 | with open(os.path.join(path, "wnids.txt"), "r") as f: 120 | wnids = [x.strip() for x in f] 121 | 122 | # Map wnids to integer labels 123 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 124 | 125 | # Use words.txt to get names for each class 126 | with open(os.path.join(path, "words.txt"), "r") as f: 127 | wnid_to_words = dict(line.split("\t") for line in f) 128 | for wnid, words in wnid_to_words.items(): 129 | wnid_to_words[wnid] = [w.strip() for w in words.split(",")] 130 | class_names = [wnid_to_words[wnid] for wnid in wnids] 131 | 132 | # Next load training data. 133 | X_train = [] 134 | y_train = [] 135 | for i, wnid in enumerate(wnids): 136 | if (i + 1) % 20 == 0: 137 | print("loading training data for synset %d / %d" % (i + 1, len(wnids))) 138 | # To figure out the filenames we need to open the boxes file 139 | boxes_file = os.path.join(path, "train", wnid, "%s_boxes.txt" % wnid) 140 | with open(boxes_file, "r") as f: 141 | filenames = [x.split("\t")[0] for x in f] 142 | num_images = len(filenames) 143 | 144 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 145 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 146 | for j, img_file in enumerate(filenames): 147 | img_file = os.path.join(path, "train", wnid, "images", img_file) 148 | img = imread(img_file) 149 | if img.ndim == 2: 150 | ## grayscale file 151 | img.shape = (64, 64, 1) 152 | X_train_block[j] = img.transpose(2, 0, 1) 153 | X_train.append(X_train_block) 154 | y_train.append(y_train_block) 155 | 156 | # We need to concatenate all training data 157 | X_train = np.concatenate(X_train, axis=0) 158 | y_train = np.concatenate(y_train, axis=0) 159 | 160 | # Next load validation data 161 | with open(os.path.join(path, "val", "val_annotations.txt"), "r") as f: 162 | img_files = [] 163 | val_wnids = [] 164 | for line in f: 165 | img_file, wnid = line.split("\t")[:2] 166 | img_files.append(img_file) 167 | val_wnids.append(wnid) 168 | num_val = len(img_files) 169 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 170 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 171 | for i, img_file in enumerate(img_files): 172 | img_file = os.path.join(path, "val", "images", img_file) 173 | img = imread(img_file) 174 | if img.ndim == 2: 175 | img.shape = (64, 64, 1) 176 | X_val[i] = img.transpose(2, 0, 1) 177 | 178 | # Next load test images 179 | # Students won't have test labels, so we need to iterate over files in the 180 | # images directory. 
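    # (Editor's sketch: for CIFAR-10 the usual entry point is
    # get_CIFAR10_data() above; assuming get_datasets.sh has been run,
    #
    #     data = get_CIFAR10_data()
    #     data['X_train'].shape   # (49000, 3, 32, 32), channels first
    #
    # The Tiny ImageNet loader here returns the same dict-of-arrays layout.)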
181 | img_files = os.listdir(os.path.join(path, "test", "images")) 182 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 183 | for i, img_file in enumerate(img_files): 184 | img_file = os.path.join(path, "test", "images", img_file) 185 | img = imread(img_file) 186 | if img.ndim == 2: 187 | img.shape = (64, 64, 1) 188 | X_test[i] = img.transpose(2, 0, 1) 189 | 190 | y_test = None 191 | y_test_file = os.path.join(path, "test", "test_annotations.txt") 192 | if os.path.isfile(y_test_file): 193 | with open(y_test_file, "r") as f: 194 | img_file_to_wnid = {} 195 | for line in f: 196 | line = line.split("\t") 197 | img_file_to_wnid[line[0]] = line[1] 198 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files] 199 | y_test = np.array(y_test) 200 | 201 | mean_image = X_train.mean(axis=0) 202 | if subtract_mean: 203 | X_train -= mean_image[None] 204 | X_val -= mean_image[None] 205 | X_test -= mean_image[None] 206 | 207 | return { 208 | "class_names": class_names, 209 | "X_train": X_train, 210 | "y_train": y_train, 211 | "X_val": X_val, 212 | "y_val": y_val, 213 | "X_test": X_test, 214 | "y_test": y_test, 215 | "class_names": class_names, 216 | "mean_image": mean_image, 217 | } 218 | 219 | 220 | def load_models(models_dir): 221 | """ 222 | Load saved models from disk. This will attempt to unpickle all files in a 223 | directory; any files that give errors on unpickling (such as README.txt) 224 | will be skipped. 225 | 226 | Inputs: 227 | - models_dir: String giving the path to a directory containing model files. 228 | Each model file is a pickled dictionary with a 'model' field. 229 | 230 | Returns: 231 | A dictionary mapping model file names to models. 232 | """ 233 | models = {} 234 | for model_file in os.listdir(models_dir): 235 | with open(os.path.join(models_dir, model_file), "rb") as f: 236 | try: 237 | models[model_file] = load_pickle(f)["model"] 238 | except pickle.UnpicklingError: 239 | continue 240 | return models 241 | 242 | 243 | def load_imagenet_val(num=None): 244 | """Load a handful of validation images from ImageNet. 245 | 246 | Inputs: 247 | - num: Number of images to load (max of 25) 248 | 249 | Returns: 250 | - X: numpy array with shape [num, 224, 224, 3] 251 | - y: numpy array of integer image labels, shape [num] 252 | - class_names: dict mapping integer label to class name 253 | """ 254 | imagenet_fn = os.path.join( 255 | os.path.dirname(__file__), "datasets/imagenet_val_25.npz" 256 | ) 257 | if not os.path.isfile(imagenet_fn): 258 | print("file %s not found" % imagenet_fn) 259 | print("Run the following:") 260 | print("cd cs231n/datasets") 261 | print("bash get_imagenet_val.sh") 262 | assert False, "Need to download imagenet_val_25.npz" 263 | f = np.load(imagenet_fn) 264 | X = f["X"] 265 | y = f["y"] 266 | class_names = f["label_map"].item() 267 | if num is not None: 268 | X = X[:num] 269 | y = y[:num] 270 | return X, y, class_names 271 | -------------------------------------------------------------------------------- /assignment2/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | if [ ! 
-d "cifar-10-batches-py" ]; then 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | fi 6 | -------------------------------------------------------------------------------- /assignment2/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | 9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 10 | """ 11 | a naive implementation of numerical gradient of f at x 12 | - f should be a function that takes a single argument 13 | - x is the point (numpy array) to evaluate the gradient at 14 | """ 15 | 16 | fx = f(x) # evaluate function value at original point 17 | grad = np.zeros_like(x) 18 | # iterate over all indexes in x 19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 20 | while not it.finished: 21 | 22 | # evaluate function at x+h 23 | ix = it.multi_index 24 | oldval = x[ix] 25 | x[ix] = oldval + h # increment by h 26 | fxph = f(x) # evalute f(x + h) 27 | x[ix] = oldval - h 28 | fxmh = f(x) # evaluate f(x - h) 29 | x[ix] = oldval # restore 30 | 31 | # compute the partial derivative with centered formula 32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 33 | if verbose: 34 | print(ix, grad[ix]) 35 | it.iternext() # step to next dimension 36 | 37 | return grad 38 | 39 | 40 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 41 | """ 42 | Evaluate a numeric gradient for a function that accepts a numpy 43 | array and returns a numpy array. 44 | """ 45 | grad = np.zeros_like(x) 46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 47 | while not it.finished: 48 | ix = it.multi_index 49 | 50 | oldval = x[ix] 51 | x[ix] = oldval + h 52 | pos = f(x).copy() 53 | x[ix] = oldval - h 54 | neg = f(x).copy() 55 | x[ix] = oldval 56 | 57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 58 | it.iternext() 59 | return grad 60 | 61 | 62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 63 | """ 64 | Compute numeric gradients for a function that operates on input 65 | and output blobs. 66 | 67 | We assume that f accepts several input blobs as arguments, followed by a 68 | blob where outputs will be written. For example, f might be called like: 69 | 70 | f(x, w, out) 71 | 72 | where x and w are input Blobs, and the result of f will be written to out. 
73 | 74 | Inputs: 75 | - f: function 76 | - inputs: tuple of input blobs 77 | - output: output blob 78 | - h: step size 79 | """ 80 | numeric_diffs = [] 81 | for input_blob in inputs: 82 | diff = np.zeros_like(input_blob.diffs) 83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"]) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs( 105 | lambda *args: net.forward(), inputs, output, h=h 106 | ) 107 | 108 | 109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 110 | """ 111 | Sample a few random elements and only check the numerical gradient 112 | in those dimensions. 113 | """ 114 | 115 | for i in range(num_checks): 116 | ix = tuple([randrange(m) for m in x.shape]) 117 | 118 | oldval = x[ix] 119 | x[ix] = oldval + h # increment by h 120 | fxph = f(x) # evaluate f(x + h) 121 | x[ix] = oldval - h # decrement by h 122 | fxmh = f(x) # evaluate f(x - h) 123 | x[ix] = oldval # reset 124 | 125 | grad_numerical = (fxph - fxmh) / (2 * h) 126 | grad_analytic = analytic_grad[ix] 127 | rel_error = abs(grad_numerical - grad_analytic) / ( 128 | abs(grad_numerical) + abs(grad_analytic) 129 | ) 130 | print( 131 | "numerical: %f analytic: %f, relative error: %e" 132 | % (grad_numerical, grad_analytic, rel_error) 133 | ) 134 | -------------------------------------------------------------------------------- /assignment2/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_width) % stride == 0 10 | out_height = (H + 2 * padding - field_height) // stride + 1 11 | out_width = (W + 2 * padding - field_width) // stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant") 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride) 33 | 34 | cols = x_padded[:, k, i, j] 35 | C = x.shape[1] 36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 37 | return cols 38 | 39 | 40 | def col2im_indices(cols, x_shape, field_height=3,
field_width=3, padding=1, stride=1): 41 | """ An implementation of col2im based on fancy indexing and np.add.at """ 42 | N, C, H, W = x_shape 43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride) 46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 49 | if padding == 0: 50 | return x_padded 51 | return x_padded[:, :, padding:-padding, padding:-padding] 52 | 53 | 54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 55 | 56 | pass 57 | 58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | -------------------------------------------------------------------------------- /assignment2/cs231n/im2col_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # DTYPE = np.float64 6 | # ctypedef np.float64_t DTYPE_t 7 | 8 | ctypedef fused DTYPE_t: 9 | np.float32_t 10 | np.float64_t 11 | 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 13 | int field_width, int padding, int stride): 14 | cdef int N = x.shape[0] 15 | cdef int C = x.shape[1] 16 | cdef int H = x.shape[2] 17 | cdef int W = x.shape[3] 18 | 19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 21 | 22 | cdef int p = padding 23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 25 | 26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 27 | (C * field_height * field_width, N * HH * WW), 28 | dtype=x.dtype) 29 | 30 | # Moving the inner loop to a C function with no bounds checking works, but does 31 | # not seem to help performance in any measurable way. 32 | 33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 34 | field_height, field_width, padding, stride) 35 | return cols 36 | 37 | 38 | @cython.boundscheck(False) 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 40 | np.ndarray[DTYPE_t, ndim=4] x_padded, 41 | int N, int C, int H, int W, int HH, int WW, 42 | int field_height, int field_width, int padding, int stride) except? -1: 43 | cdef int c, ii, jj, row, yy, xx, i, col 44 | 45 | for c in range(C): 46 | for yy in range(HH): 47 | for xx in range(WW): 48 | for ii in range(field_height): 49 | for jj in range(field_width): 50 | row = c * field_width * field_height + ii * field_height + jj 51 | for i in range(N): 52 | col = yy * WW * N + xx * N + i 53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 54 | 55 | 56 | 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 58 | int field_height, int field_width, int padding, int stride): 59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 63 | dtype=cols.dtype) 64 | 65 | # Moving the inner loop to a C-function with no bounds checking improves 66 | # performance quite a bit for col2im. 
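# (Editor's note on the pattern: im2col exists so that convolution becomes one
# matrix multiply. A sketch using the plain-Python im2col_indices from
# im2col.py, with filters w of shape (F, C, HH, WW):
#
#     cols = im2col_indices(x, HH, WW, padding, stride)  # (C*HH*WW, N*H'*W')
#     out = w.reshape(F, -1).dot(cols)                   # (F, N*H'*W')
#     out = out.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)
#
# col2im below is the matching scatter: overlapping windows accumulate,
# which is why the Python version relies on np.add.at.)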
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 119 | if pad > 0: 120 | return x_padded[:, :, pad:-pad, pad:-pad] 121 | return x_padded 122 | -------------------------------------------------------------------------------- /assignment2/cs231n/layer_utils.py: -------------------------------------------------------------------------------- 1 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 2 | 3 | pass 4 | 5 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 6 | from .layers import * 7 | from .fast_layers import * 8 | 9 | 10 | def affine_relu_forward(x, w, b): 11 | """ 12 | Convenience layer that perorms an affine transform followed by a ReLU 13 | 14 | Inputs: 15 | - x: Input to the affine layer 16 | - w, b: Weights for the affine layer 17 | 18 | Returns a tuple of: 19 | - out: Output from the ReLU 20 | - cache: Object to give to the backward pass 21 | """ 22 | a, fc_cache = affine_forward(x, w, b) 23 | out, relu_cache = relu_forward(a) 24 | cache = (fc_cache, relu_cache) 25 | return out, cache 26 | 27 | 28 | def affine_relu_backward(dout, cache): 29 | """ 30 | Backward pass for the affine-relu convenience layer 31 | """ 32 | fc_cache, relu_cache = cache 33 | da = relu_backward(dout, relu_cache) 34 | dx, dw, db = affine_backward(da, fc_cache) 35 | return dx, dw, db 36 | 37 | 38 | ################### Bingcheng HU's Code for Q2 starts ##################### 39 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param): 40 | """ 41 | Convenience layer that perorms an affine transform followed by a Batch Norm then a ReLU 42 | 43 | Returns a tuple of: 
44 | - out: Output from the ReLU 45 | - cache: Object to give to the backward pass 46 | """ 47 | a, fc_cache = affine_forward(x, w, b) 48 | an, bn_cache = batchnorm_forward(a, gamma, beta, bn_param) 49 | out, relu_cache = relu_forward(an) 50 | cache = (fc_cache, bn_cache, relu_cache) 51 | return out, cache 52 | 53 | def affine_bn_relu_backward(dout, cache): 54 | """ 55 | Backward pass for the affine_bn_relu convenience layer 56 | """ 57 | fc_cache, bn_cache, relu_cache = cache 58 | da = relu_backward(dout, relu_cache) 59 | dan, dgamma, dbeta = batchnorm_backward_alt(da, bn_cache) 60 | dx, dw, db = affine_backward(dan, fc_cache) 61 | return dx, dw, db, dgamma, dbeta 62 | 63 | def affine_ln_relu_forward(x, w, b, gamma, beta, bn_param): 64 | """ 65 | Convenience layer that performs an affine transform followed by a Layer Norm then a ReLU 66 | 67 | Returns a tuple of: 68 | - out: Output from the ReLU 69 | - cache: Object to give to the backward pass 70 | """ 71 | a, fc_cache = affine_forward(x, w, b) 72 | an, bn_cache = layernorm_forward(a, gamma, beta, bn_param) 73 | out, relu_cache = relu_forward(an) 74 | cache = (fc_cache, bn_cache, relu_cache) 75 | return out, cache 76 | 77 | def affine_ln_relu_backward(dout, cache): 78 | """ 79 | Backward pass for the affine_ln_relu convenience layer 80 | """ 81 | fc_cache, bn_cache, relu_cache = cache 82 | da = relu_backward(dout, relu_cache) 83 | dan, dgamma, dbeta = layernorm_backward(da, bn_cache) 84 | dx, dw, db = affine_backward(dan, fc_cache) 85 | return dx, dw, db, dgamma, dbeta 86 | ################### Bingcheng HU's Code for Q2 ends ##################### 87 | 88 | def conv_relu_forward(x, w, b, conv_param): 89 | """ 90 | A convenience layer that performs a convolution followed by a ReLU. 91 | 92 | Inputs: 93 | - x: Input to the convolutional layer 94 | - w, b, conv_param: Weights and parameters for the convolutional layer 95 | 96 | Returns a tuple of: 97 | - out: Output from the ReLU 98 | - cache: Object to give to the backward pass 99 | """ 100 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 101 | out, relu_cache = relu_forward(a) 102 | cache = (conv_cache, relu_cache) 103 | return out, cache 104 | 105 | 106 | def conv_relu_backward(dout, cache): 107 | """ 108 | Backward pass for the conv-relu convenience layer. 109 | """ 110 | conv_cache, relu_cache = cache 111 | da = relu_backward(dout, relu_cache) 112 | dx, dw, db = conv_backward_fast(da, conv_cache) 113 | return dx, dw, db 114 | 115 | 116 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 117 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 118 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 119 | out, relu_cache = relu_forward(an) 120 | cache = (conv_cache, bn_cache, relu_cache) 121 | return out, cache 122 | 123 | 124 | def conv_bn_relu_backward(dout, cache): 125 | conv_cache, bn_cache, relu_cache = cache 126 | dan = relu_backward(dout, relu_cache) 127 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 128 | dx, dw, db = conv_backward_fast(da, conv_cache) 129 | return dx, dw, db, dgamma, dbeta 130 | 131 | 132 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 133 | """ 134 | Convenience layer that performs a convolution, a ReLU, and a pool.
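Example round trip (editor's sketch; dout is an upstream gradient with the
    same shape as out):

        out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
        dx, dw, db = conv_relu_pool_backward(dout, cache)

    The affine_bn_relu / affine_ln_relu pairs above compose the same way and
    additionally return dgamma and dbeta.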
135 | 136 | Inputs: 137 | - x: Input to the convolutional layer 138 | - w, b, conv_param: Weights and parameters for the convolutional layer 139 | - pool_param: Parameters for the pooling layer 140 | 141 | Returns a tuple of: 142 | - out: Output from the pooling layer 143 | - cache: Object to give to the backward pass 144 | """ 145 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 146 | s, relu_cache = relu_forward(a) 147 | out, pool_cache = max_pool_forward_fast(s, pool_param) 148 | cache = (conv_cache, relu_cache, pool_cache) 149 | return out, cache 150 | 151 | 152 | def conv_relu_pool_backward(dout, cache): 153 | """ 154 | Backward pass for the conv-relu-pool convenience layer 155 | """ 156 | conv_cache, relu_cache, pool_cache = cache 157 | ds = max_pool_backward_fast(dout, pool_cache) 158 | da = relu_backward(ds, relu_cache) 159 | dx, dw, db = conv_backward_fast(da, conv_cache) 160 | return dx, dw, db 161 | -------------------------------------------------------------------------------- /assignment2/cs231n/notebook_images/batchnorm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/batchnorm_graph.png -------------------------------------------------------------------------------- /assignment2/cs231n/notebook_images/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/kitten.jpg -------------------------------------------------------------------------------- /assignment2/cs231n/notebook_images/normalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/normalization.png -------------------------------------------------------------------------------- /assignment2/cs231n/notebook_images/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/puppy.jpg -------------------------------------------------------------------------------- /assignment2/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used 5 | for training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning 16 | rate, momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 
21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not 25 | perform well; however the default values of the other hyperparameters should 26 | work well for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: 41 | config = {} 42 | config.setdefault("learning_rate", 1e-2) 43 | 44 | w -= config["learning_rate"] * dw 45 | return w, config 46 | 47 | 48 | def sgd_momentum(w, dw, config=None): 49 | """ 50 | Performs stochastic gradient descent with momentum. 51 | 52 | config format: 53 | - learning_rate: Scalar learning rate. 54 | - momentum: Scalar between 0 and 1 giving the momentum value. 55 | Setting momentum = 0 reduces to sgd. 56 | - velocity: A numpy array of the same shape as w and dw used to store a 57 | moving average of the gradients. 58 | """ 59 | if config is None: 60 | config = {} 61 | config.setdefault("learning_rate", 1e-2) 62 | config.setdefault("momentum", 0.9) 63 | v = config.get("velocity", np.zeros_like(w)) 64 | 65 | next_w = None 66 | ########################################################################### 67 | # TODO: Implement the momentum update formula. Store the updated value in # 68 | # the next_w variable. You should also use and update the velocity v. # 69 | ########################################################################### 70 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 71 | 72 | v = config['momentum'] * v -config['learning_rate'] * dw 73 | next_w = w + v 74 | 75 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 76 | ########################################################################### 77 | # END OF YOUR CODE # 78 | ########################################################################### 79 | config["velocity"] = v 80 | 81 | return next_w, config 82 | 83 | 84 | def rmsprop(w, dw, config=None): 85 | """ 86 | Uses the RMSProp update rule, which uses a moving average of squared 87 | gradient values to set adaptive per-parameter learning rates. 88 | 89 | config format: 90 | - learning_rate: Scalar learning rate. 91 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 92 | gradient cache. 93 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 94 | - cache: Moving average of second moments of gradients. 95 | """ 96 | if config is None: 97 | config = {} 98 | config.setdefault("learning_rate", 1e-2) 99 | config.setdefault("decay_rate", 0.99) 100 | config.setdefault("epsilon", 1e-8) 101 | config.setdefault("cache", np.zeros_like(w)) 102 | 103 | next_w = None 104 | ########################################################################### 105 | # TODO: Implement the RMSprop update formula, storing the next value of w # 106 | # in the next_w variable. Don't forget to update cache value stored in # 107 | # config['cache']. 
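    # (All rules in this file share the interface documented at the top of
    # the module; a minimal hypothetical training step looks like:
    #
    #     config = {'learning_rate': 1e-2}
    #     for _ in range(num_iters):
    #         dw = grad_fn(w)                 # grad_fn assumed user-supplied
    #         w, config = rmsprop(w, dw, config)
    #
    # and the same loop works with sgd, sgd_momentum, or adam.)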
# 108 | ########################################################################### 109 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 110 | decay_rate = config['decay_rate'] 111 | config['cache'] = decay_rate * config['cache'] + (1 - decay_rate) * (dw * dw) 112 | next_w = w - config['learning_rate'] * dw / (np.sqrt(config['cache']) + config['epsilon']) 113 | 114 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 115 | ########################################################################### 116 | # END OF YOUR CODE # 117 | ########################################################################### 118 | 119 | return next_w, config 120 | 121 | 122 | def adam(w, dw, config=None): 123 | """ 124 | Uses the Adam update rule, which incorporates moving averages of both the 125 | gradient and its square and a bias correction term. 126 | 127 | config format: 128 | - learning_rate: Scalar learning rate. 129 | - beta1: Decay rate for moving average of first moment of gradient. 130 | - beta2: Decay rate for moving average of second moment of gradient. 131 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 132 | - m: Moving average of gradient. 133 | - v: Moving average of squared gradient. 134 | - t: Iteration number. 135 | """ 136 | if config is None: 137 | config = {} 138 | config.setdefault("learning_rate", 1e-3) 139 | config.setdefault("beta1", 0.9) 140 | config.setdefault("beta2", 0.999) 141 | config.setdefault("epsilon", 1e-8) 142 | config.setdefault("m", np.zeros_like(w)) 143 | config.setdefault("v", np.zeros_like(w)) 144 | config.setdefault("t", 0) 145 | 146 | next_w = None 147 | ########################################################################### 148 | # TODO: Implement the Adam update formula, storing the next value of w in # 149 | # the next_w variable. Don't forget to update the m, v, and t variables # 150 | # stored in config. # 151 | # # 152 | # NOTE: In order to match the reference output, please modify t _before_ # 153 | # using it in any calculations. 
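    # (For reference, bias-corrected Adam -- the form the NOTE above implies,
    # with t incremented before use:
    #
    #     m = beta1 * m + (1 - beta1) * dw;      m_hat = m / (1 - beta1**t)
    #     v = beta2 * v + (1 - beta2) * dw**2;   v_hat = v / (1 - beta2**t)
    #     w = w - learning_rate * m_hat / (sqrt(v_hat) + epsilon)  )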
# 154 | ########################################################################### 155 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 156 | 157 | config['t'] += 1 158 | config['m'] = config['beta1']*config['m'] + (1-config['beta1']) * dw 159 | config['v'] = config['beta2']*config['v'] + (1-config['beta2']) * (dw*dw) 160 | m_hat = config['m'] / (1 - config['beta1'] ** config['t']) 161 | v_hat = config['v'] / (1 - config['beta2'] ** config['t']) 162 | next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon']) 163 | 164 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 165 | ########################################################################### 166 | # END OF YOUR CODE # 167 | ########################################################################### 168 | 169 | return next_w, config 170 | -------------------------------------------------------------------------------- /assignment2/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension( 8 | "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup(ext_modules=cythonize(extensions),) 13 | -------------------------------------------------------------------------------- /assignment2/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | from past.builtins import xrange 3 | 4 | from math import sqrt, ceil 5 | import numpy as np 6 | 7 | 8 | def visualize_grid(Xs, ubound=255.0, padding=1): 9 | """ 10 | Reshape a 4D tensor of image data to a grid for easy visualization.
11 | 12 | Inputs: 13 | - Xs: Data of shape (N, H, W, C) 14 | - ubound: Output grid will have values scaled to the range [0, ubound] 15 | - padding: The number of blank pixels between elements of the grid 16 | """ 17 | (N, H, W, C) = Xs.shape 18 | grid_size = int(ceil(sqrt(N))) 19 | grid_height = H * grid_size + padding * (grid_size - 1) 20 | grid_width = W * grid_size + padding * (grid_size - 1) 21 | grid = np.zeros((grid_height, grid_width, C)) 22 | next_idx = 0 23 | y0, y1 = 0, H 24 | for y in range(grid_size): 25 | x0, x1 = 0, W 26 | for x in range(grid_size): 27 | if next_idx < N: 28 | img = Xs[next_idx] 29 | low, high = np.min(img), np.max(img) 30 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 31 | # grid[y0:y1, x0:x1] = Xs[next_idx] 32 | next_idx += 1 33 | x0 += W + padding 34 | x1 += W + padding 35 | y0 += H + padding 36 | y1 += H + padding 37 | # grid_max = np.max(grid) 38 | # grid_min = np.min(grid) 39 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 40 | return grid 41 | 42 | 43 | def vis_grid(Xs): 44 | """ visualize a grid of images """ 45 | (N, H, W, C) = Xs.shape 46 | A = int(ceil(sqrt(N))) 47 | G = np.ones((A * H + A, A * W + A, C), Xs.dtype) 48 | G *= np.min(Xs) 49 | n = 0 50 | for y in range(A): 51 | for x in range(A): 52 | if n < N: 53 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = Xs[ 54 | n, :, :, : 55 | ] 56 | n += 1 57 | # normalize to [0,1] 58 | maxg = G.max() 59 | ming = G.min() 60 | G = (G - ming) / (maxg - ming) 61 | return G 62 | 63 | 64 | def vis_nn(rows): 65 | """ visualize array of arrays of images """ 66 | N = len(rows) 67 | D = len(rows[0]) 68 | H, W, C = rows[0][0].shape 69 | Xs = rows[0][0] 70 | G = np.ones((N * H + N, D * W + D, C), Xs.dtype) 71 | for y in range(N): 72 | for x in range(D): 73 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = rows[y][x] 74 | # normalize to [0,1] 75 | maxg = G.max() 76 | ming = G.min() 77 | G = (G - ming) / (maxg - ming) 78 | return G 79 | -------------------------------------------------------------------------------- /assignment2/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment2/makepdf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | 5 | try: 6 | from PyPDF2 import PdfFileMerger 7 | 8 | MERGE = True 9 | except ImportError: 10 | print("Could not find PyPDF2. 
Leaving pdf files unmerged.") 11 | MERGE = False 12 | 13 | 14 | def main(files): 15 | os_args = [ 16 | "jupyter", 17 | "nbconvert", 18 | "--log-level", 19 | "CRITICAL", 20 | "--to", 21 | "pdf", 22 | ] 23 | for f in files: 24 | os_args.append(f) 25 | subprocess.run(os_args) 26 | print("Created PDF {}.".format(f)) 27 | if MERGE: 28 | pdfs = [f.split(".")[0] + ".pdf" for f in files] 29 | merger = PdfFileMerger() 30 | for pdf in pdfs: 31 | merger.append(pdf) 32 | merger.write("assignment.pdf") 33 | merger.close() 34 | for pdf in pdfs: 35 | os.remove(pdf) 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = argparse.ArgumentParser() 40 | # we pass in explicit notebook arg so that we can provide 41 | # an ordered list and produce an ordered pdf 42 | parser.add_argument("--notebooks", type=str, nargs="+", required=True) 43 | args = parser.parse_args() 44 | main(args.notebooks) 45 | -------------------------------------------------------------------------------- /assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.0 4 | certifi==2019.3.9 5 | chardet==3.0.4 6 | colorama==0.4.1 7 | cycler==0.10.0 8 | Cython==0.29.16 9 | decorator==4.4.0 10 | defusedxml==0.5.0 11 | entrypoints==0.3 12 | future==0.17.1 13 | gitdb2==2.0.5 14 | GitPython==2.1.11 15 | idna==2.8 16 | ipykernel==5.1.0 17 | ipython==7.4.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.4.2 20 | imageio==2.8.0 21 | jedi==0.13.3 22 | Jinja2==2.10 23 | jsonschema==3.0.1 24 | jupyter==1.0.0 25 | jupyter-client==5.2.4 26 | jupyter-console==6.0.0 27 | jupyter-core==4.4.0 28 | jupyterlab==0.35.4 29 | jupyterlab-server==0.2.0 30 | kiwisolver==1.0.1 31 | MarkupSafe==1.1.1 32 | matplotlib==3.0.3 33 | mistune==0.8.4 34 | nbconvert==5.4.1 35 | nbdime==1.0.5 36 | nbformat==4.4.0 37 | notebook==5.7.8 38 | numpy==1.16.2 39 | pandocfilters==1.4.2 40 | parso==0.3.4 41 | pexpect==4.6.0 42 | pickleshare==0.7.5 43 | Pillow==6.0.0 44 | prometheus-client==0.6.0 45 | prompt-toolkit==2.0.9 46 | ptyprocess==0.6.0 47 | Pygments==2.3.1 48 | pyparsing==2.3.1 49 | pyrsistent==0.14.11 50 | python-dateutil==2.8.0 51 | pyzmq==18.0.1 52 | qtconsole==4.4.3 53 | requests==2.21.0 54 | scipy==1.2.1 55 | Send2Trash==1.5.0 56 | six==1.12.0 57 | smmap2==2.0.5 58 | terminado==0.8.2 59 | testpath==0.4.2 60 | tornado==6.0.2 61 | traitlets==4.3.2 62 | urllib3==1.24.1 63 | wcwidth==0.1.7 64 | webencodings==0.5.1 65 | widgetsnbextension==3.4.2 66 | -------------------------------------------------------------------------------- /assignment2/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment3/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS 3 | set -euo pipefail 4 | 5 | tensorflow=0 6 | while getopts "t::" flag 7 | do 8 | case "${flag}" in 9 | t) 10 | tensorflow=${OPTARG} 11 | ;; 12 | esac 13 | done 14 | 15 | CODE=( 16 | "cs231n/rnn_layers.py" 17 | "cs231n/classifiers/rnn.py" 18 | "cs231n/net_visualization_pytorch.py" 19 | "cs231n/style_transfer_pytorch.py" 20 | "cs231n/gan_pytorch.py" 21 | ) 22 | NOTEBOOKS=( 23 | "RNN_Captioning.ipynb" 24 | 
"LSTM_Captioning.ipynb" 25 | "NetworkVisualization-PyTorch.ipynb" 26 | "StyleTransfer-PyTorch.ipynb" 27 | "Generative_Adversarial_Networks_PyTorch.ipynb" 28 | ) 29 | 30 | if $tensorflow; then 31 | CODE=( 32 | "cs231n/rnn_layers.py" 33 | "cs231n/classifiers/rnn.py" 34 | "cs231n/net_visualization_tensorflow.py" 35 | "cs231n/style_transfer_tensorflow.py" 36 | "cs231n/gan_tf.py" 37 | ) 38 | NOTEBOOKS=( 39 | "RNN_Captioning.ipynb" 40 | "LSTM_Captioning.ipynb" 41 | "NetworkVisualization-TensorFlow.ipynb" 42 | "StyleTransfer-TensorFlow.ipynb" 43 | "Generative_Adversarial_Networks_TF.ipynb" 44 | ) 45 | fi 46 | 47 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" ) 48 | ZIP_FILENAME="a3.zip" 49 | 50 | for FILE in "${FILES[@]}" 51 | do 52 | if [ ! -f ${FILE} ]; then 53 | echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}" 54 | exit 0 55 | fi 56 | done 57 | 58 | echo -e "### Zipping file ###" 59 | rm -f ${ZIP_FILENAME} 60 | zip -q "${ZIP_FILENAME}" -r ${NOTEBOOKS[@]} $(find . -name "*.py") $(find . -name "*.pyx") -x "makepdf.py" 61 | 62 | echo -e "### Creating PDFs ###" 63 | python makepdf.py --notebooks "${NOTEBOOKS[@]}" 64 | 65 | echo -e "### Done! Please submit a3.zip and the pdfs to Gradescope. ###" 66 | -------------------------------------------------------------------------------- /assignment3/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/cs231n/__init__.py -------------------------------------------------------------------------------- /assignment3/cs231n/captioning_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from . import optim 7 | from .coco_utils import sample_coco_minibatch 8 | 9 | 10 | class CaptioningSolver(object): 11 | """ 12 | A CaptioningSolver encapsulates all the logic necessary for training 13 | image captioning models. The CaptioningSolver performs stochastic gradient 14 | descent using different update rules defined in optim.py. 15 | 16 | The solver accepts both training and validataion data and labels so it can 17 | periodically check classification accuracy on both training and validation 18 | data to watch out for overfitting. 19 | 20 | To train a model, you will first construct a CaptioningSolver instance, 21 | passing the model, dataset, and various options (learning rate, batch size, 22 | etc) to the constructor. You will then call the train() method to run the 23 | optimization procedure and train the model. 24 | 25 | After the train() method returns, model.params will contain the parameters 26 | that performed best on the validation set over the course of training. 27 | In addition, the instance variable solver.loss_history will contain a list 28 | of all losses encountered during training and the instance variables 29 | solver.train_acc_history and solver.val_acc_history will be lists containing 30 | the accuracies of the model on the training and validation set at each epoch. 
31 | 32 | Example usage might look something like this: 33 | 34 | data = load_coco_data() 35 | model = MyAwesomeModel(hidden_dim=100) 36 | solver = CaptioningSolver(model, data, 37 | update_rule='sgd', 38 | optim_config={ 39 | 'learning_rate': 1e-3, 40 | }, 41 | lr_decay=0.95, 42 | num_epochs=10, batch_size=100, 43 | print_every=100) 44 | solver.train() 45 | 46 | 47 | A CaptioningSolver works on a model object that must conform to the following 48 | API: 49 | 50 | - model.params must be a dictionary mapping string parameter names to numpy 51 | arrays containing parameter values. 52 | 53 | - model.loss(features, captions) must be a function that computes 54 | training-time loss and gradients, with the following inputs and outputs: 55 | 56 | Inputs: 57 | - features: Array giving a minibatch of features for images, of shape (N, D) 58 | - captions: Array of captions for those images, of shape (N, T) where 59 | each element is in the range (0, V]. 60 | 61 | Returns: 62 | - loss: Scalar giving the loss 63 | - grads: Dictionary with the same keys as model.params mapping parameter 64 | names to gradients of the loss with respect to those parameters. 65 | """ 66 | 67 | def __init__(self, model, data, **kwargs): 68 | """ 69 | Construct a new CaptioningSolver instance. 70 | 71 | Required arguments: 72 | - model: A model object conforming to the API described above 73 | - data: A dictionary of training and validation data from load_coco_data 74 | 75 | Optional arguments: 76 | - update_rule: A string giving the name of an update rule in optim.py. 77 | Default is 'sgd'. 78 | - optim_config: A dictionary containing hyperparameters that will be 79 | passed to the chosen update rule. Each update rule requires different 80 | hyperparameters (see optim.py) but all update rules require a 81 | 'learning_rate' parameter so that should always be present. 82 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 83 | rate is multiplied by this value. 84 | - batch_size: Size of minibatches used to compute loss and gradient during 85 | training. 86 | - num_epochs: The number of epochs to run for during training. 87 | - print_every: Integer; training losses will be printed every print_every 88 | iterations. 89 | - verbose: Boolean; if set to false then no output will be printed during 90 | training. 91 | """ 92 | self.model = model 93 | self.data = data 94 | 95 | # Unpack keyword arguments 96 | self.update_rule = kwargs.pop("update_rule", "sgd") 97 | self.optim_config = kwargs.pop("optim_config", {}) 98 | self.lr_decay = kwargs.pop("lr_decay", 1.0) 99 | self.batch_size = kwargs.pop("batch_size", 100) 100 | self.num_epochs = kwargs.pop("num_epochs", 10) 101 | 102 | self.print_every = kwargs.pop("print_every", 10) 103 | self.verbose = kwargs.pop("verbose", True) 104 | 105 | # Throw an error if there are extra keyword arguments 106 | if len(kwargs) > 0: 107 | extra = ", ".join('"%s"' % k for k in list(kwargs.keys())) 108 | raise ValueError("Unrecognized arguments %s" % extra) 109 | 110 | # Make sure the update rule exists, then replace the string 111 | # name with the actual function 112 | if not hasattr(optim, self.update_rule): 113 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 114 | self.update_rule = getattr(optim, self.update_rule) 115 | 116 | self._reset() 117 | 118 | def _reset(self): 119 | """ 120 | Set up some book-keeping variables for optimization. Don't call this 121 | manually.
122 | """ 123 | # Set up some variables for book-keeping 124 | self.epoch = 0 125 | self.best_val_acc = 0 126 | self.best_params = {} 127 | self.loss_history = [] 128 | self.train_acc_history = [] 129 | self.val_acc_history = [] 130 | 131 | # Make a deep copy of the optim_config for each parameter 132 | self.optim_configs = {} 133 | for p in self.model.params: 134 | d = {k: v for k, v in self.optim_config.items()} 135 | self.optim_configs[p] = d 136 | 137 | def _step(self): 138 | """ 139 | Make a single gradient update. This is called by train() and should not 140 | be called manually. 141 | """ 142 | # Make a minibatch of training data 143 | minibatch = sample_coco_minibatch( 144 | self.data, batch_size=self.batch_size, split="train" 145 | ) 146 | captions, features, urls = minibatch 147 | 148 | # Compute loss and gradient 149 | loss, grads = self.model.loss(features, captions) 150 | self.loss_history.append(loss) 151 | 152 | # Perform a parameter update 153 | for p, w in self.model.params.items(): 154 | dw = grads[p] 155 | config = self.optim_configs[p] 156 | next_w, next_config = self.update_rule(w, dw, config) 157 | self.model.params[p] = next_w 158 | self.optim_configs[p] = next_config 159 | 160 | def check_accuracy(self, X, y, num_samples=None, batch_size=100): 161 | """ 162 | Check accuracy of the model on the provided data. 163 | 164 | Inputs: 165 | - X: Array of data, of shape (N, d_1, ..., d_k) 166 | - y: Array of labels, of shape (N,) 167 | - num_samples: If not None, subsample the data and only test the model 168 | on num_samples datapoints. 169 | - batch_size: Split X and y into batches of this size to avoid using too 170 | much memory. 171 | 172 | Returns: 173 | - acc: Scalar giving the fraction of instances that were correctly 174 | classified by the model. 175 | """ 176 | return 0.0 177 | 178 | # Maybe subsample the data 179 | N = X.shape[0] 180 | if num_samples is not None and N > num_samples: 181 | mask = np.random.choice(N, num_samples) 182 | N = num_samples 183 | X = X[mask] 184 | y = y[mask] 185 | 186 | # Compute predictions in batches 187 | num_batches = N / batch_size 188 | if N % batch_size != 0: 189 | num_batches += 1 190 | y_pred = [] 191 | for i in range(num_batches): 192 | start = i * batch_size 193 | end = (i + 1) * batch_size 194 | scores = self.model.loss(X[start:end]) 195 | y_pred.append(np.argmax(scores, axis=1)) 196 | y_pred = np.hstack(y_pred) 197 | acc = np.mean(y_pred == y) 198 | 199 | return acc 200 | 201 | def train(self): 202 | """ 203 | Run optimization to train the model. 204 | """ 205 | num_train = self.data["train_captions"].shape[0] 206 | iterations_per_epoch = max(num_train // self.batch_size, 1) 207 | num_iterations = self.num_epochs * iterations_per_epoch 208 | 209 | for t in range(num_iterations): 210 | self._step() 211 | 212 | # Maybe print training loss 213 | if self.verbose and t % self.print_every == 0: 214 | print( 215 | "(Iteration %d / %d) loss: %f" 216 | % (t + 1, num_iterations, self.loss_history[-1]) 217 | ) 218 | 219 | # At the end of every epoch, increment the epoch counter and decay the 220 | # learning rate. 
221 | epoch_end = (t + 1) % iterations_per_epoch == 0 222 | if epoch_end: 223 | self.epoch += 1 224 | for k in self.optim_configs: 225 | self.optim_configs[k]["learning_rate"] *= self.lr_decay 226 | -------------------------------------------------------------------------------- /assignment3/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /assignment3/cs231n/classifiers/squeezenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | NUM_CLASSES = 1000 4 | 5 | class Fire(tf.keras.Model): 6 | def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes,name=None): 7 | super(Fire, self).__init__(name='%s/fire'%name) 8 | self.inplanes = inplanes 9 | self.squeeze = tf.keras.layers.Conv2D(squeeze_planes, input_shape=(inplanes,), kernel_size=1, strides=(1,1), padding="VALID", activation='relu',name='squeeze') 10 | self.expand1x1 = tf.keras.layers.Conv2D(expand1x1_planes, kernel_size=1, padding="VALID", strides=(1,1), activation='relu',name='e11') 11 | self.expand3x3 = tf.keras.layers.Conv2D(expand3x3_planes, kernel_size=3, padding="SAME", strides=(1,1), activation='relu',name='e33') 12 | 13 | def call(self, x): 14 | x = self.squeeze(x) 15 | return tf.concat([ 16 | self.expand1x1(x), 17 | self.expand3x3(x) 18 | ], axis=3) 19 | 20 | 21 | class SqueezeNet(tf.keras.Model): 22 | def __init__(self, num_classes=NUM_CLASSES): 23 | super(SqueezeNet, self).__init__() 24 | self.num_classes = num_classes 25 | 26 | self.net = tf.keras.models.Sequential([ 27 | tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(2,2), padding="VALID", activation='relu', input_shape=(224, 224, 3), name='features/layer0'), 28 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer2'), 29 | Fire(64, 16, 64, 64, name='features/layer3'), 30 | Fire(128, 16, 64, 64, name='features/layer4'), 31 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer5'), 32 | Fire(128, 32, 128, 128, name='features/layer6'), 33 | Fire(256, 32, 128, 128, name='features/layer7'), 34 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer8'), 35 | Fire(256, 48, 192, 192, name='features/layer9'), 36 | Fire(384, 48, 192, 192, name='features/layer10'), 37 | Fire(384, 64, 256, 256, name='features/layer11'), 38 | Fire(512, 64, 256, 256, name='features/layer12'), 39 | tf.keras.layers.Conv2D(self.num_classes, kernel_size=1, padding="VALID", activation='relu', name='classifier/layer1'), 40 | tf.keras.layers.AveragePooling2D(pool_size=13, strides=13, padding="VALID", name='classifier/layer3') 41 | ]) 42 | 43 | def call(self, x, save_path=None): 44 | x = self.net(x) 45 | scores = tf.reshape(x, (-1, self.num_classes)) 46 | return scores 47 | -------------------------------------------------------------------------------- /assignment3/cs231n/coco_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import os, json 3 | import numpy as np 4 | import h5py 5 | 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | BASE_DIR = os.path.join(dir_path, "datasets/coco_captioning") 8 | 9 | def load_coco_data(base_dir=BASE_DIR, max_train=None, 
pca_features=True): 10 | print('base dir ', base_dir) 11 | data = {} 12 | caption_file = os.path.join(base_dir, "coco2014_captions.h5") 13 | with h5py.File(caption_file, "r") as f: 14 | for k, v in f.items(): 15 | data[k] = np.asarray(v) 16 | 17 | if pca_features: 18 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7_pca.h5") 19 | else: 20 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7.h5") 21 | with h5py.File(train_feat_file, "r") as f: 22 | data["train_features"] = np.asarray(f["features"]) 23 | 24 | if pca_features: 25 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7_pca.h5") 26 | else: 27 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7.h5") 28 | with h5py.File(val_feat_file, "r") as f: 29 | data["val_features"] = np.asarray(f["features"]) 30 | 31 | dict_file = os.path.join(base_dir, "coco2014_vocab.json") 32 | with open(dict_file, "r") as f: 33 | dict_data = json.load(f) 34 | for k, v in dict_data.items(): 35 | data[k] = v 36 | 37 | train_url_file = os.path.join(base_dir, "train2014_urls.txt") 38 | with open(train_url_file, "r") as f: 39 | train_urls = np.asarray([line.strip() for line in f]) 40 | data["train_urls"] = train_urls 41 | 42 | val_url_file = os.path.join(base_dir, "val2014_urls.txt") 43 | with open(val_url_file, "r") as f: 44 | val_urls = np.asarray([line.strip() for line in f]) 45 | data["val_urls"] = val_urls 46 | 47 | # Maybe subsample the training data 48 | if max_train is not None: 49 | num_train = data["train_captions"].shape[0] 50 | mask = np.random.randint(num_train, size=max_train) 51 | data["train_captions"] = data["train_captions"][mask] 52 | data["train_image_idxs"] = data["train_image_idxs"][mask] 53 | 54 | return data 55 | 56 | 57 | def decode_captions(captions, idx_to_word): 58 | singleton = False 59 | if captions.ndim == 1: 60 | singleton = True 61 | captions = captions[None] 62 | decoded = [] 63 | N, T = captions.shape 64 | for i in range(N): 65 | words = [] 66 | for t in range(T): 67 | word = idx_to_word[captions[i, t]] 68 | if word != "<NULL>": # drop <NULL> padding tokens 69 | words.append(word) 70 | if word == "<END>": # stop decoding at the end-of-caption token 71 | break 72 | decoded.append(" ".join(words)) 73 | if singleton: 74 | decoded = decoded[0] 75 | return decoded 76 | 77 | 78 | def sample_coco_minibatch(data, batch_size=100, split="train"): 79 | split_size = data["%s_captions" % split].shape[0] 80 | mask = np.random.choice(split_size, batch_size) 81 | captions = data["%s_captions" % split][mask] 82 | image_idxs = data["%s_image_idxs" % split][mask] 83 | image_features = data["%s_features" % split][image_idxs] 84 | urls = data["%s_urls" % split][image_idxs] 85 | return captions, image_features, urls 86 | -------------------------------------------------------------------------------- /assignment3/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from six.moves import cPickle as pickle 5 | import numpy as np 6 | import os 7 | from imageio import imread 8 | import platform 9 | 10 | 11 | def load_pickle(f): 12 | version = platform.python_version_tuple() 13 | if version[0] == "2": 14 | return pickle.load(f) 15 | elif version[0] == "3": 16 | return pickle.load(f, encoding="latin1") 17 | raise ValueError("invalid python version: {}".format(version)) 18 | 19 | 20 | def load_CIFAR_batch(filename): 21 | """ load single batch of cifar """ 22 | with open(filename, "rb") as f: 23 | datadict = load_pickle(f) 24 | X = datadict["data"] 25 | Y =
datadict["labels"] 26 | X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float") 27 | Y = np.array(Y) 28 | return X, Y 29 | 30 | 31 | def load_CIFAR10(ROOT): 32 | """ load all of cifar """ 33 | xs = [] 34 | ys = [] 35 | for b in range(1, 6): 36 | f = os.path.join(ROOT, "data_batch_%d" % (b,)) 37 | X, Y = load_CIFAR_batch(f) 38 | xs.append(X) 39 | ys.append(Y) 40 | Xtr = np.concatenate(xs) 41 | Ytr = np.concatenate(ys) 42 | del X, Y 43 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, "test_batch")) 44 | return Xtr, Ytr, Xte, Yte 45 | 46 | 47 | def get_CIFAR10_data( 48 | num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True 49 | ): 50 | """ 51 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 52 | it for classifiers. These are the same steps as we used for the SVM, but 53 | condensed to a single function. 54 | """ 55 | # Load the raw CIFAR-10 data 56 | cifar10_dir = os.path.join( 57 | os.path.dirname(__file__), "datasets/cifar-10-batches-py" 58 | ) 59 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 60 | 61 | # Subsample the data 62 | mask = list(range(num_training, num_training + num_validation)) 63 | X_val = X_train[mask] 64 | y_val = y_train[mask] 65 | mask = list(range(num_training)) 66 | X_train = X_train[mask] 67 | y_train = y_train[mask] 68 | mask = list(range(num_test)) 69 | X_test = X_test[mask] 70 | y_test = y_test[mask] 71 | 72 | # Normalize the data: subtract the mean image 73 | if subtract_mean: 74 | mean_image = np.mean(X_train, axis=0) 75 | X_train -= mean_image 76 | X_val -= mean_image 77 | X_test -= mean_image 78 | 79 | # Transpose so that channels come first 80 | X_train = X_train.transpose(0, 3, 1, 2).copy() 81 | X_val = X_val.transpose(0, 3, 1, 2).copy() 82 | X_test = X_test.transpose(0, 3, 1, 2).copy() 83 | 84 | # Package data into a dictionary 85 | return { 86 | "X_train": X_train, 87 | "y_train": y_train, 88 | "X_val": X_val, 89 | "y_val": y_val, 90 | "X_test": X_test, 91 | "y_test": y_test, 92 | } 93 | 94 | 95 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 96 | """ 97 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 98 | TinyImageNet-200 have the same directory structure, so this can be used 99 | to load any of them. 100 | 101 | Inputs: 102 | - path: String giving path to the directory to load. 103 | - dtype: numpy datatype used to load the data. 104 | - subtract_mean: Whether to subtract the mean training image. 105 | 106 | Returns: A dictionary with the following entries: 107 | - class_names: A list where class_names[i] is a list of strings giving the 108 | WordNet names for class i in the loaded dataset. 109 | - X_train: (N_tr, 3, 64, 64) array of training images 110 | - y_train: (N_tr,) array of training labels 111 | - X_val: (N_val, 3, 64, 64) array of validation images 112 | - y_val: (N_val,) array of validation labels 113 | - X_test: (N_test, 3, 64, 64) array of testing images. 114 | - y_test: (N_test,) array of test labels; if test labels are not available 115 | (such as in student code) then y_test will be None. 
116 | - mean_image: (3, 64, 64) array giving mean training image 117 | """ 118 | # First load wnids 119 | with open(os.path.join(path, "wnids.txt"), "r") as f: 120 | wnids = [x.strip() for x in f] 121 | 122 | # Map wnids to integer labels 123 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 124 | 125 | # Use words.txt to get names for each class 126 | with open(os.path.join(path, "words.txt"), "r") as f: 127 | wnid_to_words = dict(line.split("\t") for line in f) 128 | for wnid, words in wnid_to_words.items(): 129 | wnid_to_words[wnid] = [w.strip() for w in words.split(",")] 130 | class_names = [wnid_to_words[wnid] for wnid in wnids] 131 | 132 | # Next load training data. 133 | X_train = [] 134 | y_train = [] 135 | for i, wnid in enumerate(wnids): 136 | if (i + 1) % 20 == 0: 137 | print("loading training data for synset %d / %d" % (i + 1, len(wnids))) 138 | # To figure out the filenames we need to open the boxes file 139 | boxes_file = os.path.join(path, "train", wnid, "%s_boxes.txt" % wnid) 140 | with open(boxes_file, "r") as f: 141 | filenames = [x.split("\t")[0] for x in f] 142 | num_images = len(filenames) 143 | 144 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 145 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 146 | for j, img_file in enumerate(filenames): 147 | img_file = os.path.join(path, "train", wnid, "images", img_file) 148 | img = imread(img_file) 149 | if img.ndim == 2: 150 | ## grayscale file 151 | img.shape = (64, 64, 1) 152 | X_train_block[j] = img.transpose(2, 0, 1) 153 | X_train.append(X_train_block) 154 | y_train.append(y_train_block) 155 | 156 | # We need to concatenate all training data 157 | X_train = np.concatenate(X_train, axis=0) 158 | y_train = np.concatenate(y_train, axis=0) 159 | 160 | # Next load validation data 161 | with open(os.path.join(path, "val", "val_annotations.txt"), "r") as f: 162 | img_files = [] 163 | val_wnids = [] 164 | for line in f: 165 | img_file, wnid = line.split("\t")[:2] 166 | img_files.append(img_file) 167 | val_wnids.append(wnid) 168 | num_val = len(img_files) 169 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 170 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 171 | for i, img_file in enumerate(img_files): 172 | img_file = os.path.join(path, "val", "images", img_file) 173 | img = imread(img_file) 174 | if img.ndim == 2: 175 | img.shape = (64, 64, 1) 176 | X_val[i] = img.transpose(2, 0, 1) 177 | 178 | # Next load test images 179 | # Students won't have test labels, so we need to iterate over files in the 180 | # images directory. 
181 | img_files = os.listdir(os.path.join(path, "test", "images")) 182 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 183 | for i, img_file in enumerate(img_files): 184 | img_file = os.path.join(path, "test", "images", img_file) 185 | img = imread(img_file) 186 | if img.ndim == 2: 187 | img.shape = (64, 64, 1) 188 | X_test[i] = img.transpose(2, 0, 1) 189 | 190 | y_test = None 191 | y_test_file = os.path.join(path, "test", "test_annotations.txt") 192 | if os.path.isfile(y_test_file): 193 | with open(y_test_file, "r") as f: 194 | img_file_to_wnid = {} 195 | for line in f: 196 | line = line.split("\t") 197 | img_file_to_wnid[line[0]] = line[1] 198 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files] 199 | y_test = np.array(y_test) 200 | 201 | mean_image = X_train.mean(axis=0) 202 | if subtract_mean: 203 | X_train -= mean_image[None] 204 | X_val -= mean_image[None] 205 | X_test -= mean_image[None] 206 | 207 | return { 208 | "class_names": class_names, 209 | "X_train": X_train, 210 | "y_train": y_train, 211 | "X_val": X_val, 212 | "y_val": y_val, 213 | "X_test": X_test, 214 | "y_test": y_test, 215 | "class_names": class_names, 216 | "mean_image": mean_image, 217 | } 218 | 219 | 220 | def load_models(models_dir): 221 | """ 222 | Load saved models from disk. This will attempt to unpickle all files in a 223 | directory; any files that give errors on unpickling (such as README.txt) 224 | will be skipped. 225 | 226 | Inputs: 227 | - models_dir: String giving the path to a directory containing model files. 228 | Each model file is a pickled dictionary with a 'model' field. 229 | 230 | Returns: 231 | A dictionary mapping model file names to models. 232 | """ 233 | models = {} 234 | for model_file in os.listdir(models_dir): 235 | with open(os.path.join(models_dir, model_file), "rb") as f: 236 | try: 237 | models[model_file] = load_pickle(f)["model"] 238 | except pickle.UnpicklingError: 239 | continue 240 | return models 241 | 242 | 243 | def load_imagenet_val(num=None): 244 | """Load a handful of validation images from ImageNet. 
245 | 246 | Inputs: 247 | - num: Number of images to load (max of 25) 248 | 249 | Returns: 250 | - X: numpy array with shape [num, 224, 224, 3] 251 | - y: numpy array of integer image labels, shape [num] 252 | - class_names: dict mapping integer label to class name 253 | """ 254 | imagenet_fn = os.path.join( 255 | os.path.dirname(__file__), "datasets/imagenet_val_25.npz" 256 | ) 257 | if not os.path.isfile(imagenet_fn): 258 | print("file %s not found" % imagenet_fn) 259 | print("Run the following:") 260 | print("cd cs231n/datasets") 261 | print("bash get_imagenet_val.sh") 262 | assert False, "Need to download imagenet_val_25.npz" 263 | 264 | # modify the default parameters of np.load 265 | # https://stackoverflow.com/questions/55890813/how-to-fix-object-arrays-cannot-be-loaded-when-allow-pickle-false-for-imdb-loa 266 | np_load_old = np.load 267 | np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k) 268 | f = np.load(imagenet_fn) 269 | np.load = np_load_old 270 | X = f["X"] 271 | y = f["y"] 272 | class_names = f["label_map"].item() 273 | if num is not None: 274 | X = X[:num] 275 | y = y[:num] 276 | return X, y, class_names 277 | -------------------------------------------------------------------------------- /assignment3/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | 9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 10 | """ 11 | a naive implementation of numerical gradient of f at x 12 | - f should be a function that takes a single argument 13 | - x is the point (numpy array) to evaluate the gradient at 14 | """ 15 | 16 | fx = f(x) # evaluate function value at original point 17 | grad = np.zeros_like(x) 18 | # iterate over all indexes in x 19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 20 | while not it.finished: 21 | 22 | # evaluate function at x+h 23 | ix = it.multi_index 24 | oldval = x[ix] 25 | x[ix] = oldval + h # increment by h 26 | fxph = f(x) # evaluate f(x + h) 27 | x[ix] = oldval - h 28 | fxmh = f(x) # evaluate f(x - h) 29 | x[ix] = oldval # restore 30 | 31 | # compute the partial derivative with centered formula 32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 33 | if verbose: 34 | print(ix, grad[ix]) 35 | it.iternext() # step to next dimension 36 | 37 | return grad 38 | 39 | 40 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 41 | """ 42 | Evaluate a numeric gradient for a function that accepts a numpy 43 | array and returns a numpy array. 44 | """ 45 | grad = np.zeros_like(x) 46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 47 | while not it.finished: 48 | ix = it.multi_index 49 | 50 | oldval = x[ix] 51 | x[ix] = oldval + h 52 | pos = f(x).copy() 53 | x[ix] = oldval - h 54 | neg = f(x).copy() 55 | x[ix] = oldval 56 | 57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 58 | it.iternext() 59 | return grad 60 | 61 | 62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 63 | """ 64 | Compute numeric gradients for a function that operates on input 65 | and output blobs. 66 | 67 | We assume that f accepts several input blobs as arguments, followed by a 68 | blob where outputs will be written. For example, f might be called like: 69 | 70 | f(x, w, out) 71 | 72 | where x and w are input Blobs, and the result of f will be written to out.
73 | 74 | Inputs: 75 | - f: function 76 | - inputs: tuple of input blobs 77 | - output: output blob 78 | - h: step size 79 | """ 80 | numeric_diffs = [] 81 | for input_blob in inputs: 82 | diff = np.zeros_like(input_blob.diffs) 83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"]) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs( 105 | lambda *args: net.forward(), inputs, output, h=h 106 | ) 107 | 108 | 109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 110 | """ 111 | Sample a few random elements and only return the numerical gradient 112 | in those dimensions. 113 | """ 114 | 115 | for i in range(num_checks): 116 | ix = tuple([randrange(m) for m in x.shape]) 117 | 118 | oldval = x[ix] 119 | x[ix] = oldval + h # increment by h 120 | fxph = f(x) # evaluate f(x + h) 121 | x[ix] = oldval - h # decrement by h 122 | fxmh = f(x) # evaluate f(x - h) 123 | x[ix] = oldval # reset 124 | 125 | grad_numerical = (fxph - fxmh) / (2 * h) 126 | grad_analytic = analytic_grad[ix] 127 | rel_error = abs(grad_numerical - grad_analytic) / ( 128 | abs(grad_numerical) + abs(grad_analytic) 129 | ) 130 | print( 131 | "numerical: %f analytic: %f, relative error: %e" 132 | % (grad_numerical, grad_analytic, rel_error) 133 | ) 134 | -------------------------------------------------------------------------------- /assignment3/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_width) % stride == 0 10 | out_height = (H + 2 * padding - field_height) // stride + 1 11 | out_width = (W + 2 * padding - field_width) // stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant") 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride) 33 | 34 | cols = x_padded[:, k, i, j] 35 | C = x.shape[1] 36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 37 | return cols 38 | 39 | 40 | def col2im_indices(cols, x_shape, field_height=3,
field_width=3, padding=1, stride=1): 41 | """ An implementation of col2im based on fancy indexing and np.add.at """ 42 | N, C, H, W = x_shape 43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride) 46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 49 | if padding == 0: 50 | return x_padded 51 | return x_padded[:, :, padding:-padding, padding:-padding] 52 | 53 | 54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 55 | 56 | pass 57 | 58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | -------------------------------------------------------------------------------- /assignment3/cs231n/im2col_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # DTYPE = np.float64 6 | # ctypedef np.float64_t DTYPE_t 7 | 8 | ctypedef fused DTYPE_t: 9 | np.float32_t 10 | np.float64_t 11 | 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 13 | int field_width, int padding, int stride): 14 | cdef int N = x.shape[0] 15 | cdef int C = x.shape[1] 16 | cdef int H = x.shape[2] 17 | cdef int W = x.shape[3] 18 | 19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 21 | 22 | cdef int p = padding 23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 25 | 26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 27 | (C * field_height * field_width, N * HH * WW), 28 | dtype=x.dtype) 29 | 30 | # Moving the inner loop to a C function with no bounds checking works, but does 31 | # not seem to help performance in any measurable way. 32 | 33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 34 | field_height, field_width, padding, stride) 35 | return cols 36 | 37 | 38 | @cython.boundscheck(False) 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 40 | np.ndarray[DTYPE_t, ndim=4] x_padded, 41 | int N, int C, int H, int W, int HH, int WW, 42 | int field_height, int field_width, int padding, int stride) except? -1: 43 | cdef int c, ii, jj, row, yy, xx, i, col 44 | 45 | for c in range(C): 46 | for yy in range(HH): 47 | for xx in range(WW): 48 | for ii in range(field_height): 49 | for jj in range(field_width): 50 | row = c * field_width * field_height + ii * field_height + jj 51 | for i in range(N): 52 | col = yy * WW * N + xx * N + i 53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 54 | 55 | 56 | 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 58 | int field_height, int field_width, int padding, int stride): 59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 63 | dtype=cols.dtype) 64 | 65 | # Moving the inner loop to a C-function with no bounds checking improves 66 | # performance quite a bit for col2im. 
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 119 | if pad > 0: 120 | return x_padded[:, :, pad:-pad, pad:-pad] 121 | return x_padded 122 | -------------------------------------------------------------------------------- /assignment3/cs231n/image_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from future import standard_library 3 | 4 | standard_library.install_aliases() 5 | from builtins import range 6 | import urllib.request, urllib.error, urllib.parse, os, tempfile 7 | 8 | import numpy as np 9 | from imageio import imread 10 | from PIL import Image 11 | 12 | """ 13 | Utility functions used for viewing and processing images. 14 | """ 15 | 16 | 17 | def blur_image(X): 18 | """ 19 | A very gentle image blurring operation, to be used as a regularizer for 20 | image generation. 
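(The kernel built in the body below is a per-channel 3x3 filter that is
mostly the identity -- its center weight is 188 out of a total of 200 --
so the blur is deliberately mild.)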
21 | 22 | Inputs: 23 | - X: Image data of shape (N, 3, H, W) 24 | 25 | Returns: 26 | - X_blur: Blurred version of X, of shape (N, 3, H, W) 27 | """ 28 | from .fast_layers import conv_forward_fast 29 | 30 | w_blur = np.zeros((3, 3, 3, 3)) 31 | b_blur = np.zeros(3) 32 | blur_param = {"stride": 1, "pad": 1} 33 | for i in range(3): 34 | w_blur[i, i] = np.asarray([[1, 2, 1], [2, 188, 2], [1, 2, 1]], dtype=np.float32) 35 | w_blur /= 200.0 36 | return conv_forward_fast(X, w_blur, b_blur, blur_param)[0] 37 | 38 | 39 | SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) 40 | SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) 41 | 42 | 43 | def preprocess_image(img): 44 | """Preprocess an image for squeezenet. 45 | 46 | Subtracts the pixel mean and divides by the standard deviation. 47 | """ 48 | return (img.astype(np.float32) / 255.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD 49 | 50 | 51 | def deprocess_image(img, rescale=False): 52 | """Undo preprocessing on an image and convert back to uint8.""" 53 | img = img * SQUEEZENET_STD + SQUEEZENET_MEAN 54 | if rescale: 55 | vmin, vmax = img.min(), img.max() 56 | img = (img - vmin) / (vmax - vmin) 57 | return np.clip(255 * img, 0.0, 255.0).astype(np.uint8) 58 | 59 | def get_default_image(): 60 | url = 'https://tva1.sinaimg.cn/large/007S8ZIlgy1gjfpgrdb33j30dw07t0sk.jpg' 61 | try: 62 | f = urllib.request.urlopen(url) 63 | _, fname = tempfile.mkstemp() 64 | with open(fname, "wb") as ff: 65 | ff.write(f.read()) 66 | img = imread(fname) 67 | os.remove(fname) 68 | return img 69 | except urllib.error.HTTPError as e: # HTTPError is a subclass of URLError, so it must be caught first 70 | print("HTTP Error: ", e.code, url) 71 | return e.code 72 | except urllib.error.URLError as e: 73 | print("URL Error: ", e.reason, url) 74 | return e.reason 75 | 76 | def image_from_url(url): 77 | """ 78 | Read an image from a URL. Returns a numpy array with the pixel data. 79 | We write the image to a temporary file then read it back. Kinda gross. 80 | """ 81 | try: 82 | f = urllib.request.urlopen(url) 83 | _, fname = tempfile.mkstemp() 84 | with open(fname, "wb") as ff: 85 | ff.write(f.read()) 86 | img = imread(fname) 87 | os.remove(fname) 88 | return img 89 | except urllib.error.HTTPError as e: # catch HTTPError before its parent class URLError 90 | print("HTTP Error: ", e.code, url) 91 | return get_default_image() 92 | except urllib.error.URLError as e: 93 | print("URL Error: ", e.reason, url) 94 | return get_default_image() 95 | 96 | 97 | def load_image(filename, size=None): 98 | """Load and resize an image from disk.
99 | 100 | Inputs: 101 | - filename: path to file 102 | - size: size of shortest dimension after rescaling 103 | """ 104 | img = imread(filename) 105 | if size is not None: 106 | orig_shape = np.array(img.shape[:2]) 107 | min_idx = np.argmin(orig_shape) 108 | scale_factor = float(size) / orig_shape[min_idx] 109 | new_shape = (orig_shape * scale_factor).astype(int) 110 | img = np.array(Image.fromarray(img).resize(new_shape)) 111 | return img 112 | -------------------------------------------------------------------------------- /assignment3/cs231n/layer_utils.py: -------------------------------------------------------------------------------- 1 | from .layers import * 2 | from .fast_layers import * 3 | 4 | 5 | def affine_relu_forward(x, w, b): 6 | """ 7 | Convenience layer that performs an affine transform followed by a ReLU 8 | 9 | Inputs: 10 | - x: Input to the affine layer 11 | - w, b: Weights for the affine layer 12 | 13 | Returns a tuple of: 14 | - out: Output from the ReLU 15 | - cache: Object to give to the backward pass 16 | """ 17 | a, fc_cache = affine_forward(x, w, b) 18 | out, relu_cache = relu_forward(a) 19 | cache = (fc_cache, relu_cache) 20 | return out, cache 21 | 22 | 23 | def affine_relu_backward(dout, cache): 24 | """ 25 | Backward pass for the affine-relu convenience layer 26 | """ 27 | fc_cache, relu_cache = cache 28 | da = relu_backward(dout, relu_cache) 29 | dx, dw, db = affine_backward(da, fc_cache) 30 | return dx, dw, db 31 | 32 | 33 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param): 34 | """ 35 | Convenience layer that performs an affine transform, batch normalization, 36 | and ReLU. 37 | 38 | Inputs: 39 | - x: Array of shape (N, D1); input to the affine layer 40 | - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for 41 | the affine transform. 42 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift 43 | parameters for batch normalization. 44 | - bn_param: Dictionary of parameters for batch normalization. 45 | 46 | Returns: 47 | - out: Output from ReLU, of shape (N, D2) 48 | - cache: Object to give to the backward pass. 49 | """ 50 | a, fc_cache = affine_forward(x, w, b) 51 | a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param) 52 | out, relu_cache = relu_forward(a_bn) 53 | cache = (fc_cache, bn_cache, relu_cache) 54 | return out, cache 55 | 56 | 57 | def affine_bn_relu_backward(dout, cache): 58 | """ 59 | Backward pass for the affine-batchnorm-relu convenience layer. 60 | """ 61 | fc_cache, bn_cache, relu_cache = cache 62 | da_bn = relu_backward(dout, relu_cache) 63 | da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache) 64 | dx, dw, db = affine_backward(da, fc_cache) 65 | return dx, dw, db, dgamma, dbeta 66 | 67 | 68 | def affine_ln_relu_forward(x, w, b, gamma, beta, ln_param): 69 | """ 70 | Convenience layer that performs an affine transform, layer normalization, 71 | and ReLU. 72 | 73 | Inputs: 74 | - x: Array of shape (N, D1); input to the affine layer 75 | - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for 76 | the affine transform. 77 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift 78 | parameters for layer normalization. 79 | - ln_param: Dictionary of parameters for layer normalization. 80 | 81 | Returns: 82 | - out: Output from ReLU, of shape (N, D2) 83 | - cache: Object to give to the backward pass.
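Example (a minimal shape sketch; the arrays below are illustrative and
assume layernorm_forward fills in any missing ln_param defaults):
    x = np.random.randn(4, 10)                  # (N, D1)
    w, b = np.random.randn(10, 6), np.zeros(6)  # (D1, D2) and (D2,)
    gamma, beta = np.ones(6), np.zeros(6)       # (D2,) scale and shift
    out, cache = affine_ln_relu_forward(x, w, b, gamma, beta, {})
    # out.shape == (4, 6)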
84 | """ 85 | a, fc_cache = affine_forward(x, w, b) 86 | a_ln, ln_cache = layernorm_forward(a, gamma, beta, ln_param) 87 | out, relu_cache = relu_forward(a_ln) 88 | cache = (fc_cache, ln_cache, relu_cache) 89 | return out, cache 90 | 91 | 92 | def affine_ln_relu_backward(dout, cache): 93 | """ 94 | Backward pass for the affine-layernorm-relu convenience layer. 95 | """ 96 | fc_cache, ln_cache, relu_cache = cache 97 | da_ln = relu_backward(dout, relu_cache) 98 | da, dgamma, dbeta = layernorm_backward(da_ln, ln_cache) 99 | dx, dw, db = affine_backward(da, fc_cache) 100 | return dx, dw, db, dgamma, dbeta 101 | 102 | 103 | def conv_relu_forward(x, w, b, conv_param): 104 | """ 105 | A convenience layer that performs a convolution followed by a ReLU. 106 | 107 | Inputs: 108 | - x: Input to the convolutional layer 109 | - w, b, conv_param: Weights and parameters for the convolutional layer 110 | 111 | Returns a tuple of: 112 | - out: Output from the ReLU 113 | - cache: Object to give to the backward pass 114 | """ 115 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 116 | out, relu_cache = relu_forward(a) 117 | cache = (conv_cache, relu_cache) 118 | return out, cache 119 | 120 | 121 | def conv_relu_backward(dout, cache): 122 | """ 123 | Backward pass for the conv-relu convenience layer. 124 | """ 125 | conv_cache, relu_cache = cache 126 | da = relu_backward(dout, relu_cache) 127 | dx, dw, db = conv_backward_fast(da, conv_cache) 128 | return dx, dw, db 129 | 130 | 131 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 132 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 133 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 134 | out, relu_cache = relu_forward(an) 135 | cache = (conv_cache, bn_cache, relu_cache) 136 | return out, cache 137 | 138 | 139 | def conv_bn_relu_backward(dout, cache): 140 | conv_cache, bn_cache, relu_cache = cache 141 | dan = relu_backward(dout, relu_cache) 142 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 143 | dx, dw, db = conv_backward_fast(da, conv_cache) 144 | return dx, dw, db, dgamma, dbeta 145 | 146 | 147 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 148 | """ 149 | Convenience layer that performs a convolution, a ReLU, and a pool. 150 | 151 | Inputs: 152 | - x: Input to the convolutional layer 153 | - w, b, conv_param: Weights and parameters for the convolutional layer 154 | - pool_param: Parameters for the pooling layer 155 | 156 | Returns a tuple of: 157 | - out: Output from the pooling layer 158 | - cache: Object to give to the backward pass 159 | """ 160 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 161 | s, relu_cache = relu_forward(a) 162 | out, pool_cache = max_pool_forward_fast(s, pool_param) 163 | cache = (conv_cache, relu_cache, pool_cache) 164 | return out, cache 165 | 166 | 167 | def conv_relu_pool_backward(dout, cache): 168 | """ 169 | Backward pass for the conv-relu-pool convenience layer 170 | """ 171 | conv_cache, relu_cache, pool_cache = cache 172 | ds = max_pool_backward_fast(dout, pool_cache) 173 | da = relu_backward(ds, relu_cache) 174 | dx, dw, db = conv_backward_fast(da, conv_cache) 175 | return dx, dw, db 176 | -------------------------------------------------------------------------------- /assignment3/cs231n/layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def affine_forward(x, w, b): 5 | """ 6 | Computes the forward pass for an affine (fully-connected) layer. 
7 | 8 | The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input. 9 | We multiply this against a weight matrix of shape (D, M) where 10 | D = prod_i d_i 11 | 12 | Inputs: 13 | - x: Input data, of shape (N, d_1, ..., d_k) 14 | - w: Weights, of shape (D, M) 15 | - b: Biases, of shape (M,) 16 | 17 | Returns a tuple of: 18 | - out: output, of shape (N, M) 19 | - cache: (x, w, b) 20 | """ 21 | out = x.reshape(x.shape[0], -1).dot(w) + b 22 | cache = (x, w, b) 23 | return out, cache 24 | 25 | 26 | def affine_backward(dout, cache): 27 | """ 28 | Computes the backward pass for an affine layer. 29 | 30 | Inputs: 31 | - dout: Upstream derivative, of shape (N, M) 32 | - cache: Tuple of: 33 | - x: Input data, of shape (N, d_1, ... d_k) 34 | - w: Weights, of shape (D, M) 35 | - b: Biases, of shape (M,) 36 | Returns a tuple of: 37 | - dx: Gradient with respect to x, of shape (N, d1, ..., d_k) 38 | - dw: Gradient with respect to w, of shape (D, M) 39 | - db: Gradient with respect to b, of shape (M,) 40 | """ 41 | x, w, b = cache 42 | dx = dout.dot(w.T).reshape(x.shape) 43 | dw = x.reshape(x.shape[0], -1).T.dot(dout) 44 | db = np.sum(dout, axis=0) 45 | return dx, dw, db 46 | 47 | 48 | def relu_forward(x): 49 | """ 50 | Computes the forward pass for a layer of rectified linear units (ReLUs). 51 | 52 | Input: 53 | - x: Inputs, of any shape 54 | 55 | Returns a tuple of: 56 | - out: Output, of the same shape as x 57 | - cache: x 58 | """ 59 | out = np.maximum(0, x) 60 | cache = x 61 | return out, cache 62 | 63 | 64 | def relu_backward(dout, cache): 65 | """ 66 | Computes the backward pass for a layer of rectified linear units (ReLUs). 67 | 68 | Input: 69 | - dout: Upstream derivatives, of any shape 70 | - cache: Input x, of same shape as dout 71 | 72 | Returns: 73 | - dx: Gradient with respect to x 74 | """ 75 | x = cache 76 | dx = np.where(x > 0, dout, 0) 77 | return dx 78 | 79 | 80 | def batchnorm_forward(x, gamma, beta, bn_param): 81 | """ 82 | Forward pass for batch normalization. 83 | 84 | During training the sample mean and (uncorrected) sample variance are 85 | computed from minibatch statistics and used to normalize the incoming data. 86 | During training we also keep an exponentially decaying running mean of the mean 87 | and variance of each feature, and these averages are used to normalize data 88 | at test-time. 89 | 90 | At each timestep we update the running averages for mean and variance using 91 | an exponential decay based on the momentum parameter: 92 | 93 | running_mean = momentum * running_mean + (1 - momentum) * sample_mean 94 | running_var = momentum * running_var + (1 - momentum) * sample_var 95 | 96 | Note that the batch normalization paper suggests a different test-time 97 | behavior: they compute sample mean and variance for each feature using a 98 | large number of training images rather than using a running average. For 99 | this implementation we have chosen to use running averages instead since 100 | they do not require an additional estimation step; the torch7 implementation 101 | of batch normalization also uses running averages. 102 | 103 | Input: 104 | - x: Data of shape (N, D) 105 | - gamma: Scale parameter of shape (D,) 106 | - beta: Shift parameter of shape (D,) 107 | - bn_param: Dictionary with the following keys: 108 | - mode: 'train' or 'test'; required 109 | - eps: Constant for numeric stability 110 | - momentum: Constant for running mean / variance.
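Values close to 1 update the running statistics slowly, while values
close to 0 track the current minibatch closely; the default of
momentum=0.9 works well in most situations.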
111 | - running_mean: Array of shape (D,) giving running mean of features 112 | - running_var: Array of shape (D,) giving running variance of features 113 | 114 | Returns a tuple of: 115 | - out: of shape (N, D) 116 | - cache: A tuple of values needed in the backward pass 117 | """ 118 | mode = bn_param["mode"] 119 | eps = bn_param.get("eps", 1e-5) 120 | momentum = bn_param.get("momentum", 0.9) 121 | 122 | N, D = x.shape 123 | running_mean = bn_param.get("running_mean", np.zeros(D, dtype=x.dtype)) 124 | running_var = bn_param.get("running_var", np.zeros(D, dtype=x.dtype)) 125 | 126 | out, cache = None, None 127 | if mode == "train": 128 | # Compute output 129 | mu = x.mean(axis=0) 130 | xc = x - mu 131 | var = np.mean(xc ** 2, axis=0) 132 | std = np.sqrt(var + eps) 133 | xn = xc / std 134 | out = gamma * xn + beta 135 | 136 | cache = (mode, x, gamma, xc, std, xn, out) 137 | 138 | # Update running average of mean 139 | running_mean *= momentum 140 | running_mean += (1 - momentum) * mu 141 | 142 | # Update running average of variance 143 | running_var *= momentum 144 | running_var += (1 - momentum) * var 145 | elif mode == "test": 146 | # Using running mean and variance to normalize 147 | std = np.sqrt(running_var + eps) 148 | xn = (x - running_mean) / std 149 | out = gamma * xn + beta 150 | cache = (mode, x, xn, gamma, beta, std) 151 | else: 152 | raise ValueError('Invalid forward batchnorm mode "%s"' % mode) 153 | 154 | # Store the updated running means back into bn_param 155 | bn_param["running_mean"] = running_mean 156 | bn_param["running_var"] = running_var 157 | 158 | return out, cache 159 | 160 | 161 | def batchnorm_backward(dout, cache): 162 | """ 163 | Backward pass for batch normalization. 164 | 165 | For this implementation, you should write out a computation graph for 166 | batch normalization on paper and propagate gradients backward through 167 | intermediate nodes. 168 | 169 | Inputs: 170 | - dout: Upstream derivatives, of shape (N, D) 171 | - cache: Variable of intermediates from batchnorm_forward. 172 | 173 | Returns a tuple of: 174 | - dx: Gradient with respect to inputs x, of shape (N, D) 175 | - dgamma: Gradient with respect to scale parameter gamma, of shape (D,) 176 | - dbeta: Gradient with respect to shift parameter beta, of shape (D,) 177 | """ 178 | mode = cache[0] 179 | if mode == "train": 180 | mode, x, gamma, xc, std, xn, out = cache 181 | 182 | N = x.shape[0] 183 | dbeta = dout.sum(axis=0) 184 | dgamma = np.sum(xn * dout, axis=0) 185 | dxn = gamma * dout 186 | dxc = dxn / std 187 | dstd = -np.sum((dxn * xc) / (std * std), axis=0) 188 | dvar = 0.5 * dstd / std 189 | dxc += (2.0 / N) * xc * dvar 190 | dmu = np.sum(dxc, axis=0) 191 | dx = dxc - dmu / N 192 | elif mode == "test": 193 | mode, x, xn, gamma, beta, std = cache 194 | dbeta = dout.sum(axis=0) 195 | dgamma = np.sum(xn * dout, axis=0) 196 | dxn = gamma * dout 197 | dx = dxn / std 198 | else: 199 | raise ValueError(mode) 200 | 201 | return dx, dgamma, dbeta 202 | 203 | 204 | def spatial_batchnorm_forward(x, gamma, beta, bn_param): 205 | """ 206 | Computes the forward pass for spatial batch normalization. 207 | 208 | Inputs: 209 | - x: Input data of shape (N, C, H, W) 210 | - gamma: Scale parameter, of shape (C,) 211 | - beta: Shift parameter, of shape (C,) 212 | - bn_param: Dictionary with the following keys: 213 | - mode: 'train' or 'test'; required 214 | - eps: Constant for numeric stability 215 | - momentum: Constant for running mean / variance.
momentum=0 means that 216 | old information is discarded completely at every time step, while 217 | momentum=1 means that new information is never incorporated. The 218 | default of momentum=0.9 should work well in most situations. 219 | - running_mean: Array of shape (C,) giving running mean of features 220 | - running_var: Array of shape (C,) giving running variance of features 221 | 222 | Returns a tuple of: 223 | - out: Output data, of shape (N, C, H, W) 224 | - cache: Values needed for the backward pass 225 | """ 226 | N, C, H, W = x.shape 227 | x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C) 228 | out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param) 229 | out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 230 | return out, cache 231 | 232 | 233 | def spatial_batchnorm_backward(dout, cache): 234 | """ 235 | Computes the backward pass for spatial batch normalization. 236 | 237 | Inputs: 238 | - dout: Upstream derivatives, of shape (N, C, H, W) 239 | - cache: Values from the forward pass 240 | 241 | Returns a tuple of: 242 | - dx: Gradient with respect to inputs, of shape (N, C, H, W) 243 | - dgamma: Gradient with respect to scale parameter, of shape (C,) 244 | - dbeta: Gradient with respect to shift parameter, of shape (C,) 245 | """ 246 | N, C, H, W = dout.shape 247 | dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C) 248 | dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache) 249 | dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) 250 | return dx, dgamma, dbeta 251 | 252 | 253 | def svm_loss(x, y): 254 | """ 255 | Computes the loss and gradient for multiclass SVM classification. 256 | 257 | Inputs: 258 | - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class 259 | for the ith input. 260 | - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and 261 | 0 <= y[i] < C 262 | 263 | Returns a tuple of: 264 | - loss: Scalar giving the loss 265 | - dx: Gradient of the loss with respect to x 266 | """ 267 | N = x.shape[0] 268 | correct_class_scores = x[np.arange(N), y] 269 | margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0) 270 | margins[np.arange(N), y] = 0 271 | loss = np.sum(margins) / N 272 | num_pos = np.sum(margins > 0, axis=1) 273 | dx = np.zeros_like(x) 274 | dx[margins > 0] = 1 275 | dx[np.arange(N), y] -= num_pos 276 | dx /= N 277 | return loss, dx 278 | 279 | 280 | def softmax_loss(x, y): 281 | """ 282 | Computes the loss and gradient for softmax classification. 283 | 284 | Inputs: 285 | - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class 286 | for the ith input.
287 | - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and 288 | 0 <= y[i] < C 289 | 290 | Returns a tuple of: 291 | - loss: Scalar giving the loss 292 | - dx: Gradient of the loss with respect to x 293 | """ 294 | probs = np.exp(x - np.max(x, axis=1, keepdims=True)) 295 | probs /= np.sum(probs, axis=1, keepdims=True) 296 | N = x.shape[0] 297 | loss = -np.sum(np.log(probs[np.arange(N), y])) / N 298 | dx = probs.copy() 299 | dx[np.arange(N), y] -= 1 300 | dx /= N 301 | return loss, dx 302 | -------------------------------------------------------------------------------- /assignment3/cs231n/net_visualization_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import torchvision.transforms as T 4 | import numpy as np 5 | from .image_utils import SQUEEZENET_MEAN, SQUEEZENET_STD 6 | from scipy.ndimage.filters import gaussian_filter1d 7 | 8 | def compute_saliency_maps(X, y, model): 9 | """ 10 | Compute a class saliency map using the model for images X and labels y. 11 | 12 | Input: 13 | - X: Input images; Tensor of shape (N, 3, H, W) 14 | - y: Labels for X; LongTensor of shape (N,) 15 | - model: A pretrained CNN that will be used to compute the saliency map. 16 | 17 | Returns: 18 | - saliency: A Tensor of shape (N, H, W) giving the saliency maps for the input 19 | images. 20 | """ 21 | # Make sure the model is in "test" mode 22 | model.eval() 23 | 24 | # Make input tensor require gradient 25 | X.requires_grad_() 26 | 27 | saliency = None 28 | ############################################################################## 29 | # TODO: Implement this function. Perform a forward and backward pass through # 30 | # the model to compute the gradient of the correct class score with respect # 31 | # to each input image. You first want to compute the loss over the correct # 32 | # scores (we'll combine losses across a batch by summing), and then compute # 33 | # the gradients with a backward pass. # 34 | ############################################################################## 35 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 36 | 37 | scores = model(X) 38 | scores = scores.gather(1, y.view(-1, 1)).squeeze() # pick out the correct-class score for each image 39 | # RuntimeError: grad can be implicitly created only for scalar outputs 40 | # More Info here: https://discuss.pytorch.org/t/loss-backward-raises-error-grad-can-be-implicitly-created-only-for-scalar-outputs/12152 41 | scores.backward(torch.ones_like(scores)) # equivalent to scores.sum().backward() 42 | gradients = X.grad.data 43 | gradients = torch.abs(gradients) 44 | saliency, _ = torch.max(gradients, dim=1) # max gradient magnitude over the 3 color channels 45 | saliency = saliency.squeeze() 46 | 47 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 48 | ############################################################################## 49 | # END OF YOUR CODE # 50 | ############################################################################## 51 | return saliency 52 | 53 | def make_fooling_image(X, target_y, model): 54 | """ 55 | Generate a fooling image that is close to X, but that the model classifies 56 | as target_y. 57 | 58 | Inputs: 59 | - X: Input image; Tensor of shape (1, 3, 224, 224) 60 | - target_y: An integer in the range [0, 1000) 61 | - model: A pretrained CNN 62 | 63 | Returns: 64 | - X_fooling: An image that is close to X, but that is classified as target_y 65 | by the model.
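Example (an illustrative sketch, assuming a pretrained `model` and a
preprocessed input tensor `X`; the target class 6 is arbitrary):
    X_fooling = make_fooling_image(X, target_y=6, model=model)
    scores = model(X_fooling)
    assert scores.argmax(dim=1).item() == 6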
66 | """ 67 | # Initialize our fooling image to the input image, and make it require gradient 68 | X_fooling = X.clone() 69 | X_fooling = X_fooling.requires_grad_() 70 | 71 | learning_rate = 1 72 | ############################################################################## 73 | # TODO: Generate a fooling image X_fooling that the model will classify as # 74 | # the class target_y. You should perform gradient ascent on the score of the # 75 | # target class, stopping when the model is fooled. # 76 | # When computing an update step, first normalize the gradient: # 77 | # dX = learning_rate * g / ||g||_2 # 78 | # # 79 | # You should write a training loop. # 80 | # # 81 | # HINT: For most examples, you should be able to generate a fooling image # 82 | # in fewer than 100 iterations of gradient ascent. # 83 | # You can print your progress over iterations to check your algorithm. # 84 | ############################################################################## 85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 86 | 87 | for i in range(100): 88 | scores = model(X_fooling) 89 | _, pred_y = scores.max(dim = 1) 90 | if pred_y == target_y: 91 | break 92 | scores=scores[0,target_y] 93 | scores.backward() 94 | gradients = X_fooling.grad.data 95 | dx = learning_rate * gradients / gradients.L2() 96 | X_fooling.data += dx 97 | X_fooling.grad.zero_() 98 | 99 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 100 | ############################################################################## 101 | # END OF YOUR CODE # 102 | ############################################################################## 103 | return X_fooling 104 | 105 | def class_visualization_update_step(img, model, target_y, l2_reg, learning_rate): 106 | ######################################################################## 107 | # TODO: Use the model to compute the gradient of the score for the # 108 | # class target_y with respect to the pixels of the image, and make a # 109 | # gradient step on the image using the learning rate. Don't forget the # 110 | # L2 regularization term! # 111 | # Be very careful about the signs of elements in your code. 
def class_visualization_update_step(img, model, target_y, l2_reg, learning_rate):
    ########################################################################
    # TODO: Use the model to compute the gradient of the score for the     #
    # class target_y with respect to the pixels of the image, and make a   #
    # gradient step on the image using the learning rate. Don't forget the #
    # L2 regularization term!                                              #
    # Be very careful about the signs of elements in your code.            #
    ########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    scores = model(img)
    # Maximize s_y(I) - l2_reg * ||I||_2^2. The regularizer must be computed
    # on img itself, not img.data: a term built from img.data is detached from
    # the autograd graph and would contribute no gradient at all.
    objective = scores[0, target_y] - l2_reg * torch.sum(img * img)
    objective.backward()
    gradients = img.grad.data
    dx = learning_rate * gradients / torch.norm(gradients)
    img.data += dx
    img.grad.zero_()

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ########################################################################
    #                         END OF YOUR CODE                             #
    ########################################################################


def preprocess(img, size=224):
    transform = T.Compose([
        T.Resize(size),
        T.ToTensor(),
        T.Normalize(mean=SQUEEZENET_MEAN.tolist(),
                    std=SQUEEZENET_STD.tolist()),
        T.Lambda(lambda x: x[None]),
    ])
    return transform(img)

def deprocess(img, should_rescale=True):
    transform = T.Compose([
        T.Lambda(lambda x: x[0]),
        T.Normalize(mean=[0, 0, 0], std=(1.0 / SQUEEZENET_STD).tolist()),
        T.Normalize(mean=(-SQUEEZENET_MEAN).tolist(), std=[1, 1, 1]),
        T.Lambda(rescale) if should_rescale else T.Lambda(lambda x: x),
        T.ToPILImage(),
    ])
    return transform(img)

def rescale(x):
    low, high = x.min(), x.max()
    x_rescaled = (x - low) / (high - low)
    return x_rescaled

def blur_image(X, sigma=1):
    X_np = X.cpu().clone().numpy()
    X_np = gaussian_filter1d(X_np, sigma, axis=2)
    X_np = gaussian_filter1d(X_np, sigma, axis=3)
    X.copy_(torch.Tensor(X_np).type_as(X))
    return X

def jitter(X, ox, oy):
    """
    Helper function to randomly jitter an image.

    Inputs
    - X: PyTorch Tensor of shape (N, C, H, W)
    - ox, oy: Integers giving number of pixels to jitter along W and H axes

    Returns: A new PyTorch Tensor of shape (N, C, H, W)
    """
    if ox != 0:
        left = X[:, :, :, :-ox]
        right = X[:, :, :, -ox:]
        X = torch.cat([right, left], dim=3)
    if oy != 0:
        top = X[:, :, :-oy]
        bottom = X[:, :, -oy:]
        X = torch.cat([bottom, top], dim=2)
    return X
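
# Editor's note (not part of the original file): jitter() above is a circular
# shift, so it is equivalent to torch.roll with the same offsets. A one-line
# sketch with a hypothetical helper name:
def _editor_demo_jitter_as_roll(X, ox, oy):
    return torch.roll(X, shifts=(oy, ox), dims=(2, 3))
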
--------------------------------------------------------------------------------
/assignment3/cs231n/net_visualization_tensorflow.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
from scipy.ndimage.filters import gaussian_filter1d

def compute_saliency_maps(X, y, model):
    """
    Compute a class saliency map using the model for images X and labels y.

    Input:
    - X: Input images, numpy array of shape (N, H, W, 3)
    - y: Labels for X, numpy of shape (N,)
    - model: A SqueezeNet model that will be used to compute the saliency map.

    Returns:
    - saliency: A numpy array of shape (N, H, W) giving the saliency maps for the
      input images.
    """
    saliency = None
    # Compute the score of the correct class for each example.
    # This gives a Tensor with shape [N], the number of examples.
    #
    # Note: this is equivalent to scores[np.arange(N), y] we used in NumPy
    # for computing vectorized losses.

    ###############################################################################
    # TODO: Produce the saliency maps over a batch of images.                     #
    #                                                                             #
    # 1) Define a gradient tape object and watch input Image variable             #
    # 2) Compute the “loss” for the batch of given input images.                  #
    #    - get scores output by the model for the given batch of input images     #
    #    - use tf.gather_nd or tf.gather to get correct scores                    #
    # 3) Use the gradient() method of the gradient tape object to compute the     #
    #    gradient of the loss with respect to the image                           #
    # 4) Finally, process the returned gradient to compute the saliency map.      #
    ###############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Editor's completion (the original left this stub as `pass`); one possible
    # TF2 implementation following the numbered steps above.
    X = tf.convert_to_tensor(X)
    with tf.GradientTape() as tape:
        tape.watch(X)
        scores = model(X)
        # Select the correct-class score for each example with (row, col) pairs.
        indices = tf.stack([tf.range(X.shape[0]), tf.cast(y, tf.int32)], axis=1)
        correct_scores = tf.gather_nd(scores, indices)
    gradients = tape.gradient(correct_scores, X)
    # Max of |gradient| over the color channel gives the (N, H, W) saliency map.
    saliency = tf.reduce_max(tf.abs(gradients), axis=3).numpy()

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################
    return saliency

def make_fooling_image(X, target_y, model):
    """
    Generate a fooling image that is close to X, but that the model classifies
    as target_y.

    Inputs:
    - X: Input image, a numpy array of shape (1, 224, 224, 3)
    - target_y: An integer in the range [0, 1000)
    - model: Pretrained SqueezeNet model

    Returns:
    - X_fooling: An image that is close to X, but that is classified as target_y
      by the model.
    """

    # Make a copy of the input that we will modify
    X_fooling = X.copy()

    # Step size for the update
    learning_rate = 1

    ##############################################################################
    # TODO: Generate a fooling image X_fooling that the model will classify as   #
    # the class target_y. Use gradient *ascent* on the target class score, using #
    # the model.scores Tensor to get the class scores for the model.image.       #
    # When computing an update step, first normalize the gradient:               #
    #   dX = learning_rate * g / ||g||_2                                         #
    #                                                                            #
    # You should write a training loop, where in each iteration, you make an     #
    # update to the input image X_fooling (don't modify X). The loop should      #
    # stop when the predicted class for the input is the same as target_y.       #
    #                                                                            #
    # HINT: Use tf.GradientTape() to keep track of your gradients and            #
    # use tape.gradient to get the actual gradient with respect to X_fooling.    #
    #                                                                            #
    # HINT 2: For most examples, you should be able to generate a fooling image  #
    # in fewer than 100 iterations of gradient ascent. You can print your        #
    # progress over iterations to check your algorithm.                          #
    ##############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Editor's completion (the original left this stub as `pass`); one possible
    # TF2 implementation of normalized gradient ascent on the target score.
    X_fooling = tf.convert_to_tensor(X_fooling)
    for i in range(100):
        with tf.GradientTape() as tape:
            tape.watch(X_fooling)
            scores = model(X_fooling)
            target_score = scores[0, target_y]
        if tf.argmax(scores[0]).numpy() == target_y:
            break
        g = tape.gradient(target_score, X_fooling)
        X_fooling = X_fooling + learning_rate * g / tf.norm(g)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ##############################################################################
    #                             END OF YOUR CODE                               #
    ##############################################################################
    return X_fooling
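
# Editor's sketch (not part of the original file): tf.gather_nd with (row, col)
# index pairs, on a tiny made-up example; this is the selection used in
# compute_saliency_maps above. The helper name is hypothetical.
def _editor_demo_gather_nd():
    scores = tf.constant([[1.0, 2.0, 3.0],
                          [4.0, 5.0, 6.0]])
    y = tf.constant([2, 0])
    idx = tf.stack([tf.range(2), y], axis=1)  # [[0, 2], [1, 0]]
    return tf.gather_nd(scores, idx)          # [3., 4.]
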
def class_visualization_update_step(X, model, target_y, l2_reg, learning_rate):
    ########################################################################
    # TODO: Compute the value of the gradient of the score for             #
    # class target_y with respect to the pixels of the image, and make a   #
    # gradient step on the image using the learning rate. You should use   #
    # the tf.GradientTape() and tape.gradient to compute gradients.        #
    #                                                                      #
    # Be very careful about the signs of elements in your code.            #
    ########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Editor's completion (the original left this stub as `pass`); one possible
    # TF2 implementation: ascend s_y(I) - l2_reg * ||I||_2^2 with a normalized step.
    with tf.GradientTape() as tape:
        tape.watch(X)
        scores = model(X)
        objective = scores[0, target_y] - l2_reg * tf.reduce_sum(X * X)
    g = tape.gradient(objective, X)
    X = X + learning_rate * g / tf.norm(g)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################

    return X

def blur_image(X, sigma=1):
    X = gaussian_filter1d(X, sigma, axis=1)
    X = gaussian_filter1d(X, sigma, axis=2)
    return X

def jitter(X, ox, oy):
    """
    Helper function to randomly jitter an image.

    Inputs
    - X: Tensor of shape (N, H, W, C)
    - ox, oy: Integers giving number of pixels to jitter along W and H axes

    Returns: A new Tensor of shape (N, H, W, C)
    """
    if ox != 0:
        left = X[:, :, :-ox]
        right = X[:, :, -ox:]
        X = tf.concat([right, left], axis=2)
    if oy != 0:
        top = X[:, :-oy]
        bottom = X[:, -oy:]
        X = tf.concat([bottom, top], axis=1)
    return X
--------------------------------------------------------------------------------
/assignment3/cs231n/optim.py:
--------------------------------------------------------------------------------
import numpy as np

"""
This file implements various first-order update rules that are commonly used for
training neural networks. Each update rule accepts current weights and the
gradient of the loss with respect to those weights and produces the next set of
weights. Each update rule has the same interface:

def update(w, dw, config=None):

Inputs:
  - w: A numpy array giving the current weights.
  - dw: A numpy array of the same shape as w giving the gradient of the
    loss with respect to w.
  - config: A dictionary containing hyperparameter values such as learning rate,
    momentum, etc. If the update rule requires caching values over many
    iterations, then config will also hold these cached values.

Returns:
  - next_w: The next point after the update.
  - config: The config dictionary to be passed to the next iteration of the
    update rule.

NOTE: For most update rules, the default learning rate will probably not perform
well; however the default values of the other hyperparameters should work well
for a variety of different problems.

For efficiency, update rules may perform in-place updates, mutating w and
setting next_w equal to w.
"""


def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    config format:
    - learning_rate: Scalar learning rate.
    """
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-2)

    w -= config["learning_rate"] * dw
    return w, config
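
# Editor's sketch (not part of the original file): one sgd step with concrete
# numbers. The helper name is hypothetical. With learning_rate = 0.1,
# w = [1, 1, 1] and dw = [0.5, -0.5, 1.0] give w' = [0.95, 1.05, 0.9].
def _editor_demo_sgd_step():
    w = np.ones(3)
    dw = np.array([0.5, -0.5, 1.0])
    w, config = sgd(w, dw, {"learning_rate": 0.1})
    return w  # array([0.95, 1.05, 0.9])
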
def adam(x, dx, config=None):
    """
    Uses the Adam update rule, which incorporates moving averages of both the
    gradient and its square and a bias correction term.

    config format:
    - learning_rate: Scalar learning rate.
    - beta1: Decay rate for moving average of first moment of gradient.
    - beta2: Decay rate for moving average of second moment of gradient.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - m: Moving average of gradient.
    - v: Moving average of squared gradient.
    - t: Iteration number.
    """
    if config is None:
        config = {}
    config.setdefault("learning_rate", 1e-3)
    config.setdefault("beta1", 0.9)
    config.setdefault("beta2", 0.999)
    config.setdefault("epsilon", 1e-8)
    config.setdefault("m", np.zeros_like(x))
    config.setdefault("v", np.zeros_like(x))
    config.setdefault("t", 0)

    next_x = None
    beta1, beta2, eps = config["beta1"], config["beta2"], config["epsilon"]
    t, m, v = config["t"], config["m"], config["v"]
    m = beta1 * m + (1 - beta1) * dx
    v = beta2 * v + (1 - beta2) * (dx * dx)
    t += 1
    # Fold both bias corrections (1 - beta1^t and 1 - beta2^t) into a single
    # effective step size alpha.
    alpha = config["learning_rate"] * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    x -= alpha * (m / (np.sqrt(v) + eps))
    config["t"] = t
    config["m"] = m
    config["v"] = v
    next_x = x

    return next_x, config
--------------------------------------------------------------------------------
/assignment3/cs231n/setup.py:
--------------------------------------------------------------------------------
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import numpy

extensions = [
    Extension(
        "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()]
    ),
]

setup(ext_modules=cythonize(extensions),)
--------------------------------------------------------------------------------
/assignment3/cs231n/style_transfer_pytorch.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T
import PIL

import numpy as np

from .image_utils import SQUEEZENET_MEAN, SQUEEZENET_STD

dtype = torch.FloatTensor
# Uncomment the following line if you're on a machine with a GPU set up for PyTorch!
#dtype = torch.cuda.FloatTensor

def content_loss(content_weight, content_current, content_original):
    """
    Compute the content loss for style transfer.

    Inputs:
    - content_weight: Scalar giving the weighting for the content loss.
    - content_current: features of the current image; this is a PyTorch Tensor of shape
      (1, C_l, H_l, W_l).
    - content_original: features of the content image, Tensor with shape (1, C_l, H_l, W_l).

    Returns:
    - scalar content loss
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return content_weight * torch.sum(torch.pow(content_current - content_original, 2))

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
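
# Editor's sketch (not part of the original file): content loss on tiny made-up
# feature maps. The helper name is hypothetical. Eight elements each differing
# by 1, with weight 3, give a loss of 3 * 8 = 24.
def _editor_demo_content_loss():
    cur = torch.ones(1, 2, 2, 2)
    orig = torch.zeros(1, 2, 2, 2)
    return content_loss(3.0, cur, orig)  # tensor(24.)
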
46 | """ 47 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 48 | 49 | N, C, H, W = features.shape 50 | features = features.view(N*C, H*W) 51 | G = torch.mm(features, features.T) 52 | if normalize: 53 | G = G.div(C * H * W) 54 | return G.view((N, C, C)) 55 | 56 | 57 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 58 | 59 | # Now put it together in the style_loss function... 60 | def style_loss(feats, style_layers, style_targets, style_weights): 61 | """ 62 | Computes the style loss at a set of layers. 63 | 64 | Inputs: 65 | - feats: list of the features at every layer of the current image, as produced by 66 | the extract_features function. 67 | - style_layers: List of layer indices into feats giving the layers to include in the 68 | style loss. 69 | - style_targets: List of the same length as style_layers, where style_targets[i] is 70 | a PyTorch Tensor giving the Gram matrix of the source style image computed at 71 | layer style_layers[i]. 72 | - style_weights: List of the same length as style_layers, where style_weights[i] 73 | is a scalar giving the weight for the style loss at layer style_layers[i]. 74 | 75 | Returns: 76 | - style_loss: A PyTorch Tensor holding a scalar giving the style loss. 77 | """ 78 | # Hint: you can do this with one for loop over the style layers, and should 79 | # not be very much code (~5 lines). You will need to use your gram_matrix function. 80 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 81 | 82 | loss = 0 83 | for i in range(len(style_layers)): 84 | G = gram_matrix(feats[style_layers[i]]) 85 | loss += style_weights[i] * torch.sum(torch.pow((G - style_targets[i]),2)) 86 | return loss 87 | 88 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 89 | 90 | def tv_loss(img, tv_weight): 91 | """ 92 | Compute total variation loss. 93 | 94 | Inputs: 95 | - img: PyTorch Variable of shape (1, 3, H, W) holding an input image. 96 | - tv_weight: Scalar giving the weight w_t to use for the TV loss. 97 | 98 | Returns: 99 | - loss: PyTorch Variable holding a scalar giving the total variation loss 100 | for img weighted by tv_weight. 101 | """ 102 | # Your implementation should be vectorized and not require any loops! 103 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 104 | 105 | a = torch.sum(torch.pow(img[:, :, :-1, :] - img[:, :, 1:, :], 2)) 106 | b = torch.sum(torch.pow(img[:, :, :, :-1] - img[:, :, :, 1:], 2)) 107 | return tv_weight * (a + b) 108 | 109 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 110 | def preprocess(img, size=512): 111 | """ Preprocesses a PIL JPG Image object to become a Pytorch tensor 112 | that is ready to be used as an input into the CNN model. 113 | Preprocessing steps: 114 | 1) Resize the image (preserving aspect ratio) until the shortest side is of length `size`. 115 | 2) Convert the PIL Image to a Pytorch Tensor. 116 | 3) Normalize the mean of the image pixel values to be SqueezeNet's expected mean, and 117 | the standard deviation to be SqueezeNet's expected std dev. 118 | 4) Add a batch dimension in the first position of the tensor: aka, a tensor of shape 119 | (H, W, C) will become -> (1, H, W, C). 
120 | """ 121 | transform = T.Compose([ 122 | T.Resize(size), 123 | T.ToTensor(), 124 | T.Normalize(mean=SQUEEZENET_MEAN.tolist(), 125 | std=SQUEEZENET_STD.tolist()), 126 | T.Lambda(lambda x: x[None]), 127 | ]) 128 | return transform(img) 129 | 130 | def deprocess(img): 131 | """ De-processes a Pytorch tensor from the output of the CNN model to become 132 | a PIL JPG Image that we can display, save, etc. 133 | De-processing steps: 134 | 1) Remove the batch dimension at the first position by accessing the slice at index 0. 135 | A tensor of dims (1, H, W, C) will become -> (H, W, C). 136 | 2) Normalize the standard deviation: multiply each channel of the output tensor by 1/s, 137 | scaling the elements back to before scaling by SqueezeNet's standard devs. 138 | No change to the mean. 139 | 3) Normalize the mean: subtract the mean (hence the -m) from each channel of the output tensor, 140 | centering the elements back to before centering on SqueezeNet's input mean. 141 | No change to the std dev. 142 | 4) Rescale all the values in the tensor so that they lie in the interval [0, 1] to prepare for 143 | transforming it into image pixel values. 144 | 5) Convert the Pytorch Tensor to a PIL Image. 145 | """ 146 | transform = T.Compose([ 147 | T.Lambda(lambda x: x[0]), 148 | T.Normalize(mean=[0, 0, 0], std=[1.0 / s for s in SQUEEZENET_STD.tolist()]), 149 | T.Normalize(mean=[-m for m in SQUEEZENET_MEAN.tolist()], std=[1, 1, 1]), 150 | T.Lambda(rescale), 151 | T.ToPILImage(), 152 | ]) 153 | return transform(img) 154 | 155 | def rescale(x): 156 | """ A function used internally inside `deprocess`. 157 | Rescale elements of x linearly to be in the interval [0, 1] 158 | with the minimum element(s) mapped to 0, and the maximum element(s) 159 | mapped to 1. 160 | """ 161 | low, high = x.min(), x.max() 162 | x_rescaled = (x - low) / (high - low) 163 | return x_rescaled 164 | 165 | def rel_error(x,y): 166 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) 167 | 168 | # We provide this helper code which takes an image, a model (cnn), and returns a list of 169 | # feature maps, one per layer. 170 | def extract_features(x, cnn): 171 | """ 172 | Use the CNN to extract features from the input image x. 173 | 174 | Inputs: 175 | - x: A PyTorch Tensor of shape (N, C, H, W) holding a minibatch of images that 176 | will be fed to the CNN. 177 | - cnn: A PyTorch model that we will use to extract features. 178 | 179 | Returns: 180 | - features: A list of feature for the input images x extracted using the cnn model. 181 | features[i] is a PyTorch Tensor of shape (N, C_i, H_i, W_i); recall that features 182 | from different layers of the network may have different numbers of channels (C_i) and 183 | spatial dimensions (H_i, W_i). 
184 | """ 185 | features = [] 186 | prev_feat = x 187 | for i, module in enumerate(cnn._modules.values()): 188 | next_feat = module(prev_feat) 189 | features.append(next_feat) 190 | prev_feat = next_feat 191 | return features 192 | 193 | #please disregard warnings about initialization 194 | def features_from_img(imgpath, imgsize, cnn): 195 | img = preprocess(PIL.Image.open(imgpath), size=imgsize) 196 | img_var = img.type(dtype) 197 | return extract_features(img_var, cnn), img_var 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /assignment3/cs231n/style_transfer_tensorflow.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def tv_loss(img, tv_weight): 5 | """ 6 | Compute total variation loss. 7 | 8 | Inputs: 9 | - img: Tensor of shape (1, H, W, 3) holding an input image. 10 | - tv_weight: Scalar giving the weight w_t to use for the TV loss. 11 | 12 | Returns: 13 | - loss: Tensor holding a scalar giving the total variation loss 14 | for img weighted by tv_weight. 15 | """ 16 | # Your implementation should be vectorized and not require any loops! 17 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 18 | 19 | pass 20 | 21 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 22 | 23 | def style_loss(feats, style_layers, style_targets, style_weights): 24 | """ 25 | Computes the style loss at a set of layers. 26 | 27 | Inputs: 28 | - feats: list of the features at every layer of the current image, as produced by 29 | the extract_features function. 30 | - style_layers: List of layer indices into feats giving the layers to include in the 31 | style loss. 32 | - style_targets: List of the same length as style_layers, where style_targets[i] is 33 | a Tensor giving the Gram matrix of the source style image computed at 34 | layer style_layers[i]. 35 | - style_weights: List of the same length as style_layers, where style_weights[i] 36 | is a scalar giving the weight for the style loss at layer style_layers[i]. 37 | 38 | Returns: 39 | - style_loss: A Tensor containing the scalar style loss. 40 | """ 41 | # Hint: you can do this with one for loop over the style layers, and should 42 | # not be short code (~5 lines). You will need to use your gram_matrix function. 43 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 44 | 45 | pass 46 | 47 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 48 | 49 | def gram_matrix(features, normalize=True): 50 | """ 51 | Compute the Gram matrix from features. 52 | 53 | Inputs: 54 | - features: Tensor of shape (1, H, W, C) giving features for 55 | a single image. 56 | - normalize: optional, whether to normalize the Gram matrix 57 | If True, divide the Gram matrix by the number of neurons (H * W * C) 58 | 59 | Returns: 60 | - gram: Tensor of shape (C, C) giving the (optionally normalized) 61 | Gram matrices for the input image. 62 | """ 63 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 64 | 65 | pass 66 | 67 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 68 | 69 | def content_loss(content_weight, content_current, content_original): 70 | """ 71 | Compute the content loss for style transfer. 72 | 73 | Inputs: 74 | - content_weight: scalar constant we multiply the content_loss by. 
def content_loss(content_weight, content_current, content_original):
    """
    Compute the content loss for style transfer.

    Inputs:
    - content_weight: scalar constant we multiply the content_loss by.
    - content_current: features of the current image, Tensor with shape [1, height, width, channels]
    - content_original: features of the content image, Tensor with shape [1, height, width, channels]

    Returns:
    - scalar content loss
    """
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Editor's completion (the original left this stub as `pass`); one possible
    # TF2 implementation, mirroring the PyTorch version.
    return content_weight * tf.reduce_sum((content_current - content_original) ** 2)

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# We provide this helper code which takes an image, a model (cnn), and returns a list of
# feature maps, one per layer.
def extract_features(x, cnn):
    """
    Use the CNN to extract features from the input image x.

    Inputs:
    - x: A Tensor of shape (N, H, W, C) holding a minibatch of images that
      will be fed to the CNN.
    - cnn: A Tensorflow model that we will use to extract features.

    Returns:
    - features: A list of features for the input images x extracted using the cnn model.
      features[i] is a Tensor of shape (N, H_i, W_i, C_i); recall that features
      from different layers of the network may have different numbers of channels (C_i) and
      spatial dimensions (H_i, W_i).
    """
    features = []
    prev_feat = x
    for i, layer in enumerate(cnn.net.layers[:-2]):
        next_feat = layer(prev_feat)
        features.append(next_feat)
        prev_feat = next_feat
    return features

def rel_error(x, y):
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
--------------------------------------------------------------------------------
/assignment3/example_styletransfer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/example_styletransfer.png
--------------------------------------------------------------------------------
/assignment3/frameworkpython:
--------------------------------------------------------------------------------
#!/bin/bash

# what real Python executable to use
#PYVER=2.7
#PATHTOPYTHON=/usr/local/bin/
#PYTHON=${PATHTOPYTHON}python${PYVER}

PYTHON=$(which $(readlink .env/bin/python)) # only works with python3

# find the root of the virtualenv, it should be the parent of the dir this script is in
ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"`

# now run Python with the virtualenv set as Python's HOME
export PYTHONHOME=$ENV
exec $PYTHON "$@"
--------------------------------------------------------------------------------
/assignment3/gan-checks-tf.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan-checks-tf.npz
--------------------------------------------------------------------------------
/assignment3/gan_outputs_pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan_outputs_pytorch.png
--------------------------------------------------------------------------------
/assignment3/gan_outputs_tf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan_outputs_tf.png
--------------------------------------------------------------------------------
/assignment3/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/kitten.jpg
--------------------------------------------------------------------------------
/assignment3/makepdf.py:
--------------------------------------------------------------------------------
import argparse
import os
import subprocess

try:
    from PyPDF2 import PdfFileMerger

    MERGE = True
except ImportError:
    print("Could not find PyPDF2. Leaving pdf files unmerged.")
    MERGE = False


def main(files):
    os_args = [
        "jupyter",
        "nbconvert",
        "--log-level",
        "CRITICAL",
        "--to",
        "pdf",
    ]
    for f in files:
        os_args.append(f)
        subprocess.run(os_args)
        print("Created PDF {}.".format(f))
    if MERGE:
        pdfs = [f.split(".")[0] + ".pdf" for f in files]
        merger = PdfFileMerger()
        for pdf in pdfs:
            merger.append(pdf)
        merger.write("assignment.pdf")
        merger.close()
        for pdf in pdfs:
            os.remove(pdf)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # we pass in explicit notebook arg so that we can provide
    # an ordered list and produce an ordered pdf
    parser.add_argument("--notebooks", type=str, nargs="+", required=True)
    args = parser.parse_args()
    main(args.notebooks)
--------------------------------------------------------------------------------
/assignment3/requirements.txt:
--------------------------------------------------------------------------------
attrs==19.1.0
backcall==0.1.0
bleach==3.1.0
certifi==2019.3.9
chardet==3.0.4
colorama==0.4.1
cycler==0.10.0
Cython==0.29.16
decorator==4.4.0
defusedxml==0.5.0
entrypoints==0.3
future==0.17.1
gitdb2==2.0.5
GitPython==2.1.11
idna==2.8
ipykernel==5.1.0
ipython==7.4.0
ipython-genutils==0.2.0
ipywidgets==7.4.2
imageio==2.8.0
jedi==0.13.3
Jinja2==2.10
jsonschema==3.0.1
jupyter==1.0.0
jupyter-client==5.2.4
jupyter-console==6.0.0
jupyter-core==4.4.0
jupyterlab==0.35.4
jupyterlab-server==0.2.0
kiwisolver==1.0.1
MarkupSafe==1.1.1
matplotlib==3.0.3
mistune==0.8.4
nbconvert==5.4.1
nbdime==1.0.5
nbformat==4.4.0
notebook==5.7.8
numpy==1.18.4
pandocfilters==1.4.2
parso==0.3.4
pexpect==4.6.0
pickleshare==0.7.5
Pillow==6.0.0
prometheus-client==0.6.0
prompt-toolkit==2.0.9
ptyprocess==0.6.0
Pygments==2.3.1
pyparsing==2.3.1
pyrsistent==0.14.11
python-dateutil==2.8.0
pyzmq==18.0.1
qtconsole==4.4.3
requests==2.21.0
scipy==1.2.1
Send2Trash==1.5.0
six==1.12.0
smmap2==2.0.5
terminado==0.8.2
testpath==0.4.2
tornado==6.0.2
traitlets==4.3.2
urllib3==1.24.1
wcwidth==0.1.7
webencodings==0.5.1
widgetsnbextension==3.4.2
--------------------------------------------------------------------------------
/assignment3/sky.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/sky.jpg
--------------------------------------------------------------------------------
/assignment3/start_ipython_osx.sh:
--------------------------------------------------------------------------------
# Assume the virtualenv is called .env

cp frameworkpython .env/bin
.env/bin/frameworkpython -m IPython notebook
--------------------------------------------------------------------------------
/assignment3/style-transfer-checks-tf.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style-transfer-checks-tf.npz
--------------------------------------------------------------------------------
/assignment3/style-transfer-checks.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style-transfer-checks.npz
--------------------------------------------------------------------------------
/assignment3/style_stransfer.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style_stransfer.gif
--------------------------------------------------------------------------------
/assignment3/style_stransfer2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style_stransfer2.gif
--------------------------------------------------------------------------------
/assignment3/styles/composition_vii.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/composition_vii.jpg
--------------------------------------------------------------------------------
/assignment3/styles/muse.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/muse.jpg
--------------------------------------------------------------------------------
/assignment3/styles/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/starry_night.jpg
--------------------------------------------------------------------------------
/assignment3/styles/the_scream.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/the_scream.jpg
--------------------------------------------------------------------------------
/assignment3/styles/tubingen.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/tubingen.jpg
--------------------------------------------------------------------------------