├── .gitattributes
├── .gitignore
├── README.md
├── assignment1
│   ├── README.md
│   ├── collectSubmission.sh
│   ├── cs231n
│   │   ├── __init__.py
│   │   ├── classifiers
│   │   │   ├── __init__.py
│   │   │   ├── k_nearest_neighbor.py
│   │   │   ├── linear_classifier.py
│   │   │   ├── linear_svm.py
│   │   │   ├── neural_net.py
│   │   │   └── softmax.py
│   │   ├── data_utils.py
│   │   ├── datasets
│   │   │   └── get_datasets.sh
│   │   ├── features.py
│   │   ├── gradient_check.py
│   │   └── vis_utils.py
│   ├── features.ipynb
│   ├── frameworkpython
│   ├── knn.ipynb
│   ├── makepdf.py
│   ├── requirements.txt
│   ├── softmax.ipynb
│   ├── svm.ipynb
│   └── two_layer_net.ipynb
├── assignment2
│   ├── BatchNormalization.ipynb
│   ├── ConvolutionalNetworks.ipynb
│   ├── Dropout.ipynb
│   ├── FullyConnectedNets.ipynb
│   ├── PyTorch.ipynb
│   ├── TensorFlow.ipynb
│   ├── collectSubmission.sh
│   ├── cs231n
│   │   ├── __init__.py
│   │   ├── classifiers
│   │   │   ├── __init__.py
│   │   │   ├── cnn.py
│   │   │   └── fc_net.py
│   │   ├── data_utils.py
│   │   ├── datasets
│   │   │   └── get_datasets.sh
│   │   ├── fast_layers.py
│   │   ├── gradient_check.py
│   │   ├── im2col.py
│   │   ├── im2col_cython.pyx
│   │   ├── layer_utils.py
│   │   ├── layers.py
│   │   ├── notebook_images
│   │   │   ├── batchnorm_graph.png
│   │   │   ├── kitten.jpg
│   │   │   ├── normalization.png
│   │   │   └── puppy.jpg
│   │   ├── optim.py
│   │   ├── setup.py
│   │   ├── solver.py
│   │   └── vis_utils.py
│   ├── frameworkpython
│   ├── makepdf.py
│   ├── requirements.txt
│   └── start_ipython_osx.sh
└── assignment3
    ├── Generative_Adversarial_Networks_PyTorch.ipynb
    ├── Generative_Adversarial_Networks_TF.ipynb
    ├── LSTM_Captioning.ipynb
    ├── NetworkVisualization-PyTorch.ipynb
    ├── NetworkVisualization-TensorFlow.ipynb
    ├── RNN_Captioning.ipynb
    ├── StyleTransfer-PyTorch.ipynb
    ├── StyleTransfer-TensorFlow.ipynb
    ├── collectSubmission.sh
    ├── cs231n
    │   ├── __init__.py
    │   ├── captioning_solver.py
    │   ├── classifiers
    │   │   ├── __init__.py
    │   │   ├── rnn.py
    │   │   └── squeezenet.py
    │   ├── coco_utils.py
    │   ├── data_utils.py
    │   ├── fast_layers.py
    │   ├── gan_pytorch.py
    │   ├── gan_tf.py
    │   ├── gradient_check.py
    │   ├── im2col.py
    │   ├── im2col_cython.pyx
    │   ├── image_utils.py
    │   ├── layer_utils.py
    │   ├── layers.py
    │   ├── net_visualization_pytorch.py
    │   ├── net_visualization_tensorflow.py
    │   ├── optim.py
    │   ├── rnn_layers.py
    │   ├── setup.py
    │   ├── style_transfer_pytorch.py
    │   └── style_transfer_tensorflow.py
    ├── example_styletransfer.png
    ├── frameworkpython
    ├── gan-checks-tf.npz
    ├── gan_outputs_pytorch.png
    ├── gan_outputs_tf.png
    ├── kitten.jpg
    ├── makepdf.py
    ├── requirements.txt
    ├── sky.jpg
    ├── start_ipython_osx.sh
    ├── style-transfer-checks-tf.npz
    ├── style-transfer-checks.npz
    ├── style_stransfer.gif
    ├── style_stransfer2.gif
    └── styles
        ├── composition_vii.jpg
        ├── muse.jpg
        ├── starry_night.jpg
        ├── the_scream.jpg
        └── tubingen.jpg
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 |
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 |
113 | # Rope project settings
114 | .ropeproject
115 |
116 | # mkdocs documentation
117 | /site
118 |
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 |
124 | # Pyre type checker
125 | .pyre/
126 | Slides/
127 | Slides2020/
128 | cs231n.github.io/
129 | CS231n-all-in-one.pdf
130 | datasets/
131 | .virtual_documents/
132 | assignment2/cs231n/im2col_cython.c
133 | assignment3/StyleTransfer-PyTorch-Copy1.ipynb
134 | assignment3/powers.gif
135 | assignment3/未命名.ipynb
136 | assignment1/two_layer_net-Copy1.ipynb
137 | CS231n-2017-Summary/
138 | assignment1/cs231n/classifiers/neural_net_b.py
139 | HyperQuest/pickle/
140 | HyperQuest/old/
141 | HyperQuest/制作指南.md
142 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | 
3 |
4 |
5 | # CS231n-2020-spring-assignment-solution
6 |
7 | ~~TODO:~~ All Finished
8 |
9 | - [x] Assignment [#1](https://cs231n.github.io/assignments2020/assignment1/) (Finished 2020/9/12)
10 | - [x] Assignment [#2](https://cs231n.github.io/assignments2020/assignment2/) (Finished 2020/9/27)
11 | - [x] Assignment [#3](https://cs231n.github.io/assignments2020/assignment3/) (Finished 2020/10/8)
12 | - [x] Notes, scanned with CamScanner ([link](https://www.camscanner.com/share/show?encrypt_id=MHgzZGQ1NzU2NA%3D%3D&sid=689CA9CN&pid=dsa&style=1&share_link_style=2)) (Chinese only)
13 | - [x] HyperQuest (try it [HERE](https://bingcheng.openmc.cn/HyperQuest/))
14 |
15 | ---
16 |
17 | ## HyperQuest
18 |
19 | **HyperQuest** mimics the hyperparameter-tuning app from Stanford's CS231n. It is a web app designed to help beginners in machine learning build a proper intuition for choosing the right hyperparameters, which is an extremely daunting task at first, because poorly chosen hyperparameters make models break down.
20 |
21 | Try HyperQuest [HERE](https://bingcheng.openmc.cn/HyperQuest/)!
22 |
23 | 
24 |
25 | ---
26 |
27 | ## Interesting Examples
28 |
29 | ### KNN
30 |
31 | > Visualize the distance matrix: each row is a single test example and its distances to training examples:
32 | >
33 | > 
34 | >
35 | > Explanation:
36 | >
37 | > 
38 | >
39 | > 
40 | >
41 | > As you can see above, the images that look similar to many other images have black backgrounds. Many images in the dataset have dark margins, so a mostly dark image is close to much of the training set; bright regions, on the other hand, are rarely matched by other images, which produces large distances and hence the white bars.
42 | >
43 | > Find best `k` for kNN:
44 | >
45 | >
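The distance matrix above comes from a fully vectorized computation: expanding the squared L2 norm reduces everything to one matrix product and two broadcast sums. Here is a minimal numpy sketch of the idea (the function name and shapes are illustrative, not taken from the assignment code):

```python
import numpy as np

def l2_distances(X_test, X_train):
    """All pairwise Euclidean distances, with no explicit loops."""
    # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x.y for every test/train pair
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)  # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)               # (num_train,)
    cross = X_test @ X_train.T                            # (num_test, num_train)
    # clip tiny negatives caused by floating-point cancellation
    return np.sqrt(np.maximum(test_sq + train_sq - 2 * cross, 0.0))
```

The best `k` is then picked by cross-validation: split the training set into folds, score each candidate `k` on every held-out fold, and keep the one with the highest mean accuracy.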
46 |
47 |
48 |
49 | ### SVM v.s. Softmax
50 |
51 | > **SVM**
52 | >
53 | >
54 |
55 | > **Softmax**
56 | >
57 | >
58 |
59 | It can be seen that the softmax weight visualizations are much smoother than the SVM ones, which suggests that softmax generalizes better.
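
To see why, here is a toy comparison of the two losses on made-up scores (margin `delta = 1`, as in the assignment). The hinge loss goes exactly to zero once every margin is satisfied, while the cross-entropy loss keeps responding smoothly to all scores:

```python
import numpy as np

scores = np.array([3.2, 5.1, -1.7])  # class scores for one example
y = 0                                # index of the correct class

# Multiclass SVM (hinge) loss: classes already beaten by at least the
# margin contribute nothing.
margins = np.maximum(0, scores - scores[y] + 1)
svm_loss = np.sum(margins) - 1  # drop the j == y term, which is always 1

# Softmax cross-entropy: every score enters through the normalizer, so the
# loss changes smoothly no matter how large the margins get.
shifted = scores - np.max(scores)  # shift for numeric stability
softmax_loss = -shifted[y] + np.log(np.sum(np.exp(shifted)))
```

With these numbers the SVM loss is 2.9 and the softmax loss is about 2.04; pushing the correct score higher keeps lowering the softmax loss even after the SVM loss has already hit zero.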
60 |
61 | ### 2-layer net with different dtype
62 |
63 |
64 |
65 | > With dtype `np.single`, visualize the weights of the first layer:
66 | >
67 | > 
68 |
69 | > With dtype `np.float64`, visualize the weights of the first layer:
70 | >
71 | > 
72 |
73 | As you can see, there is no visible difference between the two.
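
That is expected. A quick hypothetical sanity check (the weight shape below is a stand-in) shows that single-precision rounding error is orders of magnitude below anything an 8-bit weight visualization can display:

```python
import numpy as np

rng = np.random.default_rng(0)
W = 1e-3 * rng.standard_normal((3 * 32 * 32, 50))  # stand-in first-layer weights

# Round-trip through single precision and measure the worst-case change.
err = np.max(np.abs(W - W.astype(np.single).astype(np.float64)))
print(err / np.max(np.abs(W)))  # roughly 1e-8 to 1e-7 relative error
```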
74 |
75 |
76 |
77 | ### Style Transfer GIFs
78 |
79 | 
80 |
81 | 
82 |
83 | Watching from the first iteration, we can see that it makes no difference whether the optimization starts from a random image or from the original content image.
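
For context, the two runs differ only in the image handed to the optimizer at iteration 0. A PyTorch-style sketch of the two initializations (variable names are illustrative, not from the assignment code):

```python
import torch

content_img = torch.rand(1, 3, 192, 192)  # stand-in for the loaded content photo

# Initialization 1: start from random noise.
img = torch.rand_like(content_img).requires_grad_(True)
# Initialization 2: start from the content image itself.
# img = content_img.clone().requires_grad_(True)

optimizer = torch.optim.Adam([img], lr=3.0)  # same losses and updates either way
```

Either way, the same style and content losses drive the updates, which is why the GIFs converge to similar results.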
84 |
--------------------------------------------------------------------------------
/assignment1/README.md:
--------------------------------------------------------------------------------
1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2020.
2 |
3 |
--------------------------------------------------------------------------------
/assignment1/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS
3 | set -euo pipefail
4 |
5 | CODE=(
6 | "cs231n/classifiers/k_nearest_neighbor.py"
7 | "cs231n/classifiers/linear_classifier.py"
8 | "cs231n/classifiers/linear_svm.py"
9 | "cs231n/classifiers/softmax.py"
10 | "cs231n/classifiers/neural_net.py"
11 | )
12 |
13 | # these notebooks should ideally
14 | # be in order of questions so
15 | # that the generated pdf is
16 | # in order of questions
17 | NOTEBOOKS=(
18 | "knn.ipynb"
19 | "svm.ipynb"
20 | "softmax.ipynb"
21 | "two_layer_net.ipynb"
22 | "features.ipynb"
23 | )
24 |
25 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" )
26 |
27 | LOCAL_DIR=`pwd`
28 | ASSIGNMENT_NO=1
29 | ZIP_FILENAME="a1.zip"
30 |
31 | C_R="\e[31m"
32 | C_G="\e[32m"
33 | C_BLD="\e[1m"
34 | C_E="\e[0m"
35 |
36 | for FILE in "${FILES[@]}"
37 | do
38 | if [ ! -f ${FILE} ]; then
39 | echo -e "${C_R}Required file ${FILE} not found, Exiting.${C_E}"
40 | exit 0
41 | fi
42 | done
43 |
44 | echo -e "### Zipping file ###"
45 | rm -f ${ZIP_FILENAME}
46 | zip -q "${ZIP_FILENAME}" -r ${NOTEBOOKS[@]} $(find . -name "*.py") -x "makepdf.py"
47 |
48 | echo -e "### Creating PDFs ###"
49 | python makepdf.py --notebooks "${NOTEBOOKS[@]}"
50 |
51 | echo -e "### Done! Please submit a1.zip and the pdfs to Gradescope. ###"
52 |
--------------------------------------------------------------------------------
/assignment1/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment1/cs231n/__init__.py
--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
1 | from cs231n.classifiers.k_nearest_neighbor import *
2 | from cs231n.classifiers.linear_classifier import *
3 |
--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/k_nearest_neighbor.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | from builtins import object
3 | import numpy as np
4 | from past.builtins import xrange
5 |
6 |
7 | class KNearestNeighbor(object):
8 | """ a kNN classifier with L2 distance """
9 |
10 | def __init__(self):
11 | pass
12 |
13 | def train(self, X, y):
14 | """
15 | Train the classifier. For k-nearest neighbors this is just
16 | memorizing the training data.
17 |
18 | Inputs:
19 | - X: A numpy array of shape (num_train, D) containing the training data
20 | consisting of num_train samples each of dimension D.
21 | - y: A numpy array of shape (N,) containing the training labels, where
22 | y[i] is the label for X[i].
23 | """
24 | self.X_train = X
25 | self.y_train = y
26 |
27 | def predict(self, X, k=1, num_loops=0):
28 | """
29 | Predict labels for test data using this classifier.
30 |
31 | Inputs:
32 | - X: A numpy array of shape (num_test, D) containing test data consisting
33 | of num_test samples each of dimension D.
34 | - k: The number of nearest neighbors that vote for the predicted labels.
35 | - num_loops: Determines which implementation to use to compute distances
36 | between training points and testing points.
37 |
38 | Returns:
39 | - y: A numpy array of shape (num_test,) containing predicted labels for the
40 | test data, where y[i] is the predicted label for the test point X[i].
41 | """
42 | if num_loops == 0:
43 | dists = self.compute_distances_no_loops(X)
44 | elif num_loops == 1:
45 | dists = self.compute_distances_one_loop(X)
46 | elif num_loops == 2:
47 | dists = self.compute_distances_two_loops(X)
48 | else:
49 | raise ValueError('Invalid value %d for num_loops' % num_loops)
50 |
51 | return self.predict_labels(dists, k=k)
52 |
53 |
54 |
55 |
56 | def compute_distances_two_loops(self, X):
57 | """
58 | Compute the distance between each test point in X and each training point
59 | in self.X_train using a nested loop over both the training data and the
60 | test data.
61 |
62 | Inputs:
63 | - X: A numpy array of shape (num_test, D) containing test data.
64 |
65 | Returns:
66 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
67 | is the Euclidean distance between the ith test point and the jth training
68 | point.
69 | """
70 | num_test = X.shape[0]
71 | num_train = self.X_train.shape[0]
72 | dists = np.zeros((num_test, num_train))
73 | for i in range(num_test):
74 | for j in range(num_train):
75 | #####################################################################
76 | # TODO: #
77 | # Compute the l2 distance between the ith test point and the jth #
78 | # training point, and store the result in dists[i, j]. You should #
79 | # not use a loop over dimension, nor use np.linalg.norm(). #
80 | #####################################################################
81 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
82 | dists[i,j] = np.sqrt(np.sum(np.square(self.X_train[j] - X[i])))
83 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
84 | return dists
85 |
86 | def compute_distances_one_loop(self, X):
87 | """
88 | Compute the distance between each test point in X and each training point
89 | in self.X_train using a single loop over the test data.
90 |
91 | Input / Output: Same as compute_distances_two_loops
92 | """
93 | num_test = X.shape[0]
94 | num_train = self.X_train.shape[0]
95 | dists = np.zeros((num_test, num_train))
96 | # num_test = 5
97 | for i in range(num_test):
98 | #######################################################################
99 | # TODO: #
100 | # Compute the l2 distance between the ith test point and all training #
101 | # points, and store the result in dists[i, :]. #
102 | # Do not use np.linalg.norm(). #
103 | #######################################################################
104 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
105 | dists[i, :] = np.sqrt(np.sum(np.square(self.X_train - X[i]), axis = 1))
106 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
107 | return dists
108 |
109 | def compute_distances_no_loops(self, X):
110 | """
111 | Compute the distance between each test point in X and each training point
112 | in self.X_train using no explicit loops.
113 |
114 | Input / Output: Same as compute_distances_two_loops
115 | """
116 | num_test = X.shape[0]
117 | num_train = self.X_train.shape[0]
118 | dists = np.zeros((num_test, num_train))
119 | #########################################################################
120 | # TODO: #
121 | # Compute the l2 distance between all test points and all training #
122 | # points without using any explicit loops, and store the result in #
123 | # dists. #
124 | # #
125 | # You should implement this function using only basic array operations; #
126 | # in particular you should not use functions from scipy, #
127 | # nor use np.linalg.norm(). #
128 | # #
129 | # HINT: Try to formulate the l2 distance using matrix multiplication #
130 | # and two broadcast sums. #
131 | #########################################################################
132 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
133 | # def D3Broadcast(a, b):
134 | # # a is Axn, b is Bxn
135 | # # Return BxA
136 | # assert b.shape[1] == a.shape[1]
137 | # b_new = np.reshape(b,(b.shape[0],1,b.shape[1]))
138 | # a_new = np.tile(a,(b.shape[0],1))
139 | # a_new = np.reshape(a_new,(b.shape[0],a.shape[0],b.shape[1]))
140 | # f = a_new-b_new
141 | # return np.sum(np.abs(f),axis = 2)
142 |
143 | # dists = D3Broadcast(X, self.X_train)
144 | # dists = D3Broadcast(self.X_train, X)
145 | # a = X
146 | # b = self.X_train
147 | # dists = np.sum(np.abs(a.reshape(a.shape[0],1,a.shape[1])-b),axis = 2)
148 | dists = np.sqrt(-2*np.dot(X, self.X_train.T) + np.sum(np.square(self.X_train), axis = 1) + np.transpose([np.sum(np.square(X), axis = 1)])) # ||x-y||^2 = ||x||^2 + ||y||^2 - 2*x.y: one matmul plus two broadcast sums
149 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
150 | return dists
151 |
152 | def predict_labels(self, dists, k=1):
153 | """
154 | Given a matrix of distances between test points and training points,
155 | predict a label for each test point.
156 |
157 | Inputs:
158 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
159 | gives the distance between the ith test point and the jth training point.
160 |
161 | Returns:
162 | - y: A numpy array of shape (num_test,) containing predicted labels for the
163 | test data, where y[i] is the predicted label for the test point X[i].
164 | """
165 | num_test = dists.shape[0]
166 | y_pred = np.zeros(num_test)
167 | # num_test = 4
168 | for i in range(num_test):
169 | # A list of length k storing the labels of the k nearest neighbors to
170 | # the ith test point.
171 | closest_y = []
172 | #########################################################################
173 | # TODO: #
174 | # Use the distance matrix to find the k nearest neighbors of the ith #
175 | # testing point, and use self.y_train to find the labels of these #
176 | # neighbors. Store these labels in closest_y. #
177 | # Hint: Look up the function numpy.argsort. #
178 | #########################################################################
179 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
180 | row_i = dists[i, :]
181 | sort_index = np.argsort(row_i) # no need to negate here, because
182 | # smaller distances are better
183 | kth_near = sort_index[:k]
184 | closest_y = [self.y_train[j] for j in kth_near]
185 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
186 | #########################################################################
187 | # TODO: #
188 | # Now that you have found the labels of the k nearest neighbors, you #
189 | # need to find the most common label in the list closest_y of labels. #
190 | # Store this label in y_pred[i]. Break ties by choosing the smaller #
191 | # label. #
192 | #########################################################################
193 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
194 | # if k == 1:
195 | # y_pred[i] = self.y_train[sort_index[0]]
196 | # continue
197 | maxlabel = max(closest_y, key=closest_y.count)
198 | # print(maxlabel)
199 | y_pred[i] = maxlabel
200 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
201 |
202 | return y_pred
203 |
--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/linear_classifier.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | from builtins import range
4 | from builtins import object
5 | import numpy as np
6 | from cs231n.classifiers.linear_svm import *
7 | from cs231n.classifiers.softmax import *
8 | from past.builtins import xrange
9 |
10 |
11 | class LinearClassifier(object):
12 |
13 | def __init__(self):
14 | self.W = None
15 |
16 | def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
17 | batch_size=200, verbose=False):
18 | """
19 | Train this linear classifier using stochastic gradient descent.
20 |
21 | Inputs:
22 | - X: A numpy array of shape (N, D) containing training data; there are N
23 | training samples each of dimension D.
24 | - y: A numpy array of shape (N,) containing training labels; y[i] = c
25 | means that X[i] has label 0 <= c < C for C classes.
26 | - learning_rate: (float) learning rate for optimization.
27 | - reg: (float) regularization strength.
28 | - num_iters: (integer) number of steps to take when optimizing
29 | - batch_size: (integer) number of training examples to use at each step.
30 | - verbose: (boolean) If true, print progress during optimization.
31 |
32 | Outputs:
33 | A list containing the value of the loss function at each training iteration.
34 | """
35 | num_train, dim = X.shape
36 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
37 | if self.W is None:
38 | # lazily initialize W
39 | self.W = 0.001 * np.random.randn(dim, num_classes)
40 |
41 | # Run stochastic gradient descent to optimize W
42 | loss_history = []
43 | for it in range(num_iters):
44 | X_batch = None
45 | y_batch = None
46 |
47 | #########################################################################
48 | # TODO: #
49 | # Sample batch_size elements from the training data and their #
50 | # corresponding labels to use in this round of gradient descent. #
51 | # Store the data in X_batch and their corresponding labels in #
52 | # y_batch; after sampling X_batch should have shape (batch_size, dim) #
53 | # and y_batch should have shape (batch_size,) #
54 | # #
55 | # Hint: Use np.random.choice to generate indices. Sampling with #
56 | # replacement is faster than sampling without replacement. #
57 | #########################################################################
58 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
59 | mask = np.random.choice(num_train, batch_size, replace=True)
60 | X_batch = X[mask, :]
61 | y_batch = y[mask]
62 | # assert (batch_size, dim == X_batch.shape), X_batch.shape
63 | # assert (batch_size == y_batch.shape[0]), y_batch.shape
64 |
65 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
66 |
67 | # evaluate loss and gradient
68 | loss, grad = self.loss(X_batch, y_batch, reg)
69 | loss_history.append(loss)
70 |
71 | # perform parameter update
72 | #########################################################################
73 | # TODO: #
74 | # Update the weights using the gradient and the learning rate. #
75 | #########################################################################
76 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
77 |
78 | self.W -= learning_rate * grad
79 |
80 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
81 |
82 | if verbose and it % 100 == 0:
83 | print('iteration %d / %d: loss %f' % (it, num_iters, loss))
84 |
85 | return loss_history
86 |
87 | def predict(self, X):
88 | """
89 | Use the trained weights of this linear classifier to predict labels for
90 | data points.
91 |
92 | Inputs:
93 | - X: A numpy array of shape (N, D) containing training data; there are N
94 | training samples each of dimension D.
95 |
96 | Returns:
97 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
98 | array of length N, and each element is an integer giving the predicted
99 | class.
100 | """
101 | y_pred = np.zeros(X.shape[0])
102 | ###########################################################################
103 | # TODO: #
104 | # Implement this method. Store the predicted labels in y_pred. #
105 | ###########################################################################
106 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
107 |
108 | scores = X@self.W
109 | y_pred = np.argmax(scores, axis=1)
110 |
111 |
112 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
113 | return y_pred
114 |
115 | def loss(self, X_batch, y_batch, reg):
116 | """
117 | Compute the loss function and its derivative.
118 | Subclasses will override this.
119 |
120 | Inputs:
121 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N
122 | data points; each point has dimension D.
123 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
124 | - reg: (float) regularization strength.
125 |
126 | Returns: A tuple containing:
127 | - loss as a single float
128 | - gradient with respect to self.W; an array of the same shape as W
129 | """
130 | pass
131 |
132 |
133 | class LinearSVM(LinearClassifier):
134 | """ A subclass that uses the Multiclass SVM loss function """
135 |
136 | def loss(self, X_batch, y_batch, reg):
137 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg)
138 |
139 |
140 | class Softmax(LinearClassifier):
141 | """ A subclass that uses the Softmax + Cross-entropy loss function """
142 |
143 | def loss(self, X_batch, y_batch, reg):
144 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
145 |
--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/linear_svm.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | import numpy as np
3 | from random import shuffle
4 | from past.builtins import xrange
5 |
6 | def svm_loss_naive(W, X, y, reg):
7 | """
8 | Structured SVM loss function, naive implementation (with loops).
9 |
10 | Inputs have dimension D, there are C classes, and we operate on minibatches
11 | of N examples.
12 |
13 | Inputs:
14 | - W: A numpy array of shape (D, C) containing weights.
15 | - X: A numpy array of shape (N, D) containing a minibatch of data.
16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means
17 | that X[i] has label c, where 0 <= c < C.
18 | - reg: (float) regularization strength
19 |
20 | Returns a tuple of:
21 | - loss as single float
22 | - gradient with respect to weights W; an array of same shape as W
23 | """
24 | dW = np.zeros(W.shape) # initialize the gradient as zero
25 |
26 | # compute the loss and the gradient
27 | num_classes = W.shape[1]
28 | num_train = X.shape[0]
29 | loss = 0.0
30 | for i in range(num_train):
31 | scores = X[i].dot(W)
32 | correct_class_score = scores[y[i]]
33 | for j in range(num_classes):
34 | if j == y[i]:
35 | continue # loss += 0
36 |
37 | margin = scores[j] - correct_class_score + 1 # note delta = 1
38 | if margin > 0:
39 | loss += margin
40 | dW[:,j] += X[i].T
41 | dW[:,y[i]] -= X[i].T
42 |
43 |
44 |
45 | # Right now the loss is a sum over all training examples, but we want it
46 | # to be an average instead so we divide by num_train.
47 | loss /= num_train
48 | dW /= num_train
49 |
50 | # Add regularization to the loss.
51 | loss += reg * np.sum(W * W)
52 | dW += 2* reg*W
53 | #############################################################################
54 | # TODO: #
55 | # Compute the gradient of the loss function and store it dW. #
56 | # Rather than first computing the loss and then computing the derivative, #
57 | # it may be simpler to compute the derivative at the same time that the #
58 | # loss is being computed. As a result you may need to modify some of the #
59 | # code above to compute the gradient. #
60 | #############################################################################
61 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
62 |
63 | "Please check the code above"
64 |
65 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
66 |
67 | return loss, dW
68 |
69 |
70 |
71 | def svm_loss_vectorized(W, X, y, reg):
72 | """
73 | Structured SVM loss function, vectorized implementation.
74 |
75 | Inputs and outputs are the same as svm_loss_naive.
76 | """
77 | loss = 0.0
78 | dW = np.zeros(W.shape) # initialize the gradient as zero
79 |
80 | #############################################################################
81 | # TODO: #
82 | # Implement a vectorized version of the structured SVM loss, storing the #
83 | # result in loss. #
84 | #############################################################################
85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
86 |
87 | scores = X.dot(W)
88 | correct_class_scores = scores[np.arange(len(y)), y] # select the correct items
89 | margins = np.maximum(0, (scores.T - correct_class_scores).T + 1) # note delta = 1
90 | margins[np.arange(X.shape[0]), y] = 0 # set correct items to 0
91 | loss = np.sum(margins)/X.shape[0] + reg*np.sum(W*W)
92 |
93 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
94 |
95 | #############################################################################
96 | # TODO: #
97 | # Implement a vectorized version of the gradient for the structured SVM #
98 | # loss, storing the result in dW. #
99 | # #
100 | # Hint: Instead of computing the gradient from scratch, it may be easier #
101 | # to reuse some of the intermediate values that you used to compute the #
102 | # loss. #
103 | #############################################################################
104 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
105 |
106 | # d_score = np.int64((scores.T - correct_class_scores).T + 1 >0)
107 | # # print('shape d_score', d_score.shape)
108 | # d_score[np.arange(scores.shape[0]), y] = 0
109 | # sum_by_row = np.sum(d_score, axis=1)
110 | # d_score[np.arange(scores.shape[0]), y] = -sum_by_row
111 | # dW = X.T@d_score
112 | # # print("dW.shape1", dW.shape)
113 | # dW = dW/X.shape[0] + 2*reg*W
114 | # # print("dW.shape2", dW.shape)
115 | d_score = (scores.T - correct_class_scores).T + 1
116 | d_score[d_score<0]=0
117 | d_score[d_score>0]=1
118 | d_score[np.arange(scores.shape[0]), y] = 0
119 | d_score[np.arange(scores.shape[0]), y] = -np.sum(d_score, axis=1) # correct class gets minus the number of violated margins
120 | dW = X.T@d_score
121 | dW = dW/X.shape[0] + 2*reg*W
122 |
123 |
124 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
125 |
126 | return loss, dW
127 |
--------------------------------------------------------------------------------
/assignment1/cs231n/classifiers/softmax.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | import numpy as np
3 | from random import shuffle
4 | from past.builtins import xrange
5 |
6 | def softmax_loss_naive(W, X, y, reg):
7 | """
8 | Softmax loss function, naive implementation (with loops)
9 |
10 | Inputs have dimension D, there are C classes, and we operate on minibatches
11 | of N examples.
12 |
13 | Inputs:
14 | - W: A numpy array of shape (D, C) containing weights.
15 | - X: A numpy array of shape (N, D) containing a minibatch of data.
16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means
17 | that X[i] has label c, where 0 <= c < C.
18 | - reg: (float) regularization strength
19 |
20 | Returns a tuple of:
21 | - loss as single float
22 | - gradient with respect to weights W; an array of same shape as W
23 | """
24 | # Initialize the loss and gradient to zero.
25 | loss = 0.0
26 | dW = np.zeros_like(W)
27 |
28 | #############################################################################
29 | # TODO: Compute the softmax loss and its gradient using explicit loops. #
30 | # Store the loss in loss and the gradient in dW. If you are not careful #
31 | # here, it is easy to run into numeric instability. Don't forget the #
32 | # regularization! #
33 | #############################################################################
34 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
35 |
36 | num_classes = W.shape[1]
37 | num_train = X.shape[0]
38 | loss = 0.0
39 | for i in range(num_train):
40 | scores = X[i].dot(W)
41 | scores -= np.max(scores) # to avoid numeric instability
42 | # Otherwise: RuntimeWarning: invalid value encountered in true_divide
43 | correct_class_score = scores[y[i]]
44 | # loss += -np.log(np.exp(correct_class_score)/np.sum(np.exp(scores)))
45 | loss += -correct_class_score + np.log(np.sum(np.exp(scores)))
46 |
47 |
48 | for j in range(num_classes):
49 | p = np.exp(scores[j])/np.sum(np.exp(scores))
50 | if j == y[i]:
51 | dW[:,j] += (p-1)*X[i].T
52 | else:
53 | dW[:,j] += p*X[i].T
54 |
55 | loss /= num_train
56 | dW /= num_train
57 |
58 | loss += reg * np.sum(W * W)
59 | dW += 2* reg*W
60 |
61 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
62 |
63 | return loss, dW
64 |
65 |
66 | def softmax_loss_vectorized(W, X, y, reg):
67 | """
68 | Softmax loss function, vectorized version.
69 |
70 | Inputs and outputs are the same as softmax_loss_naive.
71 | """
72 | # Initialize the loss and gradient to zero.
73 | loss = 0.0
74 | dW = np.zeros_like(W)
75 |
76 | #############################################################################
77 | # TODO: Compute the softmax loss and its gradient using no explicit loops. #
78 | # Store the loss in loss and the gradient in dW. If you are not careful #
79 | # here, it is easy to run into numeric instability. Don't forget the #
80 | # regularization! #
81 | #############################################################################
82 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
83 | num_train = X.shape[0]
84 |
85 | scores = X@W
86 | scores = (scores.T - np.max(scores, axis = 1)).T # to avoid numeric instability
87 | correct4rows = scores[range(len(y)),y]
88 | loss = np.sum(-correct4rows + np.log(np.sum(np.exp(scores), axis = 1)))
89 | dscore = np.exp(scores)/np.sum(np.exp(scores), axis = 1).reshape(-1,1)
90 | dscore[range(len(y)),y] -= 1
91 | dW = X.T@dscore
92 |
93 | loss /= num_train
94 | dW /= num_train
95 |
96 | loss += reg * np.sum(W * W)
97 | dW += 2* reg*W
98 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
99 |
100 | return loss, dW
101 |
--------------------------------------------------------------------------------
/assignment1/cs231n/data_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | from builtins import range
4 | from six.moves import cPickle as pickle
5 | import numpy as np
6 | import os
7 | from imageio import imread
8 | import platform
9 |
10 | def load_pickle(f):
11 | version = platform.python_version_tuple()
12 | if version[0] == '2':
13 | return pickle.load(f)
14 | elif version[0] == '3':
15 | return pickle.load(f, encoding='latin1')
16 | raise ValueError("invalid python version: {}".format(version))
17 |
18 | def load_CIFAR_batch(filename):
19 | """ load single batch of cifar """
20 | with open(filename, 'rb') as f:
21 | datadict = load_pickle(f)
22 | X = datadict['data']
23 | Y = datadict['labels']
24 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
25 | Y = np.array(Y)
26 | return X, Y
27 |
28 | def load_CIFAR10(ROOT):
29 | """ load all of cifar """
30 | xs = []
31 | ys = []
32 | for b in range(1,6):
33 | f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
34 | X, Y = load_CIFAR_batch(f)
35 | xs.append(X)
36 | ys.append(Y)
37 | Xtr = np.concatenate(xs)
38 | Ytr = np.concatenate(ys)
39 | del X, Y
40 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
41 | return Xtr, Ytr, Xte, Yte
42 |
43 |
44 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
45 | subtract_mean=True):
46 | """
47 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
48 | it for classifiers. These are the same steps as we used for the SVM, but
49 | condensed to a single function.
50 | """
51 | # Load the raw CIFAR-10 data
52 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
53 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
54 |
55 | # Subsample the data
56 | mask = list(range(num_training, num_training + num_validation))
57 | X_val = X_train[mask]
58 | y_val = y_train[mask]
59 | mask = list(range(num_training))
60 | X_train = X_train[mask]
61 | y_train = y_train[mask]
62 | mask = list(range(num_test))
63 | X_test = X_test[mask]
64 | y_test = y_test[mask]
65 |
66 | # Normalize the data: subtract the mean image
67 | if subtract_mean:
68 | mean_image = np.mean(X_train, axis=0)
69 | X_train -= mean_image
70 | X_val -= mean_image
71 | X_test -= mean_image
72 |
73 | # Transpose so that channels come first
74 | X_train = X_train.transpose(0, 3, 1, 2).copy()
75 | X_val = X_val.transpose(0, 3, 1, 2).copy()
76 | X_test = X_test.transpose(0, 3, 1, 2).copy()
77 |
78 | # Package data into a dictionary
79 | return {
80 | 'X_train': X_train, 'y_train': y_train,
81 | 'X_val': X_val, 'y_val': y_val,
82 | 'X_test': X_test, 'y_test': y_test,
83 | }
84 |
85 |
86 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
87 | """
88 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
89 | TinyImageNet-200 have the same directory structure, so this can be used
90 | to load any of them.
91 |
92 | Inputs:
93 | - path: String giving path to the directory to load.
94 | - dtype: numpy datatype used to load the data.
95 | - subtract_mean: Whether to subtract the mean training image.
96 |
97 | Returns: A dictionary with the following entries:
98 | - class_names: A list where class_names[i] is a list of strings giving the
99 | WordNet names for class i in the loaded dataset.
100 | - X_train: (N_tr, 3, 64, 64) array of training images
101 | - y_train: (N_tr,) array of training labels
102 | - X_val: (N_val, 3, 64, 64) array of validation images
103 | - y_val: (N_val,) array of validation labels
104 | - X_test: (N_test, 3, 64, 64) array of testing images.
105 | - y_test: (N_test,) array of test labels; if test labels are not available
106 | (such as in student code) then y_test will be None.
107 | - mean_image: (3, 64, 64) array giving mean training image
108 | """
109 | # First load wnids
110 | with open(os.path.join(path, 'wnids.txt'), 'r') as f:
111 | wnids = [x.strip() for x in f]
112 |
113 | # Map wnids to integer labels
114 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
115 |
116 | # Use words.txt to get names for each class
117 | with open(os.path.join(path, 'words.txt'), 'r') as f:
118 | wnid_to_words = dict(line.split('\t') for line in f)
119 | for wnid, words in wnid_to_words.items():
120 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
121 | class_names = [wnid_to_words[wnid] for wnid in wnids]
122 |
123 | # Next load training data.
124 | X_train = []
125 | y_train = []
126 | for i, wnid in enumerate(wnids):
127 | if (i + 1) % 20 == 0:
128 | print('loading training data for synset %d / %d'
129 | % (i + 1, len(wnids)))
130 | # To figure out the filenames we need to open the boxes file
131 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
132 | with open(boxes_file, 'r') as f:
133 | filenames = [x.split('\t')[0] for x in f]
134 | num_images = len(filenames)
135 |
136 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
137 | y_train_block = wnid_to_label[wnid] * \
138 | np.ones(num_images, dtype=np.int64)
139 | for j, img_file in enumerate(filenames):
140 | img_file = os.path.join(path, 'train', wnid, 'images', img_file)
141 | img = imread(img_file)
142 | if img.ndim == 2:
143 | ## grayscale file
144 | img.shape = (64, 64, 1)
145 | X_train_block[j] = img.transpose(2, 0, 1)
146 | X_train.append(X_train_block)
147 | y_train.append(y_train_block)
148 |
149 | # We need to concatenate all training data
150 | X_train = np.concatenate(X_train, axis=0)
151 | y_train = np.concatenate(y_train, axis=0)
152 |
153 | # Next load validation data
154 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
155 | img_files = []
156 | val_wnids = []
157 | for line in f:
158 | img_file, wnid = line.split('\t')[:2]
159 | img_files.append(img_file)
160 | val_wnids.append(wnid)
161 | num_val = len(img_files)
162 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
163 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
164 | for i, img_file in enumerate(img_files):
165 | img_file = os.path.join(path, 'val', 'images', img_file)
166 | img = imread(img_file)
167 | if img.ndim == 2:
168 | img.shape = (64, 64, 1)
169 | X_val[i] = img.transpose(2, 0, 1)
170 |
171 | # Next load test images
172 | # Students won't have test labels, so we need to iterate over files in the
173 | # images directory.
174 | img_files = os.listdir(os.path.join(path, 'test', 'images'))
175 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
176 | for i, img_file in enumerate(img_files):
177 | img_file = os.path.join(path, 'test', 'images', img_file)
178 | img = imread(img_file)
179 | if img.ndim == 2:
180 | img.shape = (64, 64, 1)
181 | X_test[i] = img.transpose(2, 0, 1)
182 |
183 | y_test = None
184 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
185 | if os.path.isfile(y_test_file):
186 | with open(y_test_file, 'r') as f:
187 | img_file_to_wnid = {}
188 | for line in f:
189 | line = line.split('\t')
190 | img_file_to_wnid[line[0]] = line[1]
191 | y_test = [wnid_to_label[img_file_to_wnid[img_file]]
192 | for img_file in img_files]
193 | y_test = np.array(y_test)
194 |
195 | mean_image = X_train.mean(axis=0)
196 | if subtract_mean:
197 | X_train -= mean_image[None]
198 | X_val -= mean_image[None]
199 | X_test -= mean_image[None]
200 |
201 | return {
202 | 'class_names': class_names,
203 | 'X_train': X_train,
204 | 'y_train': y_train,
205 | 'X_val': X_val,
206 | 'y_val': y_val,
207 | 'X_test': X_test,
208 | 'y_test': y_test,
209 | 'mean_image': mean_image,
210 | }
211 |
213 |
214 | def load_models(models_dir):
215 | """
216 | Load saved models from disk. This will attempt to unpickle all files in a
217 | directory; any files that give errors on unpickling (such as README.txt)
218 | will be skipped.
219 |
220 | Inputs:
221 | - models_dir: String giving the path to a directory containing model files.
222 | Each model file is a pickled dictionary with a 'model' field.
223 |
224 | Returns:
225 | A dictionary mapping model file names to models.
226 | """
227 | models = {}
228 | for model_file in os.listdir(models_dir):
229 | with open(os.path.join(models_dir, model_file), 'rb') as f:
230 | try:
231 | models[model_file] = load_pickle(f)['model']
232 | except pickle.UnpicklingError:
233 | continue
234 | return models
235 |
236 |
237 | def load_imagenet_val(num=None):
238 | """Load a handful of validation images from ImageNet.
239 |
240 | Inputs:
241 | - num: Number of images to load (max of 25)
242 |
243 | Returns:
244 | - X: numpy array with shape [num, 224, 224, 3]
245 | - y: numpy array of integer image labels, shape [num]
246 | - class_names: dict mapping integer label to class name
247 | """
248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz'
249 | if not os.path.isfile(imagenet_fn):
250 | print('file %s not found' % imagenet_fn)
251 | print('Run the following:')
252 | print('cd cs231n/datasets')
253 | print('bash get_imagenet_val.sh')
254 | assert False, 'Need to download imagenet_val_25.npz'
255 | f = np.load(imagenet_fn)
256 | X = f['X']
257 | y = f['y']
258 | class_names = f['label_map'].item()
259 | if num is not None:
260 | X = X[:num]
261 | y = y[:num]
262 | return X, y, class_names
263 |
--------------------------------------------------------------------------------
/assignment1/cs231n/datasets/get_datasets.sh:
--------------------------------------------------------------------------------
1 | # Get CIFAR10
2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz
3 | tar -xzvf cifar-10-python.tar.gz
4 | rm cifar-10-python.tar.gz
5 |
--------------------------------------------------------------------------------
/assignment1/cs231n/features.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from builtins import zip
3 | from builtins import range
4 | from past.builtins import xrange
5 |
6 | import matplotlib
7 | import numpy as np
8 | from scipy.ndimage import uniform_filter
9 |
10 |
11 | def extract_features(imgs, feature_fns, verbose=False):
12 | """
13 | Given pixel data for images and several feature functions that can operate on
14 | single images, apply all feature functions to all images, concatenating the
15 | feature vectors for each image and storing the features for all images in
16 | a single matrix.
17 |
18 | Inputs:
19 | - imgs: N x H X W X C array of pixel data for N images.
20 | - feature_fns: List of k feature functions. The ith feature function should
21 | take as input an H x W x D array and return a (one-dimensional) array of
22 | length F_i.
23 | - verbose: Boolean; if true, print progress.
24 |
25 | Returns:
26 | An array of shape (N, F_1 + ... + F_k) where each column is the concatenation
27 | of all features for a single image.
28 | """
29 | num_images = imgs.shape[0]
30 | if num_images == 0:
31 | return np.array([])
32 |
33 | # Use the first image to determine feature dimensions
34 | feature_dims = []
35 | first_image_features = []
36 | for feature_fn in feature_fns:
37 | feats = feature_fn(imgs[0].squeeze())
38 | assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
39 | feature_dims.append(feats.size)
40 | first_image_features.append(feats)
41 |
42 | # Now that we know the dimensions of the features, we can allocate a single
43 | # big array to store all features as columns.
44 | total_feature_dim = sum(feature_dims)
45 | imgs_features = np.zeros((num_images, total_feature_dim))
46 | imgs_features[0] = np.hstack(first_image_features).T
47 |
48 | # Extract features for the rest of the images.
49 | for i in range(1, num_images):
50 | idx = 0
51 | for feature_fn, feature_dim in zip(feature_fns, feature_dims):
52 | next_idx = idx + feature_dim
53 | imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
54 | idx = next_idx
55 | if verbose and i % 1000 == 999:
56 | print('Done extracting features for %d / %d images' % (i+1, num_images))
57 |
58 | return imgs_features
59 |
60 |
61 | def rgb2gray(rgb):
62 | """Convert RGB image to grayscale
63 |
64 | Parameters:
65 | rgb : RGB image
66 |
67 | Returns:
68 | gray : grayscale image
69 |
70 | """
71 | return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])  # ITU-R BT.601 luma weights
72 |
73 |
74 | def hog_feature(im):
75 | """Compute Histogram of Gradient (HOG) feature for an image
76 |
77 | Modified from skimage.feature.hog
78 | https://scikit-image.org/docs/dev/api/skimage.feature.html#skimage.feature.hog
79 |
80 | Reference:
81 | Histograms of Oriented Gradients for Human Detection
82 | Navneet Dalal and Bill Triggs, CVPR 2005
83 |
84 | Parameters:
85 | im : an input grayscale or rgb image
86 |
87 | Returns:
88 | feat: Histogram of Gradient (HOG) feature
89 |
90 | """
91 |
92 | # convert rgb to grayscale if needed
93 | if im.ndim == 3:
94 | image = rgb2gray(im)
95 | else:
96 | image = np.atleast_2d(im)
97 |
98 | sx, sy = image.shape # image size
99 | orientations = 9 # number of gradient bins
100 | cx, cy = (8, 8) # pixels per cell
101 |
102 | gx = np.zeros(image.shape)
103 | gy = np.zeros(image.shape)
104 | gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction
105 | gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction
106 | grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude
107 | grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation
108 |
109 | n_cellsx = int(np.floor(sx / cx)) # number of cells in x
110 | n_cellsy = int(np.floor(sy / cy)) # number of cells in y
111 | # compute orientations integral images
112 | orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
113 | for i in range(orientations):
114 | # create new integral image for this orientation
115 | # isolate orientations in this range
116 | temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
117 | grad_ori, 0)
118 | temp_ori = np.where(grad_ori >= 180 / orientations * i,
119 | temp_ori, 0)
120 | # select magnitudes for those orientations
121 | cond2 = temp_ori > 0
122 | temp_mag = np.where(cond2, grad_mag, 0)
123 | orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[round(cx/2)::cx, round(cy/2)::cy].T
124 |
125 | return orientation_histogram.ravel()
126 |
127 |
128 | def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
129 | """
130 | Compute color histogram for an image using hue.
131 |
132 | Inputs:
133 | - im: H x W x C array of pixel data for an RGB image.
134 | - nbin: Number of histogram bins. (default: 10)
135 | - xmin: Minimum pixel value (default: 0)
136 | - xmax: Maximum pixel value (default: 255)
137 | - normalized: Whether to normalize the histogram (default: True)
138 |
139 | Returns:
140 | 1D vector of length nbin giving the color histogram over the hue of the
141 | input image.
142 | """
143 | ndim = im.ndim
144 | bins = np.linspace(xmin, xmax, nbin+1)
145 | hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax
146 | imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized)
147 | imhist = imhist * np.diff(bin_edges)
148 |
149 | # return histogram
150 | return imhist
151 |
152 |
153 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
154 |
155 | pass
156 |
157 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
158 |
--------------------------------------------------------------------------------
/assignment1/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from builtins import range
3 | from past.builtins import xrange
4 |
5 | import numpy as np
6 | from random import randrange
7 |
8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
9 | """
10 | a naive implementation of numerical gradient of f at x
11 | - f should be a function that takes a single argument
12 | - x is the point (numpy array) to evaluate the gradient at
13 | """
14 |
15 | fx = f(x) # evaluate function value at original point
16 | grad = np.zeros_like(x)
17 | # iterate over all indexes in x
18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
19 | while not it.finished:
20 |
21 | # evaluate function at x+h
22 | ix = it.multi_index
23 | oldval = x[ix]
24 | x[ix] = oldval + h # increment by h
25 | fxph = f(x) # evalute f(x + h)
26 | x[ix] = oldval - h
27 | fxmh = f(x) # evaluate f(x - h)
28 | x[ix] = oldval # restore
29 |
30 | # compute the partial derivative with centered formula
31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope
32 | if verbose:
33 | print(ix, grad[ix])
34 | it.iternext() # step to next dimension
35 |
36 | return grad
37 |
38 |
39 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
40 | """
41 | Evaluate a numeric gradient for a function that accepts a numpy
42 | array and returns a numpy array.
43 | """
44 | grad = np.zeros_like(x)
45 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
46 | while not it.finished:
47 | ix = it.multi_index
48 |
49 | oldval = x[ix]
50 | x[ix] = oldval + h
51 | pos = f(x).copy()
52 | x[ix] = oldval - h
53 | neg = f(x).copy()
54 | x[ix] = oldval
55 |
56 | grad[ix] = np.sum((pos - neg) * df) / (2 * h)
57 | it.iternext()
58 | return grad
59 |
60 |
61 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
62 | """
63 | Compute numeric gradients for a function that operates on input
64 | and output blobs.
65 |
66 | We assume that f accepts several input blobs as arguments, followed by a
67 | blob where outputs will be written. For example, f might be called like:
68 |
69 | f(x, w, out)
70 |
71 | where x and w are input Blobs, and the result of f will be written to out.
72 |
73 | Inputs:
74 | - f: function
75 | - inputs: tuple of input blobs
76 | - output: output blob
77 | - h: step size
78 | """
79 | numeric_diffs = []
80 | for input_blob in inputs:
81 | diff = np.zeros_like(input_blob.diffs)
82 | it = np.nditer(input_blob.vals, flags=['multi_index'],
83 | op_flags=['readwrite'])
84 | while not it.finished:
85 | idx = it.multi_index
86 | orig = input_blob.vals[idx]
87 |
88 | input_blob.vals[idx] = orig + h
89 | f(*(inputs + (output,)))
90 | pos = np.copy(output.vals)
91 | input_blob.vals[idx] = orig - h
92 | f(*(inputs + (output,)))
93 | neg = np.copy(output.vals)
94 | input_blob.vals[idx] = orig
95 |
96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
97 |
98 | it.iternext()
99 | numeric_diffs.append(diff)
100 | return numeric_diffs
101 |
102 |
103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
104 | return eval_numerical_gradient_blobs(lambda *args: net.forward(),
105 | inputs, output, h=h)
106 |
107 |
108 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
109 | """
110 | Sample a few random elements and only check the numerical gradient
111 | in those dimensions.
112 | """
113 |
114 | for i in range(num_checks):
115 | ix = tuple([randrange(m) for m in x.shape])
116 |
117 | oldval = x[ix]
118 | x[ix] = oldval + h # increment by h
119 | fxph = f(x) # evaluate f(x + h)
120 | x[ix] = oldval - h # decrement by h
121 | fxmh = f(x) # evaluate f(x - h)
122 | x[ix] = oldval # reset
123 |
124 | grad_numerical = (fxph - fxmh) / (2 * h)
125 | grad_analytic = analytic_grad[ix]
126 | rel_error = (abs(grad_numerical - grad_analytic) /
127 | (abs(grad_numerical) + abs(grad_analytic)))
128 | print('numerical: %f analytic: %f, relative error: %e'
129 | %(grad_numerical, grad_analytic, rel_error))
130 |
--------------------------------------------------------------------------------
/assignment1/cs231n/vis_utils.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | from past.builtins import xrange
3 |
4 | from math import sqrt, ceil
5 | import numpy as np
6 |
7 | def visualize_grid(Xs, ubound=255.0, padding=1):
8 | """
9 | Reshape a 4D tensor of image data to a grid for easy visualization.
10 |
11 | Inputs:
12 | - Xs: Data of shape (N, H, W, C)
13 | - ubound: Output grid will have values scaled to the range [0, ubound]
14 | - padding: The number of blank pixels between elements of the grid
15 | """
16 | (N, H, W, C) = Xs.shape
17 | grid_size = int(ceil(sqrt(N)))
18 | grid_height = H * grid_size + padding * (grid_size - 1)
19 | grid_width = W * grid_size + padding * (grid_size - 1)
20 | grid = np.zeros((grid_height, grid_width, C))
21 | next_idx = 0
22 | y0, y1 = 0, H
23 | for y in range(grid_size):
24 | x0, x1 = 0, W
25 | for x in range(grid_size):
26 | if next_idx < N:
27 | img = Xs[next_idx]
28 | low, high = np.min(img), np.max(img)
29 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
30 | # grid[y0:y1, x0:x1] = Xs[next_idx]
31 | next_idx += 1
32 | x0 += W + padding
33 | x1 += W + padding
34 | y0 += H + padding
35 | y1 += H + padding
36 | # grid_max = np.max(grid)
37 | # grid_min = np.min(grid)
38 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
39 | return grid
40 |
41 | def vis_grid(Xs):
42 | """ visualize a grid of images """
43 | (N, H, W, C) = Xs.shape
44 | A = int(ceil(sqrt(N)))
45 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
46 | G *= np.min(Xs)
47 | n = 0
48 | for y in range(A):
49 | for x in range(A):
50 | if n < N:
51 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:]
52 | n += 1
53 | # normalize to [0,1]
54 | maxg = G.max()
55 | ming = G.min()
56 | G = (G - ming)/(maxg-ming)
57 | return G
58 |
59 | def vis_nn(rows):
60 | """ visualize array of arrays of images """
61 | N = len(rows)
62 | D = len(rows[0])
63 | H,W,C = rows[0][0].shape
64 | Xs = rows[0][0]
65 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
66 | for y in range(N):
67 | for x in range(D):
68 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
69 | # normalize to [0,1]
70 | maxg = G.max()
71 | ming = G.min()
72 | G = (G - ming)/(maxg-ming)
73 | return G
74 |
--------------------------------------------------------------------------------
/assignment1/frameworkpython:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # what real Python executable to use
4 | #PYVER=2.7
5 | #PATHTOPYTHON=/usr/local/bin/
6 | #PYTHON=${PATHTOPYTHON}python${PYVER}
7 |
8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3
9 |
10 | # find the root of the virtualenv, it should be the parent of the dir this script is in
11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"`
12 |
13 | # now run Python with the virtualenv set as Python's HOME
14 | export PYTHONHOME=$ENV
15 | exec $PYTHON "$@"
16 |
--------------------------------------------------------------------------------
/assignment1/makepdf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import subprocess
4 |
5 | try:
6 | from PyPDF2 import PdfFileMerger
7 | MERGE = True
8 | except ImportError:
9 | print("Could not find PyPDF2. Leaving pdf files unmerged.")
10 | MERGE = False
11 |
12 |
13 | def main(files):
14 | os_args = [
15 | 'jupyter',
16 | 'nbconvert',
17 | '--log-level',
18 | 'CRITICAL',
19 | '--to',
20 | 'pdf',
21 | ]
22 | for f in files:
23 | os_args.append(f)
24 | subprocess.run(os_args)
25 | print("Created PDF {}.".format(f))
26 | if MERGE:
27 | pdfs = [f.split('.')[0] + ".pdf" for f in files]
28 | merger = PdfFileMerger()
29 | for pdf in pdfs:
30 | merger.append(pdf)
31 | merger.write("assignment.pdf")
32 | merger.close()
33 | for pdf in pdfs:
34 | os.remove(pdf)
35 |
36 |
37 | if __name__ == "__main__":
38 | parser = argparse.ArgumentParser()
39 | # we pass in explicit notebook arg so that we can provide
40 | # an ordered list and produce an ordered pdf
41 | parser.add_argument("--notebooks", type=str, nargs='+', required=True)
42 | args = parser.parse_args()
43 | main(args.notebooks)
44 |
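# Typical invocation from the assignment1 directory; the notebook order
# determines the page order of the merged assignment.pdf (example list, not
# necessarily the required submission order):
#
#   python makepdf.py --notebooks knn.ipynb svm.ipynb softmax.ipynb \
#       two_layer_net.ipynb features.ipynb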
--------------------------------------------------------------------------------
/assignment1/requirements.txt:
--------------------------------------------------------------------------------
1 | attrs==19.1.0
2 | backcall==0.1.0
3 | bleach==3.1.0
4 | certifi==2019.3.9
5 | chardet==3.0.4
6 | colorama==0.4.1
7 | cycler==0.10.0
8 | decorator==4.4.0
9 | defusedxml==0.5.0
10 | entrypoints==0.3
11 | future==0.17.1
12 | gitdb2==2.0.5
13 | GitPython==2.1.11
14 | idna==2.8
15 | ipykernel==5.1.0
16 | ipython==7.4.0
17 | ipython-genutils==0.2.0
18 | ipywidgets==7.4.2
19 | imageio==2.8.0
20 | jedi==0.13.3
21 | Jinja2==2.10
22 | jsonschema==3.0.1
23 | jupyter==1.0.0
24 | jupyter-client==5.2.4
25 | jupyter-console==6.0.0
26 | jupyter-core==4.4.0
27 | jupyterlab==0.35.4
28 | jupyterlab-server==0.2.0
29 | kiwisolver==1.0.1
30 | MarkupSafe==1.1.1
31 | matplotlib==3.0.3
32 | mistune==0.8.4
33 | nbconvert==5.4.1
34 | nbdime==1.0.5
35 | nbformat==4.4.0
36 | notebook==5.7.8
37 | numpy==1.16.2
38 | pandocfilters==1.4.2
39 | parso==0.3.4
40 | pexpect==4.6.0
41 | pickleshare==0.7.5
42 | Pillow==6.0.0
43 | prometheus-client==0.6.0
44 | prompt-toolkit==2.0.9
45 | ptyprocess==0.6.0
46 | Pygments==2.3.1
47 | pyparsing==2.3.1
48 | pyrsistent==0.14.11
49 | python-dateutil==2.8.0
50 | pyzmq==18.0.1
51 | qtconsole==4.4.3
52 | requests==2.21.0
53 | scipy==1.2.1
54 | Send2Trash==1.5.0
55 | six==1.12.0
56 | smmap2==2.0.5
57 | terminado==0.8.2
58 | testpath==0.4.2
59 | tornado==6.0.2
60 | traitlets==4.3.2
61 | urllib3==1.24.1
62 | wcwidth==0.1.7
63 | webencodings==0.5.1
64 | widgetsnbextension==3.4.2
65 |
--------------------------------------------------------------------------------
/assignment2/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS
3 | set -euo pipefail
4 |
5 | CODE=(
6 | "cs231n/layers.py"
7 | "cs231n/classifiers/fc_net.py"
8 | "cs231n/optim.py"
9 | "cs231n/solver.py"
10 | "cs231n/classifiers/cnn.py"
11 | )
12 |
13 | # these notebooks should ideally
14 | # be in order of questions so
15 | # that the generated pdf is
16 | # in order of questions
17 | NOTEBOOKS=(
18 | "FullyConnectedNets.ipynb"
19 | "BatchNormalization.ipynb"
20 | "Dropout.ipynb"
21 | "ConvolutionalNetworks.ipynb"
22 | "PyTorch.ipynb"
23 | "TensorFlow.ipynb"
24 | )
25 |
26 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" )
27 |
28 | LOCAL_DIR=`pwd`
29 | ASSIGNMENT_NO=2
30 | ZIP_FILENAME="a2.zip"
31 |
32 | C_R="\e[31m"
33 | C_G="\e[32m"
34 | C_BLD="\e[1m"
35 | C_E="\e[0m"
36 |
37 | for FILE in "${FILES[@]}"
38 | do
39 | if [ ! -f ${FILE} ]; then
40 | echo -e "${C_R}Required file ${FILE} not found. Exiting.${C_E}"
41 | exit 1
42 | fi
43 | done
44 |
45 | echo -e "### Zipping file ###"
46 | rm -f ${ZIP_FILENAME}
47 | zip -q "${ZIP_FILENAME}" -r ${NOTEBOOKS[@]} $(find . -name "*.py") $(find . -name "*.pyx") -x "makepdf.py"
48 |
49 | echo -e "### Creating PDFs ###"
50 | python makepdf.py --notebooks "${NOTEBOOKS[@]}"
51 |
52 | echo -e "### Done! Please submit a2.zip and the pdfs to Gradescope. ###"
53 |
--------------------------------------------------------------------------------
/assignment2/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/__init__.py
--------------------------------------------------------------------------------
/assignment2/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/classifiers/__init__.py
--------------------------------------------------------------------------------
/assignment2/cs231n/classifiers/cnn.py:
--------------------------------------------------------------------------------
1 | from builtins import object
2 | import numpy as np
3 |
4 | from ..layers import *
5 | from ..fast_layers import *
6 | from ..layer_utils import *
7 |
8 |
9 | class ThreeLayerConvNet(object):
10 | """
11 | A three-layer convolutional network with the following architecture:
12 |
13 | conv - relu - 2x2 max pool - affine - relu - affine - softmax
14 |
15 | The network operates on minibatches of data that have shape (N, C, H, W)
16 | consisting of N images, each with height H and width W and with C input
17 | channels.
18 | """
19 |
20 | def __init__(
21 | self,
22 | input_dim=(3, 32, 32),
23 | num_filters=32,
24 | filter_size=7,
25 | hidden_dim=100,
26 | num_classes=10,
27 | weight_scale=1e-3,
28 | reg=0.0,
29 | dtype=np.float32,
30 | ):
31 | """
32 | Initialize a new network.
33 |
34 | Inputs:
35 | - input_dim: Tuple (C, H, W) giving size of input data
36 | - num_filters: Number of filters to use in the convolutional layer
37 | - filter_size: Width/height of filters to use in the convolutional layer
38 | - hidden_dim: Number of units to use in the fully-connected hidden layer
39 | - num_classes: Number of scores to produce from the final affine layer.
40 | - weight_scale: Scalar giving standard deviation for random initialization
41 | of weights.
42 | - reg: Scalar giving L2 regularization strength
43 | - dtype: numpy datatype to use for computation.
44 | """
45 | self.params = {}
46 | self.reg = reg
47 | self.dtype = dtype
48 |
49 | ############################################################################
50 | # TODO: Initialize weights and biases for the three-layer convolutional #
51 | # network. Weights should be initialized from a Gaussian centered at 0.0 #
52 | # with standard deviation equal to weight_scale; biases should be #
53 | # initialized to zero. All weights and biases should be stored in the #
54 | # dictionary self.params. Store weights and biases for the convolutional #
55 | # layer using the keys 'W1' and 'b1'; use keys 'W2' and 'b2' for the #
56 | # weights and biases of the hidden affine layer, and keys 'W3' and 'b3' #
57 | # for the weights and biases of the output affine layer. #
58 | # #
59 | # IMPORTANT: For this assignment, you can assume that the padding #
60 | # and stride of the first convolutional layer are chosen so that #
61 | # **the width and height of the input are preserved**. Take a look at #
62 | # the start of the loss() function to see how that happens. #
63 | ############################################################################
64 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
65 |
66 |         # The conv layer preserves the input's spatial size (see loss()),
67 |         # and the 2x2 max pool with stride 2 then halves height and width,
68 |         # so the flattened conv output has num_filters * (H//2) * (W//2) units.
69 |         W1 = weight_scale * np.random.randn(num_filters, input_dim[0], filter_size, filter_size)
70 |         b1 = np.zeros(num_filters)
71 |
72 |         out_h = input_dim[1] // 2
73 |         out_w = input_dim[2] // 2
74 |
75 | W2 = weight_scale * np.random.randn(num_filters*out_h*out_w, hidden_dim)
76 | b2 = np.zeros(hidden_dim)
77 |
78 | W3 = weight_scale * np.random.randn(hidden_dim, num_classes)
79 | b3 = np.zeros(num_classes)
80 |
81 | self.params["W1"], self.params["b1"] = W1, b1
82 | self.params["W2"], self.params["b2"] = W2, b2
83 | self.params["W3"], self.params["b3"] = W3, b3
84 |
85 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
86 | ############################################################################
87 | # END OF YOUR CODE #
88 | ############################################################################
89 |
90 | for k, v in self.params.items():
91 | self.params[k] = v.astype(dtype)
92 |
93 | def loss(self, X, y=None):
94 | """
95 | Evaluate loss and gradient for the three-layer convolutional network.
96 |
97 | Input / output: Same API as TwoLayerNet in fc_net.py.
98 | """
99 | W1, b1 = self.params["W1"], self.params["b1"]
100 | W2, b2 = self.params["W2"], self.params["b2"]
101 | W3, b3 = self.params["W3"], self.params["b3"]
102 |
103 | # pass conv_param to the forward pass for the convolutional layer
104 | # Padding and stride chosen to preserve the input spatial size
105 | filter_size = W1.shape[2]
106 | conv_param = {"stride": 1, "pad": (filter_size - 1) // 2}
107 |
108 | # pass pool_param to the forward pass for the max-pooling layer
109 | pool_param = {"pool_height": 2, "pool_width": 2, "stride": 2}
110 |
111 | scores = None
112 | ############################################################################
113 | # TODO: Implement the forward pass for the three-layer convolutional net, #
114 | # computing the class scores for X and storing them in the scores #
115 | # variable. #
116 | # #
117 | # Remember you can use the functions defined in cs231n/fast_layers.py and #
118 | # cs231n/layer_utils.py in your implementation (already imported). #
119 | ############################################################################
120 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
121 |
122 | conv_relu_pool_out, conv_relu_pool_cache = conv_relu_pool_forward(X, W1, b1, conv_param, pool_param)
123 | conv_relu_pool_out_flat = conv_relu_pool_out.reshape(conv_relu_pool_out.shape[0], -1)
124 | affine_relu_out, affine_relu_cache = affine_relu_forward(conv_relu_pool_out_flat, W2, b2)
125 | affine_out, affine_cache = affine_forward(affine_relu_out, W3, b3)
126 | scores = affine_out
127 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
128 | ############################################################################
129 | # END OF YOUR CODE #
130 | ############################################################################
131 |
132 | if y is None:
133 | return scores
134 |
135 | loss, grads = 0, {}
136 | ############################################################################
137 | # TODO: Implement the backward pass for the three-layer convolutional net, #
138 | # storing the loss and gradients in the loss and grads variables. Compute #
139 | # data loss using softmax, and make sure that grads[k] holds the gradients #
140 | # for self.params[k]. Don't forget to add L2 regularization! #
141 | # #
142 | # NOTE: To ensure that your implementation matches ours and you pass the #
143 | # automated tests, make sure that your L2 regularization includes a factor #
144 | # of 0.5 to simplify the expression for the gradient. #
145 | ############################################################################
146 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
147 |
148 | reg = self.reg
149 | loss, dscore = softmax_loss(scores, y)
150 |         loss += 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))
151 |
152 | daffine_out, dW3, db3 = affine_backward(dscore, affine_cache)
153 | daffine_relu_out, dW2, db2 = affine_relu_backward(daffine_out, affine_relu_cache)
154 | daffine_relu_out_build = daffine_relu_out.reshape(conv_relu_pool_out.shape)
155 | dconv_relu_pool_out, dW1, db1 = conv_relu_pool_backward(daffine_relu_out_build, conv_relu_pool_cache)
156 |
157 | grads["W1"], grads["b1"] = dW1 + reg * W1, db1
158 | grads["W2"], grads["b2"] = dW2 + reg * W2, db2
159 | grads["W3"], grads["b3"] = dW3 + reg * W3, db3
160 |
161 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
162 | ############################################################################
163 | # END OF YOUR CODE #
164 | ############################################################################
165 |
166 | return loss, grads
167 |
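# A quick shape check, assuming the package is importable (e.g. from a
# notebook) and the Cython fast layers have been built:
#
#   from cs231n.classifiers.cnn import ThreeLayerConvNet
#   import numpy as np
#   model = ThreeLayerConvNet()                          # (3, 32, 32) input, 32 7x7 filters
#   scores = model.loss(np.random.randn(5, 3, 32, 32))   # y=None -> class scores
#   scores.shape                                         # (5, 10)
#   model.params["W2"].shape                             # (8192, 100) = (32 * 16 * 16, 100)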
--------------------------------------------------------------------------------
/assignment2/cs231n/data_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | from builtins import range
4 | from six.moves import cPickle as pickle
5 | import numpy as np
6 | import os
7 | from imageio import imread
8 | import platform
9 |
10 |
11 | def load_pickle(f):
12 | version = platform.python_version_tuple()
13 | if version[0] == "2":
14 | return pickle.load(f)
15 | elif version[0] == "3":
16 | return pickle.load(f, encoding="latin1")
17 | raise ValueError("invalid python version: {}".format(version))
18 |
19 |
20 | def load_CIFAR_batch(filename):
21 | """ load single batch of cifar """
22 | with open(filename, "rb") as f:
23 | datadict = load_pickle(f)
24 | X = datadict["data"]
25 | Y = datadict["labels"]
26 | X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
27 | Y = np.array(Y)
28 | return X, Y
29 |
30 |
31 | def load_CIFAR10(ROOT):
32 | """ load all of cifar """
33 | xs = []
34 | ys = []
35 | for b in range(1, 6):
36 | f = os.path.join(ROOT, "data_batch_%d" % (b,))
37 | X, Y = load_CIFAR_batch(f)
38 | xs.append(X)
39 | ys.append(Y)
40 | Xtr = np.concatenate(xs)
41 | Ytr = np.concatenate(ys)
42 | del X, Y
43 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, "test_batch"))
44 | return Xtr, Ytr, Xte, Yte
45 |
46 |
47 | def get_CIFAR10_data(
48 | num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True
49 | ):
50 | """
51 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
52 | it for classifiers. These are the same steps as we used for the SVM, but
53 | condensed to a single function.
54 | """
55 | # Load the raw CIFAR-10 data
56 | cifar10_dir = os.path.join(
57 | os.path.dirname(__file__), "datasets/cifar-10-batches-py"
58 | )
59 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
60 |
61 | # Subsample the data
62 | mask = list(range(num_training, num_training + num_validation))
63 | X_val = X_train[mask]
64 | y_val = y_train[mask]
65 | mask = list(range(num_training))
66 | X_train = X_train[mask]
67 | y_train = y_train[mask]
68 | mask = list(range(num_test))
69 | X_test = X_test[mask]
70 | y_test = y_test[mask]
71 |
72 | # Normalize the data: subtract the mean image
73 | if subtract_mean:
74 | mean_image = np.mean(X_train, axis=0)
75 | X_train -= mean_image
76 | X_val -= mean_image
77 | X_test -= mean_image
78 |
79 | # Transpose so that channels come first
80 | X_train = X_train.transpose(0, 3, 1, 2).copy()
81 | X_val = X_val.transpose(0, 3, 1, 2).copy()
82 | X_test = X_test.transpose(0, 3, 1, 2).copy()
83 |
84 | # Package data into a dictionary
85 | return {
86 | "X_train": X_train,
87 | "y_train": y_train,
88 | "X_val": X_val,
89 | "y_val": y_val,
90 | "X_test": X_test,
91 | "y_test": y_test,
92 | }
93 |
94 |
95 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
96 | """
97 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
98 | TinyImageNet-200 have the same directory structure, so this can be used
99 | to load any of them.
100 |
101 | Inputs:
102 | - path: String giving path to the directory to load.
103 | - dtype: numpy datatype used to load the data.
104 | - subtract_mean: Whether to subtract the mean training image.
105 |
106 | Returns: A dictionary with the following entries:
107 | - class_names: A list where class_names[i] is a list of strings giving the
108 | WordNet names for class i in the loaded dataset.
109 | - X_train: (N_tr, 3, 64, 64) array of training images
110 | - y_train: (N_tr,) array of training labels
111 | - X_val: (N_val, 3, 64, 64) array of validation images
112 | - y_val: (N_val,) array of validation labels
113 | - X_test: (N_test, 3, 64, 64) array of testing images.
114 | - y_test: (N_test,) array of test labels; if test labels are not available
115 | (such as in student code) then y_test will be None.
116 | - mean_image: (3, 64, 64) array giving mean training image
117 | """
118 | # First load wnids
119 | with open(os.path.join(path, "wnids.txt"), "r") as f:
120 | wnids = [x.strip() for x in f]
121 |
122 | # Map wnids to integer labels
123 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
124 |
125 | # Use words.txt to get names for each class
126 | with open(os.path.join(path, "words.txt"), "r") as f:
127 | wnid_to_words = dict(line.split("\t") for line in f)
128 | for wnid, words in wnid_to_words.items():
129 | wnid_to_words[wnid] = [w.strip() for w in words.split(",")]
130 | class_names = [wnid_to_words[wnid] for wnid in wnids]
131 |
132 | # Next load training data.
133 | X_train = []
134 | y_train = []
135 | for i, wnid in enumerate(wnids):
136 | if (i + 1) % 20 == 0:
137 | print("loading training data for synset %d / %d" % (i + 1, len(wnids)))
138 | # To figure out the filenames we need to open the boxes file
139 | boxes_file = os.path.join(path, "train", wnid, "%s_boxes.txt" % wnid)
140 | with open(boxes_file, "r") as f:
141 | filenames = [x.split("\t")[0] for x in f]
142 | num_images = len(filenames)
143 |
144 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
145 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
146 | for j, img_file in enumerate(filenames):
147 | img_file = os.path.join(path, "train", wnid, "images", img_file)
148 | img = imread(img_file)
149 | if img.ndim == 2:
150 | ## grayscale file
151 | img.shape = (64, 64, 1)
152 | X_train_block[j] = img.transpose(2, 0, 1)
153 | X_train.append(X_train_block)
154 | y_train.append(y_train_block)
155 |
156 | # We need to concatenate all training data
157 | X_train = np.concatenate(X_train, axis=0)
158 | y_train = np.concatenate(y_train, axis=0)
159 |
160 | # Next load validation data
161 | with open(os.path.join(path, "val", "val_annotations.txt"), "r") as f:
162 | img_files = []
163 | val_wnids = []
164 | for line in f:
165 | img_file, wnid = line.split("\t")[:2]
166 | img_files.append(img_file)
167 | val_wnids.append(wnid)
168 | num_val = len(img_files)
169 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
170 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
171 | for i, img_file in enumerate(img_files):
172 | img_file = os.path.join(path, "val", "images", img_file)
173 | img = imread(img_file)
174 | if img.ndim == 2:
175 | img.shape = (64, 64, 1)
176 | X_val[i] = img.transpose(2, 0, 1)
177 |
178 | # Next load test images
179 | # Students won't have test labels, so we need to iterate over files in the
180 | # images directory.
181 | img_files = os.listdir(os.path.join(path, "test", "images"))
182 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
183 | for i, img_file in enumerate(img_files):
184 | img_file = os.path.join(path, "test", "images", img_file)
185 | img = imread(img_file)
186 | if img.ndim == 2:
187 | img.shape = (64, 64, 1)
188 | X_test[i] = img.transpose(2, 0, 1)
189 |
190 | y_test = None
191 | y_test_file = os.path.join(path, "test", "test_annotations.txt")
192 | if os.path.isfile(y_test_file):
193 | with open(y_test_file, "r") as f:
194 | img_file_to_wnid = {}
195 | for line in f:
196 | line = line.split("\t")
197 | img_file_to_wnid[line[0]] = line[1]
198 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
199 | y_test = np.array(y_test)
200 |
201 | mean_image = X_train.mean(axis=0)
202 | if subtract_mean:
203 | X_train -= mean_image[None]
204 | X_val -= mean_image[None]
205 | X_test -= mean_image[None]
206 |
207 | return {
208 | "class_names": class_names,
209 | "X_train": X_train,
210 | "y_train": y_train,
211 | "X_val": X_val,
212 | "y_val": y_val,
213 | "X_test": X_test,
214 | "y_test": y_test,
215 | "class_names": class_names,
216 | "mean_image": mean_image,
217 | }
218 |
219 |
220 | def load_models(models_dir):
221 | """
222 | Load saved models from disk. This will attempt to unpickle all files in a
223 | directory; any files that give errors on unpickling (such as README.txt)
224 | will be skipped.
225 |
226 | Inputs:
227 | - models_dir: String giving the path to a directory containing model files.
228 | Each model file is a pickled dictionary with a 'model' field.
229 |
230 | Returns:
231 | A dictionary mapping model file names to models.
232 | """
233 | models = {}
234 | for model_file in os.listdir(models_dir):
235 | with open(os.path.join(models_dir, model_file), "rb") as f:
236 | try:
237 | models[model_file] = load_pickle(f)["model"]
238 | except pickle.UnpicklingError:
239 | continue
240 | return models
241 |
242 |
243 | def load_imagenet_val(num=None):
244 | """Load a handful of validation images from ImageNet.
245 |
246 | Inputs:
247 | - num: Number of images to load (max of 25)
248 |
249 | Returns:
250 | - X: numpy array with shape [num, 224, 224, 3]
251 | - y: numpy array of integer image labels, shape [num]
252 | - class_names: dict mapping integer label to class name
253 | """
254 | imagenet_fn = os.path.join(
255 | os.path.dirname(__file__), "datasets/imagenet_val_25.npz"
256 | )
257 | if not os.path.isfile(imagenet_fn):
258 | print("file %s not found" % imagenet_fn)
259 | print("Run the following:")
260 | print("cd cs231n/datasets")
261 | print("bash get_imagenet_val.sh")
262 | assert False, "Need to download imagenet_val_25.npz"
263 | f = np.load(imagenet_fn)
264 | X = f["X"]
265 | y = f["y"]
266 | class_names = f["label_map"].item()
267 | if num is not None:
268 | X = X[:num]
269 | y = y[:num]
270 | return X, y, class_names
271 |
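# Typical use from a notebook, after fetching the data with get_datasets.sh
# (shapes shown for the default split sizes):
#
#   data = get_CIFAR10_data()
#   data["X_train"].shape, data["y_train"].shape   # (49000, 3, 32, 32), (49000,)
#   data["X_val"].shape, data["X_test"].shape      # (1000, 3, 32, 32), (1000, 3, 32, 32)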
--------------------------------------------------------------------------------
/assignment2/cs231n/datasets/get_datasets.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d "cifar-10-batches-py" ]; then
2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz
3 | tar -xzvf cifar-10-python.tar.gz
4 | rm cifar-10-python.tar.gz
5 | fi
6 |
--------------------------------------------------------------------------------
/assignment2/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from builtins import range
3 | from past.builtins import xrange
4 |
5 | import numpy as np
6 | from random import randrange
7 |
8 |
9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
10 | """
11 | a naive implementation of numerical gradient of f at x
12 | - f should be a function that takes a single argument
13 | - x is the point (numpy array) to evaluate the gradient at
14 | """
15 |
16 | fx = f(x) # evaluate function value at original point
17 | grad = np.zeros_like(x)
18 | # iterate over all indexes in x
19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
20 | while not it.finished:
21 |
22 | # evaluate function at x+h
23 | ix = it.multi_index
24 | oldval = x[ix]
25 | x[ix] = oldval + h # increment by h
26 |         fxph = f(x)  # evaluate f(x + h)
27 | x[ix] = oldval - h
28 | fxmh = f(x) # evaluate f(x - h)
29 | x[ix] = oldval # restore
30 |
31 | # compute the partial derivative with centered formula
32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope
33 | if verbose:
34 | print(ix, grad[ix])
35 | it.iternext() # step to next dimension
36 |
37 | return grad
38 |
39 |
40 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
41 | """
42 | Evaluate a numeric gradient for a function that accepts a numpy
43 | array and returns a numpy array.
44 | """
45 | grad = np.zeros_like(x)
46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
47 | while not it.finished:
48 | ix = it.multi_index
49 |
50 | oldval = x[ix]
51 | x[ix] = oldval + h
52 | pos = f(x).copy()
53 | x[ix] = oldval - h
54 | neg = f(x).copy()
55 | x[ix] = oldval
56 |
57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h)
58 | it.iternext()
59 | return grad
60 |
61 |
62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
63 | """
64 | Compute numeric gradients for a function that operates on input
65 | and output blobs.
66 |
67 | We assume that f accepts several input blobs as arguments, followed by a
68 | blob where outputs will be written. For example, f might be called like:
69 |
70 | f(x, w, out)
71 |
72 | where x and w are input Blobs, and the result of f will be written to out.
73 |
74 | Inputs:
75 | - f: function
76 | - inputs: tuple of input blobs
77 | - output: output blob
78 | - h: step size
79 | """
80 | numeric_diffs = []
81 | for input_blob in inputs:
82 | diff = np.zeros_like(input_blob.diffs)
83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"])
84 | while not it.finished:
85 | idx = it.multi_index
86 | orig = input_blob.vals[idx]
87 |
88 | input_blob.vals[idx] = orig + h
89 | f(*(inputs + (output,)))
90 | pos = np.copy(output.vals)
91 | input_blob.vals[idx] = orig - h
92 | f(*(inputs + (output,)))
93 | neg = np.copy(output.vals)
94 | input_blob.vals[idx] = orig
95 |
96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
97 |
98 | it.iternext()
99 | numeric_diffs.append(diff)
100 | return numeric_diffs
101 |
102 |
103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
104 | return eval_numerical_gradient_blobs(
105 | lambda *args: net.forward(), inputs, output, h=h
106 | )
107 |
108 |
109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
110 | """
111 |     sample a few random elements and only check the numerical
112 |     gradient in those dimensions.
113 | """
114 |
115 | for i in range(num_checks):
116 | ix = tuple([randrange(m) for m in x.shape])
117 |
118 | oldval = x[ix]
119 | x[ix] = oldval + h # increment by h
120 | fxph = f(x) # evaluate f(x + h)
121 |         x[ix] = oldval - h  # decrement by h
122 | fxmh = f(x) # evaluate f(x - h)
123 | x[ix] = oldval # reset
124 |
125 | grad_numerical = (fxph - fxmh) / (2 * h)
126 | grad_analytic = analytic_grad[ix]
127 | rel_error = abs(grad_numerical - grad_analytic) / (
128 | abs(grad_numerical) + abs(grad_analytic)
129 | )
130 | print(
131 | "numerical: %f analytic: %f, relative error: %e"
132 | % (grad_numerical, grad_analytic, rel_error)
133 | )
134 |
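# A minimal self-check against a known gradient: for f(x) = sum(x**2) the
# exact gradient is 2*x, so the centered-difference estimate should match it
# to roughly 1e-9 or better with h=1e-5.
if __name__ == "__main__":
    x = np.random.randn(4, 3)
    num_grad = eval_numerical_gradient(lambda t: np.sum(t ** 2), x, verbose=False)
    print(np.max(np.abs(num_grad - 2 * x)))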
--------------------------------------------------------------------------------
/assignment2/cs231n/im2col.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | import numpy as np
3 |
4 |
5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
6 | # First figure out what the size of the output should be
7 | N, C, H, W = x_shape
8 | assert (H + 2 * padding - field_height) % stride == 0
9 |     assert (W + 2 * padding - field_width) % stride == 0
10 |     out_height = (H + 2 * padding - field_height) // stride + 1
11 |     out_width = (W + 2 * padding - field_width) // stride + 1
12 |
13 | i0 = np.repeat(np.arange(field_height), field_width)
14 | i0 = np.tile(i0, C)
15 | i1 = stride * np.repeat(np.arange(out_height), out_width)
16 | j0 = np.tile(np.arange(field_width), field_height * C)
17 | j1 = stride * np.tile(np.arange(out_width), out_height)
18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1)
19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1)
20 |
21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
22 |
23 | return (k, i, j)
24 |
25 |
26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1):
27 | """ An implementation of im2col based on some fancy indexing """
28 | # Zero-pad the input
29 | p = padding
30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant")
31 |
32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride)
33 |
34 | cols = x_padded[:, k, i, j]
35 | C = x.shape[1]
36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
37 | return cols
38 |
39 |
40 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, stride=1):
41 | """ An implementation of col2im based on fancy indexing and np.add.at """
42 | N, C, H, W = x_shape
43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding
44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1)
48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
49 | if padding == 0:
50 | return x_padded
51 | return x_padded[:, :, padding:-padding, padding:-padding]
52 |
53 |
54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
55 |
56 | pass
57 |
58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
59 |
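# A shape sketch: with 3x3 receptive fields, padding 1, and stride 1 there is
# one column per output location, each holding a C*3*3 patch.
if __name__ == "__main__":
    x = np.random.randn(2, 3, 4, 4)
    cols = im2col_indices(x, 3, 3, padding=1, stride=1)
    print(cols.shape)   # (27, 32): C*3*3 rows, N * out_h * out_w columns
    x_rec = col2im_indices(cols, x.shape, 3, 3, padding=1, stride=1)
    print(x_rec.shape)  # (2, 3, 4, 4); overlapping patches are summed back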
--------------------------------------------------------------------------------
/assignment2/cs231n/im2col_cython.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 |
5 | # DTYPE = np.float64
6 | # ctypedef np.float64_t DTYPE_t
7 |
8 | ctypedef fused DTYPE_t:
9 | np.float32_t
10 | np.float64_t
11 |
12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
13 | int field_width, int padding, int stride):
14 | cdef int N = x.shape[0]
15 | cdef int C = x.shape[1]
16 | cdef int H = x.shape[2]
17 | cdef int W = x.shape[3]
18 |
19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1
20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1
21 |
22 | cdef int p = padding
23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
25 |
26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
27 | (C * field_height * field_width, N * HH * WW),
28 | dtype=x.dtype)
29 |
30 | # Moving the inner loop to a C function with no bounds checking works, but does
31 | # not seem to help performance in any measurable way.
32 |
33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
34 | field_height, field_width, padding, stride)
35 | return cols
36 |
37 |
38 | @cython.boundscheck(False)
39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
40 | np.ndarray[DTYPE_t, ndim=4] x_padded,
41 | int N, int C, int H, int W, int HH, int WW,
42 | int field_height, int field_width, int padding, int stride) except? -1:
43 | cdef int c, ii, jj, row, yy, xx, i, col
44 |
45 | for c in range(C):
46 | for yy in range(HH):
47 | for xx in range(WW):
48 | for ii in range(field_height):
49 | for jj in range(field_width):
50 |                         row = c * field_width * field_height + ii * field_width + jj
51 | for i in range(N):
52 | col = yy * WW * N + xx * N + i
53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
54 |
55 |
56 |
57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
58 | int field_height, int field_width, int padding, int stride):
59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1
61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1
62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
63 | dtype=cols.dtype)
64 |
65 | # Moving the inner loop to a C-function with no bounds checking improves
66 | # performance quite a bit for col2im.
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
68 | field_height, field_width, padding, stride)
69 | if padding > 0:
70 | return x_padded[:, :, padding:-padding, padding:-padding]
71 | return x_padded
72 |
73 |
74 | @cython.boundscheck(False)
75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
76 | np.ndarray[DTYPE_t, ndim=4] x_padded,
77 | int N, int C, int H, int W, int HH, int WW,
78 | int field_height, int field_width, int padding, int stride) except? -1:
79 | cdef int c, ii, jj, row, yy, xx, i, col
80 |
81 | for c in range(C):
82 | for ii in range(field_height):
83 | for jj in range(field_width):
84 |                 row = c * field_width * field_height + ii * field_width + jj
85 | for yy in range(HH):
86 | for xx in range(WW):
87 | for i in range(N):
88 | col = yy * WW * N + xx * N + i
89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
90 |
91 |
92 | @cython.boundscheck(False)
93 | @cython.wraparound(False)
94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
95 | np.ndarray[DTYPE_t, ndim=4] x_padded,
96 | int N, int C, int H, int W, int HH, int WW,
97 | int out_h, int out_w, int pad, int stride):
98 |
99 | cdef int c, hh, ww, n, h, w
100 | for n in range(N):
101 | for c in range(C):
102 | for hh in range(HH):
103 | for ww in range(WW):
104 | for h in range(out_h):
105 | for w in range(out_w):
106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
107 |
108 |
109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
110 | int HH, int WW, int pad, int stride):
111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1
113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1
114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
115 | dtype=cols.dtype)
116 |
117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)
118 |
119 | if pad > 0:
120 | return x_padded[:, :, pad:-pad, pad:-pad]
121 | return x_padded
122 |
--------------------------------------------------------------------------------
/assignment2/cs231n/layer_utils.py:
--------------------------------------------------------------------------------
1 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
2 |
3 | pass
4 |
5 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
6 | from .layers import *
7 | from .fast_layers import *
8 |
9 |
10 | def affine_relu_forward(x, w, b):
11 | """
12 |     Convenience layer that performs an affine transform followed by a ReLU
13 |
14 | Inputs:
15 | - x: Input to the affine layer
16 | - w, b: Weights for the affine layer
17 |
18 | Returns a tuple of:
19 | - out: Output from the ReLU
20 | - cache: Object to give to the backward pass
21 | """
22 | a, fc_cache = affine_forward(x, w, b)
23 | out, relu_cache = relu_forward(a)
24 | cache = (fc_cache, relu_cache)
25 | return out, cache
26 |
27 |
28 | def affine_relu_backward(dout, cache):
29 | """
30 | Backward pass for the affine-relu convenience layer
31 | """
32 | fc_cache, relu_cache = cache
33 | da = relu_backward(dout, relu_cache)
34 | dx, dw, db = affine_backward(da, fc_cache)
35 | return dx, dw, db
36 |
37 |
38 | ################### Bingcheng HU's Code for Q2 starts #####################
39 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
40 | """
41 |     Convenience layer that performs an affine transform followed by a batch norm, then a ReLU
42 |
43 | Returns a tuple of:
44 | - out: Output from the ReLU
45 | - cache: Object to give to the backward pass
46 | """
47 | a, fc_cache = affine_forward(x, w, b)
48 | an, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
49 | out, relu_cache = relu_forward(an)
50 | cache = (fc_cache, bn_cache, relu_cache)
51 | return out, cache
52 |
53 | def affine_bn_relu_backward(dout, cache):
54 | """
55 | Backward pass for the affine_bn_relu convenience layer
56 | """
57 | fc_cache, bn_cache, relu_cache = cache
58 | da = relu_backward(dout, relu_cache)
59 | dan, dgamma, dbeta = batchnorm_backward_alt(da, bn_cache)
60 | dx, dw, db = affine_backward(dan, fc_cache)
61 | return dx, dw, db, dgamma, dbeta
62 |
63 | def affine_ln_relu_forward(x, w, b, gamma, beta, bn_param):
64 | """
65 |     Convenience layer that performs an affine transform followed by a layer norm, then a ReLU
66 |
67 | Returns a tuple of:
68 | - out: Output from the ReLU
69 | - cache: Object to give to the backward pass
70 | """
71 | a, fc_cache = affine_forward(x, w, b)
72 | an, bn_cache = layernorm_forward(a, gamma, beta, bn_param)
73 | out, relu_cache = relu_forward(an)
74 | cache = (fc_cache, bn_cache, relu_cache)
75 | return out, cache
76 |
77 | def affine_ln_relu_backward(dout, cache):
78 | """
79 |     Backward pass for the affine_ln_relu convenience layer
80 | """
81 | fc_cache, bn_cache, relu_cache = cache
82 | da = relu_backward(dout, relu_cache)
83 | dan, dgamma, dbeta = layernorm_backward(da, bn_cache)
84 | dx, dw, db = affine_backward(dan, fc_cache)
85 | return dx, dw, db, dgamma, dbeta
86 | ################### Bingcheng HU's Code for Q2 ends #####################
87 |
88 | def conv_relu_forward(x, w, b, conv_param):
89 | """
90 | A convenience layer that performs a convolution followed by a ReLU.
91 |
92 | Inputs:
93 | - x: Input to the convolutional layer
94 | - w, b, conv_param: Weights and parameters for the convolutional layer
95 |
96 | Returns a tuple of:
97 | - out: Output from the ReLU
98 | - cache: Object to give to the backward pass
99 | """
100 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
101 | out, relu_cache = relu_forward(a)
102 | cache = (conv_cache, relu_cache)
103 | return out, cache
104 |
105 |
106 | def conv_relu_backward(dout, cache):
107 | """
108 | Backward pass for the conv-relu convenience layer.
109 | """
110 | conv_cache, relu_cache = cache
111 | da = relu_backward(dout, relu_cache)
112 | dx, dw, db = conv_backward_fast(da, conv_cache)
113 | return dx, dw, db
114 |
115 |
116 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
117 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
118 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
119 | out, relu_cache = relu_forward(an)
120 | cache = (conv_cache, bn_cache, relu_cache)
121 | return out, cache
122 |
123 |
124 | def conv_bn_relu_backward(dout, cache):
125 | conv_cache, bn_cache, relu_cache = cache
126 | dan = relu_backward(dout, relu_cache)
127 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
128 | dx, dw, db = conv_backward_fast(da, conv_cache)
129 | return dx, dw, db, dgamma, dbeta
130 |
131 |
132 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
133 | """
134 | Convenience layer that performs a convolution, a ReLU, and a pool.
135 |
136 | Inputs:
137 | - x: Input to the convolutional layer
138 | - w, b, conv_param: Weights and parameters for the convolutional layer
139 | - pool_param: Parameters for the pooling layer
140 |
141 | Returns a tuple of:
142 | - out: Output from the pooling layer
143 | - cache: Object to give to the backward pass
144 | """
145 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
146 | s, relu_cache = relu_forward(a)
147 | out, pool_cache = max_pool_forward_fast(s, pool_param)
148 | cache = (conv_cache, relu_cache, pool_cache)
149 | return out, cache
150 |
151 |
152 | def conv_relu_pool_backward(dout, cache):
153 | """
154 | Backward pass for the conv-relu-pool convenience layer
155 | """
156 | conv_cache, relu_cache, pool_cache = cache
157 | ds = max_pool_backward_fast(dout, pool_cache)
158 | da = relu_backward(ds, relu_cache)
159 | dx, dw, db = conv_backward_fast(da, conv_cache)
160 | return dx, dw, db
161 |
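# Sketch of how the forward/backward pairs compose (from a notebook, with
# layers.py completed); the cache returned by forward is the only state the
# matching backward needs:
#
#   x = np.random.randn(4, 8)
#   w, b = np.random.randn(8, 5), np.zeros(5)
#   out, cache = affine_relu_forward(x, w, b)             # out: (4, 5)
#   dx, dw, db = affine_relu_backward(np.ones_like(out), cache)
#   dx.shape, dw.shape, db.shape                          # (4, 8), (8, 5), (5,)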
--------------------------------------------------------------------------------
/assignment2/cs231n/notebook_images/batchnorm_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/batchnorm_graph.png
--------------------------------------------------------------------------------
/assignment2/cs231n/notebook_images/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/kitten.jpg
--------------------------------------------------------------------------------
/assignment2/cs231n/notebook_images/normalization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/normalization.png
--------------------------------------------------------------------------------
/assignment2/cs231n/notebook_images/puppy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment2/cs231n/notebook_images/puppy.jpg
--------------------------------------------------------------------------------
/assignment2/cs231n/optim.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | """
4 | This file implements various first-order update rules that are commonly used
5 | for training neural networks. Each update rule accepts current weights and the
6 | gradient of the loss with respect to those weights and produces the next set of
7 | weights. Each update rule has the same interface:
8 |
9 | def update(w, dw, config=None):
10 |
11 | Inputs:
12 | - w: A numpy array giving the current weights.
13 | - dw: A numpy array of the same shape as w giving the gradient of the
14 | loss with respect to w.
15 | - config: A dictionary containing hyperparameter values such as learning
16 | rate, momentum, etc. If the update rule requires caching values over many
17 | iterations, then config will also hold these cached values.
18 |
19 | Returns:
20 | - next_w: The next point after the update.
21 | - config: The config dictionary to be passed to the next iteration of the
22 | update rule.
23 |
24 | NOTE: For most update rules, the default learning rate will probably not
25 | perform well; however the default values of the other hyperparameters should
26 | work well for a variety of different problems.
27 |
28 | For efficiency, update rules may perform in-place updates, mutating w and
29 | setting next_w equal to w.
30 | """
31 |
32 |
33 | def sgd(w, dw, config=None):
34 | """
35 | Performs vanilla stochastic gradient descent.
36 |
37 | config format:
38 | - learning_rate: Scalar learning rate.
39 | """
40 | if config is None:
41 | config = {}
42 | config.setdefault("learning_rate", 1e-2)
43 |
44 | w -= config["learning_rate"] * dw
45 | return w, config
46 |
47 |
48 | def sgd_momentum(w, dw, config=None):
49 | """
50 | Performs stochastic gradient descent with momentum.
51 |
52 | config format:
53 | - learning_rate: Scalar learning rate.
54 | - momentum: Scalar between 0 and 1 giving the momentum value.
55 | Setting momentum = 0 reduces to sgd.
56 | - velocity: A numpy array of the same shape as w and dw used to store a
57 | moving average of the gradients.
58 | """
59 | if config is None:
60 | config = {}
61 | config.setdefault("learning_rate", 1e-2)
62 | config.setdefault("momentum", 0.9)
63 | v = config.get("velocity", np.zeros_like(w))
64 |
65 | next_w = None
66 | ###########################################################################
67 | # TODO: Implement the momentum update formula. Store the updated value in #
68 | # the next_w variable. You should also use and update the velocity v. #
69 | ###########################################################################
70 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
71 |
72 |     v = config['momentum'] * v - config['learning_rate'] * dw
73 | next_w = w + v
74 |
75 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
76 | ###########################################################################
77 | # END OF YOUR CODE #
78 | ###########################################################################
79 | config["velocity"] = v
80 |
81 | return next_w, config
82 |
83 |
84 | def rmsprop(w, dw, config=None):
85 | """
86 | Uses the RMSProp update rule, which uses a moving average of squared
87 | gradient values to set adaptive per-parameter learning rates.
88 |
89 | config format:
90 | - learning_rate: Scalar learning rate.
91 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
92 | gradient cache.
93 | - epsilon: Small scalar used for smoothing to avoid dividing by zero.
94 | - cache: Moving average of second moments of gradients.
95 | """
96 | if config is None:
97 | config = {}
98 | config.setdefault("learning_rate", 1e-2)
99 | config.setdefault("decay_rate", 0.99)
100 | config.setdefault("epsilon", 1e-8)
101 | config.setdefault("cache", np.zeros_like(w))
102 |
103 | next_w = None
104 | ###########################################################################
105 | # TODO: Implement the RMSprop update formula, storing the next value of w #
106 | # in the next_w variable. Don't forget to update cache value stored in #
107 | # config['cache']. #
108 | ###########################################################################
109 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
110 | decay_rate = config['decay_rate']
111 | config['cache'] = decay_rate * config['cache'] + (1 - decay_rate) * (dw * dw)
112 | next_w = w - config['learning_rate'] * dw / (np.sqrt(config['cache']) + config['epsilon'])
113 |
114 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
115 | ###########################################################################
116 | # END OF YOUR CODE #
117 | ###########################################################################
118 |
119 | return next_w, config
120 |
121 |
122 | def adam(w, dw, config=None):
123 | """
124 | Uses the Adam update rule, which incorporates moving averages of both the
125 | gradient and its square and a bias correction term.
126 |
127 | config format:
128 | - learning_rate: Scalar learning rate.
129 | - beta1: Decay rate for moving average of first moment of gradient.
130 | - beta2: Decay rate for moving average of second moment of gradient.
131 | - epsilon: Small scalar used for smoothing to avoid dividing by zero.
132 | - m: Moving average of gradient.
133 | - v: Moving average of squared gradient.
134 | - t: Iteration number.
135 | """
136 | if config is None:
137 | config = {}
138 | config.setdefault("learning_rate", 1e-3)
139 | config.setdefault("beta1", 0.9)
140 | config.setdefault("beta2", 0.999)
141 | config.setdefault("epsilon", 1e-8)
142 | config.setdefault("m", np.zeros_like(w))
143 | config.setdefault("v", np.zeros_like(w))
144 | config.setdefault("t", 0)
145 |
146 | next_w = None
147 | ###########################################################################
148 | # TODO: Implement the Adam update formula, storing the next value of w in #
149 | # the next_w variable. Don't forget to update the m, v, and t variables #
150 | # stored in config. #
151 | # #
152 | # NOTE: In order to match the reference output, please modify t _before_ #
153 | # using it in any calculations. #
154 | ###########################################################################
155 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
156 |
157 |     config['t'] += 1
158 |     config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dw
159 |     config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * (dw * dw)
160 |     m_hat = config['m'] / (1 - config['beta1'] ** config['t'])  # bias correction
161 |     v_hat = config['v'] / (1 - config['beta2'] ** config['t'])  # bias correction
162 |     next_w = w - config['learning_rate'] * m_hat / (np.sqrt(v_hat) + config['epsilon'])
163 |
164 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
165 | ###########################################################################
166 | # END OF YOUR CODE #
167 | ###########################################################################
168 |
169 | return next_w, config
170 |
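# A tiny sanity check: for f(w) = 0.5 * ||w||^2 the gradient is w itself, so
# repeated Adam steps should move every entry toward zero (roughly one
# learning_rate per step once the moment estimates warm up):
if __name__ == "__main__":
    w, config = np.ones(3), None
    for _ in range(10):
        w, config = adam(w, w, config)
    print(w)  # entries shrink steadily from 1.0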
--------------------------------------------------------------------------------
/assignment2/cs231n/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 | import numpy
5 |
6 | extensions = [
7 | Extension(
8 | "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()]
9 | ),
10 | ]
11 |
12 | setup(ext_modules=cythonize(extensions),)
13 |
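# Built in place from this cs231n directory so the notebooks can import the
# compiled extension:
#
#   python setup.py build_ext --inplace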
--------------------------------------------------------------------------------
/assignment2/cs231n/vis_utils.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | from past.builtins import xrange
3 |
4 | from math import sqrt, ceil
5 | import numpy as np
6 |
7 |
8 | def visualize_grid(Xs, ubound=255.0, padding=1):
9 | """
10 | Reshape a 4D tensor of image data to a grid for easy visualization.
11 |
12 | Inputs:
13 | - Xs: Data of shape (N, H, W, C)
14 | - ubound: Output grid will have values scaled to the range [0, ubound]
15 | - padding: The number of blank pixels between elements of the grid
16 | """
17 | (N, H, W, C) = Xs.shape
18 | grid_size = int(ceil(sqrt(N)))
19 | grid_height = H * grid_size + padding * (grid_size - 1)
20 | grid_width = W * grid_size + padding * (grid_size - 1)
21 | grid = np.zeros((grid_height, grid_width, C))
22 | next_idx = 0
23 | y0, y1 = 0, H
24 | for y in range(grid_size):
25 | x0, x1 = 0, W
26 | for x in range(grid_size):
27 | if next_idx < N:
28 | img = Xs[next_idx]
29 | low, high = np.min(img), np.max(img)
30 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
31 | # grid[y0:y1, x0:x1] = Xs[next_idx]
32 | next_idx += 1
33 | x0 += W + padding
34 | x1 += W + padding
35 | y0 += H + padding
36 | y1 += H + padding
37 | # grid_max = np.max(grid)
38 | # grid_min = np.min(grid)
39 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
40 | return grid
41 |
42 |
43 | def vis_grid(Xs):
44 | """ visualize a grid of images """
45 | (N, H, W, C) = Xs.shape
46 | A = int(ceil(sqrt(N)))
47 | G = np.ones((A * H + A, A * W + A, C), Xs.dtype)
48 | G *= np.min(Xs)
49 | n = 0
50 | for y in range(A):
51 | for x in range(A):
52 | if n < N:
53 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = Xs[
54 | n, :, :, :
55 | ]
56 | n += 1
57 | # normalize to [0,1]
58 | maxg = G.max()
59 | ming = G.min()
60 | G = (G - ming) / (maxg - ming)
61 | return G
62 |
63 |
64 | def vis_nn(rows):
65 | """ visualize array of arrays of images """
66 | N = len(rows)
67 | D = len(rows[0])
68 | H, W, C = rows[0][0].shape
69 | Xs = rows[0][0]
70 | G = np.ones((N * H + N, D * W + D, C), Xs.dtype)
71 | for y in range(N):
72 | for x in range(D):
73 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = rows[y][x]
74 | # normalize to [0,1]
75 | maxg = G.max()
76 | ming = G.min()
77 | G = (G - ming) / (maxg - ming)
78 | return G
79 |
--------------------------------------------------------------------------------
/assignment2/frameworkpython:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # what real Python executable to use
4 | #PYVER=2.7
5 | #PATHTOPYTHON=/usr/local/bin/
6 | #PYTHON=${PATHTOPYTHON}python${PYVER}
7 |
8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3
9 |
10 | # find the root of the virtualenv, it should be the parent of the dir this script is in
11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"`
12 |
13 | # now run Python with the virtualenv set as Python's HOME
14 | export PYTHONHOME=$ENV
15 | exec $PYTHON "$@"
16 |
--------------------------------------------------------------------------------
/assignment2/makepdf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import subprocess
4 |
5 | try:
6 | from PyPDF2 import PdfFileMerger
7 |
8 | MERGE = True
9 | except ImportError:
10 | print("Could not find PyPDF2. Leaving pdf files unmerged.")
11 | MERGE = False
12 |
13 |
14 | def main(files):
15 | os_args = [
16 | "jupyter",
17 | "nbconvert",
18 | "--log-level",
19 | "CRITICAL",
20 | "--to",
21 | "pdf",
22 | ]
23 | for f in files:
24 | os_args.append(f)
25 | subprocess.run(os_args)
26 | print("Created PDF {}.".format(f))
27 | if MERGE:
28 | pdfs = [f.split(".")[0] + ".pdf" for f in files]
29 | merger = PdfFileMerger()
30 | for pdf in pdfs:
31 | merger.append(pdf)
32 | merger.write("assignment.pdf")
33 | merger.close()
34 | for pdf in pdfs:
35 | os.remove(pdf)
36 |
37 |
38 | if __name__ == "__main__":
39 | parser = argparse.ArgumentParser()
40 | # we pass in explicit notebook arg so that we can provide
41 | # an ordered list and produce an ordered pdf
42 | parser.add_argument("--notebooks", type=str, nargs="+", required=True)
43 | args = parser.parse_args()
44 | main(args.notebooks)
45 |
--------------------------------------------------------------------------------
/assignment2/requirements.txt:
--------------------------------------------------------------------------------
1 | attrs==19.1.0
2 | backcall==0.1.0
3 | bleach==3.1.0
4 | certifi==2019.3.9
5 | chardet==3.0.4
6 | colorama==0.4.1
7 | cycler==0.10.0
8 | Cython==0.29.16
9 | decorator==4.4.0
10 | defusedxml==0.5.0
11 | entrypoints==0.3
12 | future==0.17.1
13 | gitdb2==2.0.5
14 | GitPython==2.1.11
15 | idna==2.8
16 | ipykernel==5.1.0
17 | ipython==7.4.0
18 | ipython-genutils==0.2.0
19 | ipywidgets==7.4.2
20 | imageio==2.8.0
21 | jedi==0.13.3
22 | Jinja2==2.10
23 | jsonschema==3.0.1
24 | jupyter==1.0.0
25 | jupyter-client==5.2.4
26 | jupyter-console==6.0.0
27 | jupyter-core==4.4.0
28 | jupyterlab==0.35.4
29 | jupyterlab-server==0.2.0
30 | kiwisolver==1.0.1
31 | MarkupSafe==1.1.1
32 | matplotlib==3.0.3
33 | mistune==0.8.4
34 | nbconvert==5.4.1
35 | nbdime==1.0.5
36 | nbformat==4.4.0
37 | notebook==5.7.8
38 | numpy==1.16.2
39 | pandocfilters==1.4.2
40 | parso==0.3.4
41 | pexpect==4.6.0
42 | pickleshare==0.7.5
43 | Pillow==6.0.0
44 | prometheus-client==0.6.0
45 | prompt-toolkit==2.0.9
46 | ptyprocess==0.6.0
47 | Pygments==2.3.1
48 | pyparsing==2.3.1
49 | pyrsistent==0.14.11
50 | python-dateutil==2.8.0
51 | pyzmq==18.0.1
52 | qtconsole==4.4.3
53 | requests==2.21.0
54 | scipy==1.2.1
55 | Send2Trash==1.5.0
56 | six==1.12.0
57 | smmap2==2.0.5
58 | terminado==0.8.2
59 | testpath==0.4.2
60 | tornado==6.0.2
61 | traitlets==4.3.2
62 | urllib3==1.24.1
63 | wcwidth==0.1.7
64 | webencodings==0.5.1
65 | widgetsnbextension==3.4.2
66 |
--------------------------------------------------------------------------------
/assignment2/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 |
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 |
--------------------------------------------------------------------------------
/assignment3/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #NOTE: DO NOT EDIT THIS FILE-- MAY RESULT IN INCOMPLETE SUBMISSIONS
3 | set -euo pipefail
4 |
5 | tensorflow=0
6 | while getopts "t:" flag
7 | do
8 | case "${flag}" in
9 | t)
10 | tensorflow=${OPTARG}
11 | ;;
12 | esac
13 | done
14 |
15 | CODE=(
16 | "cs231n/rnn_layers.py"
17 | "cs231n/classifiers/rnn.py"
18 | "cs231n/net_visualization_pytorch.py"
19 | "cs231n/style_transfer_pytorch.py"
20 | "cs231n/gan_pytorch.py"
21 | )
22 | NOTEBOOKS=(
23 | "RNN_Captioning.ipynb"
24 | "LSTM_Captioning.ipynb"
25 | "NetworkVisualization-PyTorch.ipynb"
26 | "StyleTransfer-PyTorch.ipynb"
27 | "Generative_Adversarial_Networks_PyTorch.ipynb"
28 | )
29 |
30 | if [ "${tensorflow}" != "0" ]; then
31 | CODE=(
32 | "cs231n/rnn_layers.py"
33 | "cs231n/classifiers/rnn.py"
34 | "cs231n/net_visualization_tensorflow.py"
35 | "cs231n/style_transfer_tensorflow.py"
36 | "cs231n/gan_tf.py"
37 | )
38 | NOTEBOOKS=(
39 | "RNN_Captioning.ipynb"
40 | "LSTM_Captioning.ipynb"
41 | "NetworkVisualization-TensorFlow.ipynb"
42 | "StyleTransfer-TensorFlow.ipynb"
43 | "Generative_Adversarial_Networks_TF.ipynb"
44 | )
45 | fi
46 |
47 | FILES=( "${CODE[@]}" "${NOTEBOOKS[@]}" )
48 | ZIP_FILENAME="a3.zip"
49 |
50 | for FILE in "${FILES[@]}"
51 | do
52 | if [ ! -f ${FILE} ]; then
53 | echo "Required file ${FILE} not found. Exiting."
54 | exit 1
55 | fi
56 | done
57 |
58 | echo -e "### Zipping file ###"
59 | rm -f ${ZIP_FILENAME}
60 | zip -q "${ZIP_FILENAME}" -r "${NOTEBOOKS[@]}" $(find . -name "*.py") $(find . -name "*.pyx") -x "makepdf.py"
61 |
62 | echo -e "### Creating PDFs ###"
63 | python makepdf.py --notebooks "${NOTEBOOKS[@]}"
64 |
65 | echo -e "### Done! Please submit a3.zip and the pdfs to Gradescope. ###"
66 |
--------------------------------------------------------------------------------
/assignment3/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/cs231n/__init__.py
--------------------------------------------------------------------------------
/assignment3/cs231n/captioning_solver.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | from builtins import range
3 | from builtins import object
4 | import numpy as np
5 |
6 | from . import optim
7 | from .coco_utils import sample_coco_minibatch
8 |
9 |
10 | class CaptioningSolver(object):
11 | """
12 | A CaptioningSolver encapsulates all the logic necessary for training
13 | image captioning models. The CaptioningSolver performs stochastic gradient
14 | descent using different update rules defined in optim.py.
15 |
16 |     The solver accepts both training and validation data and labels so it can
17 | periodically check classification accuracy on both training and validation
18 | data to watch out for overfitting.
19 |
20 | To train a model, you will first construct a CaptioningSolver instance,
21 | passing the model, dataset, and various options (learning rate, batch size,
22 | etc) to the constructor. You will then call the train() method to run the
23 | optimization procedure and train the model.
24 |
25 | After the train() method returns, model.params will contain the parameters
26 | that performed best on the validation set over the course of training.
27 | In addition, the instance variable solver.loss_history will contain a list
28 | of all losses encountered during training and the instance variables
29 | solver.train_acc_history and solver.val_acc_history will be lists containing
30 | the accuracies of the model on the training and validation set at each epoch.
31 |
32 | Example usage might look something like this:
33 |
34 | data = load_coco_data()
35 | model = MyAwesomeModel(hidden_dim=100)
36 | solver = CaptioningSolver(model, data,
37 | update_rule='sgd',
38 | optim_config={
39 | 'learning_rate': 1e-3,
40 | },
41 | lr_decay=0.95,
42 | num_epochs=10, batch_size=100,
43 | print_every=100)
44 | solver.train()
45 |
46 |
47 | A CaptioningSolver works on a model object that must conform to the following
48 | API:
49 |
50 | - model.params must be a dictionary mapping string parameter names to numpy
51 | arrays containing parameter values.
52 |
53 | - model.loss(features, captions) must be a function that computes
54 | training-time loss and gradients, with the following inputs and outputs:
55 |
56 | Inputs:
57 |       - features: Array giving a minibatch of features for images, of shape (N, D)
58 |       - captions: Array of captions for those images, of shape (N, T) where
59 |         each element is in the range [0, V).
60 |
61 | Returns:
62 | - loss: Scalar giving the loss
63 |       - grads: Dictionary with the same keys as model.params mapping parameter
64 |         names to gradients of the loss with respect to those parameters.
65 | """
66 |
67 | def __init__(self, model, data, **kwargs):
68 | """
69 | Construct a new CaptioningSolver instance.
70 |
71 | Required arguments:
72 | - model: A model object conforming to the API described above
73 | - data: A dictionary of training and validation data from load_coco_data
74 |
75 | Optional arguments:
76 | - update_rule: A string giving the name of an update rule in optim.py.
77 | Default is 'sgd'.
78 | - optim_config: A dictionary containing hyperparameters that will be
79 | passed to the chosen update rule. Each update rule requires different
80 | hyperparameters (see optim.py) but all update rules require a
81 | 'learning_rate' parameter so that should always be present.
82 | - lr_decay: A scalar for learning rate decay; after each epoch the learning
83 | rate is multiplied by this value.
84 | - batch_size: Size of minibatches used to compute loss and gradient during
85 | training.
86 | - num_epochs: The number of epochs to run for during training.
87 | - print_every: Integer; training losses will be printed every print_every
88 | iterations.
89 | - verbose: Boolean; if set to false then no output will be printed during
90 | training.
91 | """
92 | self.model = model
93 | self.data = data
94 |
95 | # Unpack keyword arguments
96 | self.update_rule = kwargs.pop("update_rule", "sgd")
97 | self.optim_config = kwargs.pop("optim_config", {})
98 | self.lr_decay = kwargs.pop("lr_decay", 1.0)
99 | self.batch_size = kwargs.pop("batch_size", 100)
100 | self.num_epochs = kwargs.pop("num_epochs", 10)
101 |
102 | self.print_every = kwargs.pop("print_every", 10)
103 | self.verbose = kwargs.pop("verbose", True)
104 |
105 | # Throw an error if there are extra keyword arguments
106 | if len(kwargs) > 0:
107 | extra = ", ".join('"%s"' % k for k in list(kwargs.keys()))
108 | raise ValueError("Unrecognized arguments %s" % extra)
109 |
110 | # Make sure the update rule exists, then replace the string
111 | # name with the actual function
112 | if not hasattr(optim, self.update_rule):
113 | raise ValueError('Invalid update_rule "%s"' % self.update_rule)
114 | self.update_rule = getattr(optim, self.update_rule)
115 |
116 | self._reset()
117 |
118 | def _reset(self):
119 | """
120 | Set up some book-keeping variables for optimization. Don't call this
121 | manually.
122 | """
123 | # Set up some variables for book-keeping
124 | self.epoch = 0
125 | self.best_val_acc = 0
126 | self.best_params = {}
127 | self.loss_history = []
128 | self.train_acc_history = []
129 | self.val_acc_history = []
130 |
131 | # Make a deep copy of the optim_config for each parameter
132 | self.optim_configs = {}
133 | for p in self.model.params:
134 | d = {k: v for k, v in self.optim_config.items()}
135 | self.optim_configs[p] = d
136 |
137 | def _step(self):
138 | """
139 | Make a single gradient update. This is called by train() and should not
140 | be called manually.
141 | """
142 | # Make a minibatch of training data
143 | minibatch = sample_coco_minibatch(
144 | self.data, batch_size=self.batch_size, split="train"
145 | )
146 | captions, features, urls = minibatch
147 |
148 | # Compute loss and gradient
149 | loss, grads = self.model.loss(features, captions)
150 | self.loss_history.append(loss)
151 |
152 | # Perform a parameter update
153 | for p, w in self.model.params.items():
154 | dw = grads[p]
155 | config = self.optim_configs[p]
156 | next_w, next_config = self.update_rule(w, dw, config)
157 | self.model.params[p] = next_w
158 | self.optim_configs[p] = next_config
159 |
160 | def check_accuracy(self, X, y, num_samples=None, batch_size=100):
161 | """
162 | Check accuracy of the model on the provided data.
163 |
164 | Inputs:
165 | - X: Array of data, of shape (N, d_1, ..., d_k)
166 | - y: Array of labels, of shape (N,)
167 | - num_samples: If not None, subsample the data and only test the model
168 | on num_samples datapoints.
169 | - batch_size: Split X and y into batches of this size to avoid using too
170 | much memory.
171 |
172 | Returns:
173 | - acc: Scalar giving the fraction of instances that were correctly
174 | classified by the model.
175 | """
176 |         return 0.0  # accuracy is not meaningful for captioning; the code below is unreachable
177 |
178 | # Maybe subsample the data
179 | N = X.shape[0]
180 | if num_samples is not None and N > num_samples:
181 | mask = np.random.choice(N, num_samples)
182 | N = num_samples
183 | X = X[mask]
184 | y = y[mask]
185 |
186 | # Compute predictions in batches
187 |         num_batches = N // batch_size  # integer division so range() works in Python 3
188 | if N % batch_size != 0:
189 | num_batches += 1
190 | y_pred = []
191 | for i in range(num_batches):
192 | start = i * batch_size
193 | end = (i + 1) * batch_size
194 | scores = self.model.loss(X[start:end])
195 | y_pred.append(np.argmax(scores, axis=1))
196 | y_pred = np.hstack(y_pred)
197 | acc = np.mean(y_pred == y)
198 |
199 | return acc
200 |
201 | def train(self):
202 | """
203 | Run optimization to train the model.
204 | """
205 | num_train = self.data["train_captions"].shape[0]
206 | iterations_per_epoch = max(num_train // self.batch_size, 1)
207 | num_iterations = self.num_epochs * iterations_per_epoch
208 |
209 | for t in range(num_iterations):
210 | self._step()
211 |
212 | # Maybe print training loss
213 | if self.verbose and t % self.print_every == 0:
214 | print(
215 | "(Iteration %d / %d) loss: %f"
216 | % (t + 1, num_iterations, self.loss_history[-1])
217 | )
218 |
219 | # At the end of every epoch, increment the epoch counter and decay the
220 | # learning rate.
221 | epoch_end = (t + 1) % iterations_per_epoch == 0
222 | if epoch_end:
223 | self.epoch += 1
224 | for k in self.optim_configs:
225 | self.optim_configs[k]["learning_rate"] *= self.lr_decay
226 |
--------------------------------------------------------------------------------
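
The docstring above pins down the update-rule contract that CaptioningSolver relies on: each rule in optim.py is a function f(w, dw, config) -> (next_w, next_config) whose config always carries a 'learning_rate'. A minimal sketch of a conforming rule (illustrative only; optim.py holds the real implementations):

    import numpy as np

    def sgd_sketch(w, dw, config=None):
        # Conforms to the optim.py API the solver expects:
        # takes (w, dw, config) and returns (next_w, next_config).
        if config is None:
            config = {}
        config.setdefault("learning_rate", 1e-2)  # every rule must accept this key
        next_w = w - config["learning_rate"] * dw
        return next_w, config

Because _step() stores the returned config per parameter, stateful rules (momentum, Adam) can keep their buffers in the same dictionary.
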
/assignment3/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/cs231n/classifiers/__init__.py
--------------------------------------------------------------------------------
/assignment3/cs231n/classifiers/squeezenet.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | NUM_CLASSES = 1000
4 |
5 | class Fire(tf.keras.Model):
6 | def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes,name=None):
7 | super(Fire, self).__init__(name='%s/fire'%name)
8 | self.inplanes = inplanes
9 | self.squeeze = tf.keras.layers.Conv2D(squeeze_planes, input_shape=(inplanes,), kernel_size=1, strides=(1,1), padding="VALID", activation='relu',name='squeeze')
10 | self.expand1x1 = tf.keras.layers.Conv2D(expand1x1_planes, kernel_size=1, padding="VALID", strides=(1,1), activation='relu',name='e11')
11 | self.expand3x3 = tf.keras.layers.Conv2D(expand3x3_planes, kernel_size=3, padding="SAME", strides=(1,1), activation='relu',name='e33')
12 |
13 | def call(self, x):
14 | x = self.squeeze(x)
15 | return tf.concat([
16 | self.expand1x1(x),
17 | self.expand3x3(x)
18 | ], axis=3)
19 |
20 |
21 | class SqueezeNet(tf.keras.Model):
22 | def __init__(self, num_classes=NUM_CLASSES):
23 | super(SqueezeNet, self).__init__()
24 | self.num_classes = num_classes
25 |
26 | self.net = tf.keras.models.Sequential([
27 | tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(2,2), padding="VALID", activation='relu', input_shape=(224, 224, 3), name='features/layer0'),
28 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer2'),
29 | Fire(64, 16, 64, 64, name='features/layer3'),
30 | Fire(128, 16, 64, 64, name='features/layer4'),
31 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer5'),
32 | Fire(128, 32, 128, 128, name='features/layer6'),
33 | Fire(256, 32, 128, 128, name='features/layer7'),
34 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer8'),
35 | Fire(256, 48, 192, 192, name='features/layer9'),
36 | Fire(384, 48, 192, 192, name='features/layer10'),
37 | Fire(384, 64, 256, 256, name='features/layer11'),
38 | Fire(512, 64, 256, 256, name='features/layer12'),
39 | tf.keras.layers.Conv2D(self.num_classes, kernel_size=1, padding="VALID", activation='relu', name='classifier/layer1'),
40 | tf.keras.layers.AveragePooling2D(pool_size=13, strides=13, padding="VALID", name='classifier/layer3')
41 | ])
42 |
43 | def call(self, x, save_path=None):
44 | x = self.net(x)
45 | scores = tf.reshape(x, (-1, self.num_classes))
46 | return scores
47 |
--------------------------------------------------------------------------------
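
A quick smoke test for the model above: a random NHWC batch should come out as (N, 1000) class scores once the 13x13 average pool collapses the spatial dimensions. This sketch assumes TensorFlow 2.x and that the file is importable as cs231n.classifiers.squeezenet (i.e. it is run from the assignment3 directory):

    import tensorflow as tf
    from cs231n.classifiers.squeezenet import SqueezeNet

    model = SqueezeNet()
    x = tf.random.uniform((2, 224, 224, 3))  # NHWC, as the net expects
    scores = model(x)
    print(scores.shape)  # (2, 1000)
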
/assignment3/cs231n/coco_utils.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | import os, json
3 | import numpy as np
4 | import h5py
5 |
6 | dir_path = os.path.dirname(os.path.realpath(__file__))
7 | BASE_DIR = os.path.join(dir_path, "datasets/coco_captioning")
8 |
9 | def load_coco_data(base_dir=BASE_DIR, max_train=None, pca_features=True):
10 |     print("base dir:", base_dir)
11 | data = {}
12 | caption_file = os.path.join(base_dir, "coco2014_captions.h5")
13 | with h5py.File(caption_file, "r") as f:
14 | for k, v in f.items():
15 | data[k] = np.asarray(v)
16 |
17 | if pca_features:
18 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7_pca.h5")
19 | else:
20 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7.h5")
21 | with h5py.File(train_feat_file, "r") as f:
22 | data["train_features"] = np.asarray(f["features"])
23 |
24 | if pca_features:
25 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7_pca.h5")
26 | else:
27 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7.h5")
28 | with h5py.File(val_feat_file, "r") as f:
29 | data["val_features"] = np.asarray(f["features"])
30 |
31 | dict_file = os.path.join(base_dir, "coco2014_vocab.json")
32 | with open(dict_file, "r") as f:
33 | dict_data = json.load(f)
34 | for k, v in dict_data.items():
35 | data[k] = v
36 |
37 | train_url_file = os.path.join(base_dir, "train2014_urls.txt")
38 | with open(train_url_file, "r") as f:
39 | train_urls = np.asarray([line.strip() for line in f])
40 | data["train_urls"] = train_urls
41 |
42 | val_url_file = os.path.join(base_dir, "val2014_urls.txt")
43 | with open(val_url_file, "r") as f:
44 | val_urls = np.asarray([line.strip() for line in f])
45 | data["val_urls"] = val_urls
46 |
47 | # Maybe subsample the training data
48 | if max_train is not None:
49 | num_train = data["train_captions"].shape[0]
50 | mask = np.random.randint(num_train, size=max_train)
51 | data["train_captions"] = data["train_captions"][mask]
52 | data["train_image_idxs"] = data["train_image_idxs"][mask]
53 |
54 | return data
55 |
56 |
57 | def decode_captions(captions, idx_to_word):
58 | singleton = False
59 | if captions.ndim == 1:
60 | singleton = True
61 | captions = captions[None]
62 | decoded = []
63 | N, T = captions.shape
64 | for i in range(N):
65 | words = []
66 | for t in range(T):
67 | word = idx_to_word[captions[i, t]]
68 |             if word != "<NULL>":
69 |                 words.append(word)
70 |             if word == "<END>":
71 |                 break
72 | decoded.append(" ".join(words))
73 | if singleton:
74 | decoded = decoded[0]
75 | return decoded
76 |
77 |
78 | def sample_coco_minibatch(data, batch_size=100, split="train"):
79 | split_size = data["%s_captions" % split].shape[0]
80 | mask = np.random.choice(split_size, batch_size)
81 | captions = data["%s_captions" % split][mask]
82 | image_idxs = data["%s_image_idxs" % split][mask]
83 | image_features = data["%s_features" % split][image_idxs]
84 | urls = data["%s_urls" % split][image_idxs]
85 | return captions, image_features, urls
86 |
--------------------------------------------------------------------------------
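
The <NULL> / <END> handling in decode_captions is easy to check on a made-up vocabulary (the word list below is illustrative, not the real COCO vocab): <NULL> tokens are dropped, and the caption stops at the first <END>, which itself is kept.

    import numpy as np
    from cs231n.coco_utils import decode_captions

    idx_to_word = ["<NULL>", "<START>", "<END>", "a", "cat"]
    captions = np.array([[1, 3, 4, 2, 0]])  # <START> a cat <END> <NULL>
    print(decode_captions(captions, idx_to_word))  # ['<START> a cat <END>']
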
/assignment3/cs231n/data_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | from builtins import range
4 | from six.moves import cPickle as pickle
5 | import numpy as np
6 | import os
7 | from imageio import imread
8 | import platform
9 |
10 |
11 | def load_pickle(f):
12 | version = platform.python_version_tuple()
13 | if version[0] == "2":
14 | return pickle.load(f)
15 | elif version[0] == "3":
16 | return pickle.load(f, encoding="latin1")
17 | raise ValueError("invalid python version: {}".format(version))
18 |
19 |
20 | def load_CIFAR_batch(filename):
21 | """ load single batch of cifar """
22 | with open(filename, "rb") as f:
23 | datadict = load_pickle(f)
24 | X = datadict["data"]
25 | Y = datadict["labels"]
26 | X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
27 | Y = np.array(Y)
28 | return X, Y
29 |
30 |
31 | def load_CIFAR10(ROOT):
32 | """ load all of cifar """
33 | xs = []
34 | ys = []
35 | for b in range(1, 6):
36 | f = os.path.join(ROOT, "data_batch_%d" % (b,))
37 | X, Y = load_CIFAR_batch(f)
38 | xs.append(X)
39 | ys.append(Y)
40 | Xtr = np.concatenate(xs)
41 | Ytr = np.concatenate(ys)
42 | del X, Y
43 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, "test_batch"))
44 | return Xtr, Ytr, Xte, Yte
45 |
46 |
47 | def get_CIFAR10_data(
48 | num_training=49000, num_validation=1000, num_test=1000, subtract_mean=True
49 | ):
50 | """
51 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
52 | it for classifiers. These are the same steps as we used for the SVM, but
53 | condensed to a single function.
54 | """
55 | # Load the raw CIFAR-10 data
56 | cifar10_dir = os.path.join(
57 | os.path.dirname(__file__), "datasets/cifar-10-batches-py"
58 | )
59 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
60 |
61 | # Subsample the data
62 | mask = list(range(num_training, num_training + num_validation))
63 | X_val = X_train[mask]
64 | y_val = y_train[mask]
65 | mask = list(range(num_training))
66 | X_train = X_train[mask]
67 | y_train = y_train[mask]
68 | mask = list(range(num_test))
69 | X_test = X_test[mask]
70 | y_test = y_test[mask]
71 |
72 | # Normalize the data: subtract the mean image
73 | if subtract_mean:
74 | mean_image = np.mean(X_train, axis=0)
75 | X_train -= mean_image
76 | X_val -= mean_image
77 | X_test -= mean_image
78 |
79 | # Transpose so that channels come first
80 | X_train = X_train.transpose(0, 3, 1, 2).copy()
81 | X_val = X_val.transpose(0, 3, 1, 2).copy()
82 | X_test = X_test.transpose(0, 3, 1, 2).copy()
83 |
84 | # Package data into a dictionary
85 | return {
86 | "X_train": X_train,
87 | "y_train": y_train,
88 | "X_val": X_val,
89 | "y_val": y_val,
90 | "X_test": X_test,
91 | "y_test": y_test,
92 | }
93 |
94 |
95 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
96 | """
97 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
98 | TinyImageNet-200 have the same directory structure, so this can be used
99 | to load any of them.
100 |
101 | Inputs:
102 | - path: String giving path to the directory to load.
103 | - dtype: numpy datatype used to load the data.
104 | - subtract_mean: Whether to subtract the mean training image.
105 |
106 | Returns: A dictionary with the following entries:
107 | - class_names: A list where class_names[i] is a list of strings giving the
108 | WordNet names for class i in the loaded dataset.
109 | - X_train: (N_tr, 3, 64, 64) array of training images
110 | - y_train: (N_tr,) array of training labels
111 | - X_val: (N_val, 3, 64, 64) array of validation images
112 | - y_val: (N_val,) array of validation labels
113 | - X_test: (N_test, 3, 64, 64) array of testing images.
114 | - y_test: (N_test,) array of test labels; if test labels are not available
115 | (such as in student code) then y_test will be None.
116 | - mean_image: (3, 64, 64) array giving mean training image
117 | """
118 | # First load wnids
119 | with open(os.path.join(path, "wnids.txt"), "r") as f:
120 | wnids = [x.strip() for x in f]
121 |
122 | # Map wnids to integer labels
123 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
124 |
125 | # Use words.txt to get names for each class
126 | with open(os.path.join(path, "words.txt"), "r") as f:
127 | wnid_to_words = dict(line.split("\t") for line in f)
128 | for wnid, words in wnid_to_words.items():
129 | wnid_to_words[wnid] = [w.strip() for w in words.split(",")]
130 | class_names = [wnid_to_words[wnid] for wnid in wnids]
131 |
132 | # Next load training data.
133 | X_train = []
134 | y_train = []
135 | for i, wnid in enumerate(wnids):
136 | if (i + 1) % 20 == 0:
137 | print("loading training data for synset %d / %d" % (i + 1, len(wnids)))
138 | # To figure out the filenames we need to open the boxes file
139 | boxes_file = os.path.join(path, "train", wnid, "%s_boxes.txt" % wnid)
140 | with open(boxes_file, "r") as f:
141 | filenames = [x.split("\t")[0] for x in f]
142 | num_images = len(filenames)
143 |
144 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
145 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
146 | for j, img_file in enumerate(filenames):
147 | img_file = os.path.join(path, "train", wnid, "images", img_file)
148 | img = imread(img_file)
149 | if img.ndim == 2:
150 | ## grayscale file
151 | img.shape = (64, 64, 1)
152 | X_train_block[j] = img.transpose(2, 0, 1)
153 | X_train.append(X_train_block)
154 | y_train.append(y_train_block)
155 |
156 | # We need to concatenate all training data
157 | X_train = np.concatenate(X_train, axis=0)
158 | y_train = np.concatenate(y_train, axis=0)
159 |
160 | # Next load validation data
161 | with open(os.path.join(path, "val", "val_annotations.txt"), "r") as f:
162 | img_files = []
163 | val_wnids = []
164 | for line in f:
165 | img_file, wnid = line.split("\t")[:2]
166 | img_files.append(img_file)
167 | val_wnids.append(wnid)
168 | num_val = len(img_files)
169 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
170 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
171 | for i, img_file in enumerate(img_files):
172 | img_file = os.path.join(path, "val", "images", img_file)
173 | img = imread(img_file)
174 | if img.ndim == 2:
175 | img.shape = (64, 64, 1)
176 | X_val[i] = img.transpose(2, 0, 1)
177 |
178 | # Next load test images
179 | # Students won't have test labels, so we need to iterate over files in the
180 | # images directory.
181 | img_files = os.listdir(os.path.join(path, "test", "images"))
182 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
183 | for i, img_file in enumerate(img_files):
184 | img_file = os.path.join(path, "test", "images", img_file)
185 | img = imread(img_file)
186 | if img.ndim == 2:
187 | img.shape = (64, 64, 1)
188 | X_test[i] = img.transpose(2, 0, 1)
189 |
190 | y_test = None
191 | y_test_file = os.path.join(path, "test", "test_annotations.txt")
192 | if os.path.isfile(y_test_file):
193 | with open(y_test_file, "r") as f:
194 | img_file_to_wnid = {}
195 | for line in f:
196 | line = line.split("\t")
197 | img_file_to_wnid[line[0]] = line[1]
198 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
199 | y_test = np.array(y_test)
200 |
201 | mean_image = X_train.mean(axis=0)
202 | if subtract_mean:
203 | X_train -= mean_image[None]
204 | X_val -= mean_image[None]
205 | X_test -= mean_image[None]
206 |
207 | return {
208 | "class_names": class_names,
209 | "X_train": X_train,
210 | "y_train": y_train,
211 | "X_val": X_val,
212 | "y_val": y_val,
213 | "X_test": X_test,
214 | "y_test": y_test,
215 | "class_names": class_names,
216 | "mean_image": mean_image,
217 | }
218 |
219 |
220 | def load_models(models_dir):
221 | """
222 | Load saved models from disk. This will attempt to unpickle all files in a
223 | directory; any files that give errors on unpickling (such as README.txt)
224 | will be skipped.
225 |
226 | Inputs:
227 | - models_dir: String giving the path to a directory containing model files.
228 | Each model file is a pickled dictionary with a 'model' field.
229 |
230 | Returns:
231 | A dictionary mapping model file names to models.
232 | """
233 | models = {}
234 | for model_file in os.listdir(models_dir):
235 | with open(os.path.join(models_dir, model_file), "rb") as f:
236 | try:
237 | models[model_file] = load_pickle(f)["model"]
238 | except pickle.UnpicklingError:
239 | continue
240 | return models
241 |
242 |
243 | def load_imagenet_val(num=None):
244 | """Load a handful of validation images from ImageNet.
245 |
246 | Inputs:
247 | - num: Number of images to load (max of 25)
248 |
249 | Returns:
250 | - X: numpy array with shape [num, 224, 224, 3]
251 | - y: numpy array of integer image labels, shape [num]
252 | - class_names: dict mapping integer label to class name
253 | """
254 | imagenet_fn = os.path.join(
255 | os.path.dirname(__file__), "datasets/imagenet_val_25.npz"
256 | )
257 | if not os.path.isfile(imagenet_fn):
258 | print("file %s not found" % imagenet_fn)
259 | print("Run the following:")
260 | print("cd cs231n/datasets")
261 | print("bash get_imagenet_val.sh")
262 | assert False, "Need to download imagenet_val_25.npz"
263 |
264 | # modify the default parameters of np.load
265 | # https://stackoverflow.com/questions/55890813/how-to-fix-object-arrays-cannot-be-loaded-when-allow-pickle-false-for-imdb-loa
266 | np_load_old = np.load
267 | np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)
268 | f = np.load(imagenet_fn)
269 | np.load = np_load_old
270 | X = f["X"]
271 | y = f["y"]
272 | class_names = f["label_map"].item()
273 | if num is not None:
274 | X = X[:num]
275 | y = y[:num]
276 | return X, y, class_names
277 |
--------------------------------------------------------------------------------
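
The temporary np.load monkeypatch in load_imagenet_val works around NumPy >= 1.16.3 defaulting to allow_pickle=False. An equivalent, more direct form, shown here only as a sketch (it assumes the dataset has been downloaded and that you run from the assignment3 directory), is to pass the flag explicitly:

    import numpy as np

    f = np.load("cs231n/datasets/imagenet_val_25.npz", allow_pickle=True)
    X, y, class_names = f["X"], f["y"], f["label_map"].item()
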
/assignment3/cs231n/gradient_check.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from builtins import range
3 | from past.builtins import xrange
4 |
5 | import numpy as np
6 | from random import randrange
7 |
8 |
9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
10 | """
11 | a naive implementation of numerical gradient of f at x
12 | - f should be a function that takes a single argument
13 | - x is the point (numpy array) to evaluate the gradient at
14 | """
15 |
16 | fx = f(x) # evaluate function value at original point
17 | grad = np.zeros_like(x)
18 | # iterate over all indexes in x
19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
20 | while not it.finished:
21 |
22 | # evaluate function at x+h
23 | ix = it.multi_index
24 | oldval = x[ix]
25 | x[ix] = oldval + h # increment by h
26 |         fxph = f(x)  # evaluate f(x + h)
27 | x[ix] = oldval - h
28 | fxmh = f(x) # evaluate f(x - h)
29 | x[ix] = oldval # restore
30 |
31 | # compute the partial derivative with centered formula
32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope
33 | if verbose:
34 | print(ix, grad[ix])
35 | it.iternext() # step to next dimension
36 |
37 | return grad
38 |
39 |
40 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
41 | """
42 | Evaluate a numeric gradient for a function that accepts a numpy
43 | array and returns a numpy array.
44 | """
45 | grad = np.zeros_like(x)
46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"])
47 | while not it.finished:
48 | ix = it.multi_index
49 |
50 | oldval = x[ix]
51 | x[ix] = oldval + h
52 | pos = f(x).copy()
53 | x[ix] = oldval - h
54 | neg = f(x).copy()
55 | x[ix] = oldval
56 |
57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h)
58 | it.iternext()
59 | return grad
60 |
61 |
62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
63 | """
64 | Compute numeric gradients for a function that operates on input
65 | and output blobs.
66 |
67 | We assume that f accepts several input blobs as arguments, followed by a
68 | blob where outputs will be written. For example, f might be called like:
69 |
70 | f(x, w, out)
71 |
72 | where x and w are input Blobs, and the result of f will be written to out.
73 |
74 | Inputs:
75 | - f: function
76 | - inputs: tuple of input blobs
77 | - output: output blob
78 | - h: step size
79 | """
80 | numeric_diffs = []
81 | for input_blob in inputs:
82 | diff = np.zeros_like(input_blob.diffs)
83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"])
84 | while not it.finished:
85 | idx = it.multi_index
86 | orig = input_blob.vals[idx]
87 |
88 | input_blob.vals[idx] = orig + h
89 | f(*(inputs + (output,)))
90 | pos = np.copy(output.vals)
91 | input_blob.vals[idx] = orig - h
92 | f(*(inputs + (output,)))
93 | neg = np.copy(output.vals)
94 | input_blob.vals[idx] = orig
95 |
96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
97 |
98 | it.iternext()
99 | numeric_diffs.append(diff)
100 | return numeric_diffs
101 |
102 |
103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
104 | return eval_numerical_gradient_blobs(
105 | lambda *args: net.forward(), inputs, output, h=h
106 | )
107 |
108 |
109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
110 | """
111 |     Sample a few random elements and only return numerical gradients
112 |     in these dimensions.
113 | """
114 |
115 | for i in range(num_checks):
116 | ix = tuple([randrange(m) for m in x.shape])
117 |
118 | oldval = x[ix]
119 | x[ix] = oldval + h # increment by h
120 | fxph = f(x) # evaluate f(x + h)
121 |         x[ix] = oldval - h  # decrement by h
122 | fxmh = f(x) # evaluate f(x - h)
123 | x[ix] = oldval # reset
124 |
125 | grad_numerical = (fxph - fxmh) / (2 * h)
126 | grad_analytic = analytic_grad[ix]
127 | rel_error = abs(grad_numerical - grad_analytic) / (
128 | abs(grad_numerical) + abs(grad_analytic)
129 | )
130 | print(
131 | "numerical: %f analytic: %f, relative error: %e"
132 | % (grad_numerical, grad_analytic, rel_error)
133 | )
134 |
--------------------------------------------------------------------------------
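
A quick sanity check for eval_numerical_gradient: for f(x) = 0.5 * sum(x**2) the analytic gradient is x itself, so the centered-difference estimate should agree to within numerical noise. This sketch assumes the module is importable as cs231n.gradient_check:

    import numpy as np
    from cs231n.gradient_check import eval_numerical_gradient

    x = np.random.randn(4, 5)
    grad = eval_numerical_gradient(lambda z: 0.5 * np.sum(z ** 2), x, verbose=False)
    print(np.max(np.abs(grad - x)))  # should be ~1e-8 or smaller
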
/assignment3/cs231n/im2col.py:
--------------------------------------------------------------------------------
1 | from builtins import range
2 | import numpy as np
3 |
4 |
5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
6 | # First figure out what the size of the output should be
7 | N, C, H, W = x_shape
8 | assert (H + 2 * padding - field_height) % stride == 0
9 |     assert (W + 2 * padding - field_width) % stride == 0
10 |     out_height = (H + 2 * padding - field_height) // stride + 1
11 |     out_width = (W + 2 * padding - field_width) // stride + 1
12 |
13 | i0 = np.repeat(np.arange(field_height), field_width)
14 | i0 = np.tile(i0, C)
15 | i1 = stride * np.repeat(np.arange(out_height), out_width)
16 | j0 = np.tile(np.arange(field_width), field_height * C)
17 | j1 = stride * np.tile(np.arange(out_width), out_height)
18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1)
19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1)
20 |
21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
22 |
23 | return (k, i, j)
24 |
25 |
26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1):
27 | """ An implementation of im2col based on some fancy indexing """
28 | # Zero-pad the input
29 | p = padding
30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant")
31 |
32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride)
33 |
34 | cols = x_padded[:, k, i, j]
35 | C = x.shape[1]
36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
37 | return cols
38 |
39 |
40 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, stride=1):
41 | """ An implementation of col2im based on fancy indexing and np.add.at """
42 | N, C, H, W = x_shape
43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding
44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride)
46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1)
48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
49 | if padding == 0:
50 | return x_padded
51 | return x_padded[:, :, padding:-padding, padding:-padding]
52 |
53 |
54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
55 |
56 | pass
57 |
58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
59 |
--------------------------------------------------------------------------------
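
For a 3x3 receptive field with padding 1 and stride 1, im2col_indices unrolls an (N, C, H, W) input into a (C*3*3, N*H*W) matrix, and col2im_indices scatters such a matrix back to the input shape. A shape-level sketch, assuming the module is importable as cs231n.im2col:

    import numpy as np
    from cs231n.im2col import im2col_indices, col2im_indices

    x = np.random.randn(2, 3, 5, 5)
    cols = im2col_indices(x, 3, 3, padding=1, stride=1)
    print(cols.shape)  # (27, 50): 3*3*3 filter entries by 2*5*5 output positions
    print(col2im_indices(cols, x.shape, 3, 3, padding=1, stride=1).shape)  # (2, 3, 5, 5)
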
/assignment3/cs231n/im2col_cython.pyx:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | cimport numpy as np
3 | cimport cython
4 |
5 | # DTYPE = np.float64
6 | # ctypedef np.float64_t DTYPE_t
7 |
8 | ctypedef fused DTYPE_t:
9 | np.float32_t
10 | np.float64_t
11 |
12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
13 | int field_width, int padding, int stride):
14 | cdef int N = x.shape[0]
15 | cdef int C = x.shape[1]
16 | cdef int H = x.shape[2]
17 | cdef int W = x.shape[3]
18 |
19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1
20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1
21 |
22 | cdef int p = padding
23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
25 |
26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
27 | (C * field_height * field_width, N * HH * WW),
28 | dtype=x.dtype)
29 |
30 | # Moving the inner loop to a C function with no bounds checking works, but does
31 | # not seem to help performance in any measurable way.
32 |
33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
34 | field_height, field_width, padding, stride)
35 | return cols
36 |
37 |
38 | @cython.boundscheck(False)
39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
40 | np.ndarray[DTYPE_t, ndim=4] x_padded,
41 | int N, int C, int H, int W, int HH, int WW,
42 | int field_height, int field_width, int padding, int stride) except? -1:
43 | cdef int c, ii, jj, row, yy, xx, i, col
44 |
45 | for c in range(C):
46 | for yy in range(HH):
47 | for xx in range(WW):
48 | for ii in range(field_height):
49 | for jj in range(field_width):
50 | row = c * field_width * field_height + ii * field_height + jj
51 | for i in range(N):
52 | col = yy * WW * N + xx * N + i
53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
54 |
55 |
56 |
57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
58 | int field_height, int field_width, int padding, int stride):
59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1
61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1
62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
63 | dtype=cols.dtype)
64 |
65 | # Moving the inner loop to a C-function with no bounds checking improves
66 | # performance quite a bit for col2im.
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
68 | field_height, field_width, padding, stride)
69 | if padding > 0:
70 | return x_padded[:, :, padding:-padding, padding:-padding]
71 | return x_padded
72 |
73 |
74 | @cython.boundscheck(False)
75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
76 | np.ndarray[DTYPE_t, ndim=4] x_padded,
77 | int N, int C, int H, int W, int HH, int WW,
78 | int field_height, int field_width, int padding, int stride) except? -1:
79 | cdef int c, ii, jj, row, yy, xx, i, col
80 |
81 | for c in range(C):
82 | for ii in range(field_height):
83 | for jj in range(field_width):
84 | row = c * field_width * field_height + ii * field_height + jj
85 | for yy in range(HH):
86 | for xx in range(WW):
87 | for i in range(N):
88 | col = yy * WW * N + xx * N + i
89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
90 |
91 |
92 | @cython.boundscheck(False)
93 | @cython.wraparound(False)
94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
95 | np.ndarray[DTYPE_t, ndim=4] x_padded,
96 | int N, int C, int H, int W, int HH, int WW,
97 | int out_h, int out_w, int pad, int stride):
98 |
99 | cdef int c, hh, ww, n, h, w
100 | for n in range(N):
101 | for c in range(C):
102 | for hh in range(HH):
103 | for ww in range(WW):
104 | for h in range(out_h):
105 | for w in range(out_w):
106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
107 |
108 |
109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
110 | int HH, int WW, int pad, int stride):
111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1
113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1
114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
115 | dtype=cols.dtype)
116 |
117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)
118 |
119 | if pad > 0:
120 | return x_padded[:, :, pad:-pad, pad:-pad]
121 | return x_padded
122 |
--------------------------------------------------------------------------------
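
The Cython kernels mirror the pure-NumPy im2col above, so once the extension is compiled (the repo ships a setup.py next to this file; the usual invocation is python setup.py build_ext --inplace) the two can be cross-checked directly. For square filters the layouts coincide exactly:

    import numpy as np
    from cs231n.im2col import im2col_indices
    from cs231n.im2col_cython import im2col_cython

    x = np.random.randn(2, 3, 7, 7)
    a = im2col_indices(x, 3, 3, padding=1, stride=1)
    b = im2col_cython(x, 3, 3, 1, 1)  # (field_height, field_width, padding, stride)
    print(np.abs(a - b).max())  # expect 0.0
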
/assignment3/cs231n/image_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from future import standard_library
3 |
4 | standard_library.install_aliases()
5 | from builtins import range
6 | import urllib.request, urllib.error, urllib.parse, os, tempfile
7 |
8 | import numpy as np
9 | from imageio import imread
10 | from PIL import Image
11 |
12 | """
13 | Utility functions used for viewing and processing images.
14 | """
15 |
16 |
17 | def blur_image(X):
18 | """
19 | A very gentle image blurring operation, to be used as a regularizer for
20 | image generation.
21 |
22 | Inputs:
23 | - X: Image data of shape (N, 3, H, W)
24 |
25 | Returns:
26 | - X_blur: Blurred version of X, of shape (N, 3, H, W)
27 | """
28 | from .fast_layers import conv_forward_fast
29 |
30 | w_blur = np.zeros((3, 3, 3, 3))
31 | b_blur = np.zeros(3)
32 | blur_param = {"stride": 1, "pad": 1}
33 | for i in range(3):
34 | w_blur[i, i] = np.asarray([[1, 2, 1], [2, 188, 2], [1, 2, 1]], dtype=np.float32)
35 | w_blur /= 200.0
36 | return conv_forward_fast(X, w_blur, b_blur, blur_param)[0]
37 |
38 |
39 | SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
40 | SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
41 |
42 |
43 | def preprocess_image(img):
44 | """Preprocess an image for squeezenet.
45 |
46 | Subtracts the pixel mean and divides by the standard deviation.
47 | """
48 | return (img.astype(np.float32) / 255.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD
49 |
50 |
51 | def deprocess_image(img, rescale=False):
52 | """Undo preprocessing on an image and convert back to uint8."""
53 | img = img * SQUEEZENET_STD + SQUEEZENET_MEAN
54 | if rescale:
55 | vmin, vmax = img.min(), img.max()
56 | img = (img - vmin) / (vmax - vmin)
57 | return np.clip(255 * img, 0.0, 255.0).astype(np.uint8)
58 |
59 | def get_default_image():
60 | url = 'https://tva1.sinaimg.cn/large/007S8ZIlgy1gjfpgrdb33j30dw07t0sk.jpg'
61 | try:
62 | f = urllib.request.urlopen(url)
63 | _, fname = tempfile.mkstemp()
64 | with open(fname, "wb") as ff:
65 | ff.write(f.read())
66 | img = imread(fname)
67 | os.remove(fname)
68 | return img
69 |     except urllib.error.HTTPError as e:
70 |         print("HTTP Error: ", e.code, url)
71 |         return e.code
72 |     except urllib.error.URLError as e:
73 |         print("URL Error: ", e.reason, url)
74 |         return e.reason
75 |
76 | def image_from_url(url):
77 | """
78 | Read an image from a URL. Returns a numpy array with the pixel data.
79 | We write the image to a temporary file then read it back. Kinda gross.
80 | """
81 | try:
82 | f = urllib.request.urlopen(url)
83 | _, fname = tempfile.mkstemp()
84 | with open(fname, "wb") as ff:
85 | ff.write(f.read())
86 | img = imread(fname)
87 | os.remove(fname)
88 | return img
89 |     except urllib.error.HTTPError as e:
90 |         print("HTTP Error: ", e.code, url)
91 |         return get_default_image()
92 |     except urllib.error.URLError as e:
93 |         print("URL Error: ", e.reason, url)
94 |         return get_default_image()
95 |
96 |
97 | def load_image(filename, size=None):
98 | """Load and resize an image from disk.
99 |
100 | Inputs:
101 | - filename: path to file
102 | - size: size of shortest dimension after rescaling
103 | """
104 | img = imread(filename)
105 | if size is not None:
106 | orig_shape = np.array(img.shape[:2])
107 | min_idx = np.argmin(orig_shape)
108 | scale_factor = float(size) / orig_shape[min_idx]
109 | new_shape = (orig_shape * scale_factor).astype(int)
110 | img = np.array(Image.fromarray(img).resize(new_shape))
111 | return img
112 |
--------------------------------------------------------------------------------
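
preprocess_image and deprocess_image above are designed to be inverses up to uint8 rounding, which a random image makes easy to verify (assuming the module is importable as cs231n.image_utils):

    import numpy as np
    from cs231n.image_utils import preprocess_image, deprocess_image

    img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
    restored = deprocess_image(preprocess_image(img))
    # Differences of at most 1 per channel come from float round-off.
    print(np.max(np.abs(restored.astype(int) - img.astype(int))))
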
/assignment3/cs231n/layer_utils.py:
--------------------------------------------------------------------------------
1 | from .layers import *
2 | from .fast_layers import *
3 |
4 |
5 | def affine_relu_forward(x, w, b):
6 | """
7 |     Convenience layer that performs an affine transform followed by a ReLU
8 |
9 | Inputs:
10 | - x: Input to the affine layer
11 | - w, b: Weights for the affine layer
12 |
13 | Returns a tuple of:
14 | - out: Output from the ReLU
15 | - cache: Object to give to the backward pass
16 | """
17 | a, fc_cache = affine_forward(x, w, b)
18 | out, relu_cache = relu_forward(a)
19 | cache = (fc_cache, relu_cache)
20 | return out, cache
21 |
22 |
23 | def affine_relu_backward(dout, cache):
24 | """
25 | Backward pass for the affine-relu convenience layer
26 | """
27 | fc_cache, relu_cache = cache
28 | da = relu_backward(dout, relu_cache)
29 | dx, dw, db = affine_backward(da, fc_cache)
30 | return dx, dw, db
31 |
32 |
33 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
34 | """
35 | Convenience layer that performs an affine transform, batch normalization,
36 | and ReLU.
37 |
38 | Inputs:
39 | - x: Array of shape (N, D1); input to the affine layer
40 |     - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for
41 | the affine transform.
42 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
43 | parameters for batch normalization.
44 | - bn_param: Dictionary of parameters for batch normalization.
45 |
46 | Returns:
47 | - out: Output from ReLU, of shape (N, D2)
48 | - cache: Object to give to the backward pass.
49 | """
50 | a, fc_cache = affine_forward(x, w, b)
51 | a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
52 | out, relu_cache = relu_forward(a_bn)
53 | cache = (fc_cache, bn_cache, relu_cache)
54 | return out, cache
55 |
56 |
57 | def affine_bn_relu_backward(dout, cache):
58 | """
59 | Backward pass for the affine-batchnorm-relu convenience layer.
60 | """
61 | fc_cache, bn_cache, relu_cache = cache
62 | da_bn = relu_backward(dout, relu_cache)
63 | da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)
64 | dx, dw, db = affine_backward(da, fc_cache)
65 | return dx, dw, db, dgamma, dbeta
66 |
67 |
68 | def affine_ln_relu_forward(x, w, b, gamma, beta, ln_param):
69 | """
70 | Convenience layer that performs an affine transform, layer normalization,
71 | and ReLU.
72 |
73 | Inputs:
74 | - x: Array of shape (N, D1); input to the affine layer
75 |     - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for
76 |       the affine transform.
77 |     - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
78 |       parameters for layer normalization.
79 | - ln_param: Dictionary of parameters for layer normalization.
80 |
81 | Returns:
82 | - out: Output from ReLU, of shape (N, D2)
83 | - cache: Object to give to the backward pass.
84 | """
85 | a, fc_cache = affine_forward(x, w, b)
86 | a_ln, ln_cache = layernorm_forward(a, gamma, beta, ln_param)
87 | out, relu_cache = relu_forward(a_ln)
88 | cache = (fc_cache, ln_cache, relu_cache)
89 | return out, cache
90 |
91 |
92 | def affine_ln_relu_backward(dout, cache):
93 | """
94 | Backward pass for the affine-layernorm-relu convenience layer.
95 | """
96 | fc_cache, ln_cache, relu_cache = cache
97 | da_ln = relu_backward(dout, relu_cache)
98 | da, dgamma, dbeta = layernorm_backward(da_ln, ln_cache)
99 | dx, dw, db = affine_backward(da, fc_cache)
100 | return dx, dw, db, dgamma, dbeta
101 |
102 |
103 | def conv_relu_forward(x, w, b, conv_param):
104 | """
105 | A convenience layer that performs a convolution followed by a ReLU.
106 |
107 | Inputs:
108 | - x: Input to the convolutional layer
109 | - w, b, conv_param: Weights and parameters for the convolutional layer
110 |
111 | Returns a tuple of:
112 | - out: Output from the ReLU
113 | - cache: Object to give to the backward pass
114 | """
115 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
116 | out, relu_cache = relu_forward(a)
117 | cache = (conv_cache, relu_cache)
118 | return out, cache
119 |
120 |
121 | def conv_relu_backward(dout, cache):
122 | """
123 | Backward pass for the conv-relu convenience layer.
124 | """
125 | conv_cache, relu_cache = cache
126 | da = relu_backward(dout, relu_cache)
127 | dx, dw, db = conv_backward_fast(da, conv_cache)
128 | return dx, dw, db
129 |
130 |
131 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
132 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
133 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
134 | out, relu_cache = relu_forward(an)
135 | cache = (conv_cache, bn_cache, relu_cache)
136 | return out, cache
137 |
138 |
139 | def conv_bn_relu_backward(dout, cache):
140 | conv_cache, bn_cache, relu_cache = cache
141 | dan = relu_backward(dout, relu_cache)
142 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
143 | dx, dw, db = conv_backward_fast(da, conv_cache)
144 | return dx, dw, db, dgamma, dbeta
145 |
146 |
147 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
148 | """
149 | Convenience layer that performs a convolution, a ReLU, and a pool.
150 |
151 | Inputs:
152 | - x: Input to the convolutional layer
153 | - w, b, conv_param: Weights and parameters for the convolutional layer
154 | - pool_param: Parameters for the pooling layer
155 |
156 | Returns a tuple of:
157 | - out: Output from the pooling layer
158 | - cache: Object to give to the backward pass
159 | """
160 | a, conv_cache = conv_forward_fast(x, w, b, conv_param)
161 | s, relu_cache = relu_forward(a)
162 | out, pool_cache = max_pool_forward_fast(s, pool_param)
163 | cache = (conv_cache, relu_cache, pool_cache)
164 | return out, cache
165 |
166 |
167 | def conv_relu_pool_backward(dout, cache):
168 | """
169 | Backward pass for the conv-relu-pool convenience layer
170 | """
171 | conv_cache, relu_cache, pool_cache = cache
172 | ds = max_pool_backward_fast(dout, pool_cache)
173 | da = relu_backward(ds, relu_cache)
174 | dx, dw, db = conv_backward_fast(da, conv_cache)
175 | return dx, dw, db
176 |
--------------------------------------------------------------------------------
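
The convenience pairs above are meant to be gradient-checked with the helpers in cs231n/gradient_check.py; a sketch for affine_relu, assuming the repo layout so both modules import (errors should be tiny except exactly at the ReLU kink):

    import numpy as np
    from cs231n.gradient_check import eval_numerical_gradient_array
    from cs231n.layer_utils import affine_relu_forward, affine_relu_backward

    x, w, b = np.random.randn(4, 6), np.random.randn(6, 5), np.random.randn(5)
    dout = np.random.randn(4, 5)

    _, cache = affine_relu_forward(x, w, b)
    dx, dw, db = affine_relu_backward(dout, cache)
    dx_num = eval_numerical_gradient_array(
        lambda xx: affine_relu_forward(xx, w, b)[0], x, dout)
    print(np.max(np.abs(dx - dx_num)))  # ~1e-9 away from the kink
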
/assignment3/cs231n/layers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def affine_forward(x, w, b):
5 | """
6 | Computes the forward pass for an affine (fully-connected) layer.
7 |
8 | The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input.
9 | We multiply this against a weight matrix of shape (D, M) where
10 | D = prod_i d_i
11 |
12 | Inputs:
13 |     - x: Input data, of shape (N, d_1, ..., d_k)
14 |     - w: Weights, of shape (D, M)
15 |     - b: Biases, of shape (M,)
16 |
17 | Returns a tuple of:
18 | - out: output, of shape (N, M)
19 | - cache: (x, w, b)
20 | """
21 | out = x.reshape(x.shape[0], -1).dot(w) + b
22 | cache = (x, w, b)
23 | return out, cache
24 |
25 |
26 | def affine_backward(dout, cache):
27 | """
28 | Computes the backward pass for an affine layer.
29 |
30 | Inputs:
31 | - dout: Upstream derivative, of shape (N, M)
32 | - cache: Tuple of:
33 | - x: Input data, of shape (N, d_1, ... d_k)
34 | - w: Weights, of shape (D, M)
35 |
36 | Returns a tuple of:
37 | - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
38 | - dw: Gradient with respect to w, of shape (D, M)
39 | - db: Gradient with respect to b, of shape (M,)
40 | """
41 | x, w, b = cache
42 | dx = dout.dot(w.T).reshape(x.shape)
43 | dw = x.reshape(x.shape[0], -1).T.dot(dout)
44 | db = np.sum(dout, axis=0)
45 | return dx, dw, db
46 |
47 |
48 | def relu_forward(x):
49 | """
50 | Computes the forward pass for a layer of rectified linear units (ReLUs).
51 |
52 | Input:
53 | - x: Inputs, of any shape
54 |
55 | Returns a tuple of:
56 | - out: Output, of the same shape as x
57 | - cache: x
58 | """
59 | out = np.maximum(0, x)
60 | cache = x
61 | return out, cache
62 |
63 |
64 | def relu_backward(dout, cache):
65 | """
66 | Computes the backward pass for a layer of rectified linear units (ReLUs).
67 |
68 | Input:
69 | - dout: Upstream derivatives, of any shape
70 | - cache: Input x, of same shape as dout
71 |
72 | Returns:
73 | - dx: Gradient with respect to x
74 | """
75 | x = cache
76 | dx = np.where(x > 0, dout, 0)
77 | return dx
78 |
79 |
80 | def batchnorm_forward(x, gamma, beta, bn_param):
81 | """
82 | Forward pass for batch normalization.
83 |
84 | During training the sample mean and (uncorrected) sample variance are
85 | computed from minibatch statistics and used to normalize the incoming data.
86 | During training we also keep an exponentially decaying running mean of the mean
87 | and variance of each feature, and these averages are used to normalize data
88 | at test-time.
89 |
90 | At each timestep we update the running averages for mean and variance using
91 | an exponential decay based on the momentum parameter:
92 |
93 | running_mean = momentum * running_mean + (1 - momentum) * sample_mean
94 | running_var = momentum * running_var + (1 - momentum) * sample_var
95 |
96 | Note that the batch normalization paper suggests a different test-time
97 | behavior: they compute sample mean and variance for each feature using a
98 | large number of training images rather than using a running average. For
99 | this implementation we have chosen to use running averages instead since
100 | they do not require an additional estimation step; the torch7 implementation
101 | of batch normalization also uses running averages.
102 |
103 | Input:
104 | - x: Data of shape (N, D)
105 | - gamma: Scale parameter of shape (D,)
106 | - beta: Shift paremeter of shape (D,)
107 | - bn_param: Dictionary with the following keys:
108 | - mode: 'train' or 'test'; required
109 | - eps: Constant for numeric stability
110 | - momentum: Constant for running mean / variance.
111 | - running_mean: Array of shape (D,) giving running mean of features
112 |     - running_var: Array of shape (D,) giving running variance of features
113 |
114 | Returns a tuple of:
115 | - out: of shape (N, D)
116 | - cache: A tuple of values needed in the backward pass
117 | """
118 | mode = bn_param["mode"]
119 | eps = bn_param.get("eps", 1e-5)
120 | momentum = bn_param.get("momentum", 0.9)
121 |
122 | N, D = x.shape
123 | running_mean = bn_param.get("running_mean", np.zeros(D, dtype=x.dtype))
124 | running_var = bn_param.get("running_var", np.zeros(D, dtype=x.dtype))
125 |
126 | out, cache = None, None
127 | if mode == "train":
128 | # Compute output
129 | mu = x.mean(axis=0)
130 | xc = x - mu
131 | var = np.mean(xc ** 2, axis=0)
132 | std = np.sqrt(var + eps)
133 | xn = xc / std
134 | out = gamma * xn + beta
135 |
136 | cache = (mode, x, gamma, xc, std, xn, out)
137 |
138 | # Update running average of mean
139 | running_mean *= momentum
140 | running_mean += (1 - momentum) * mu
141 |
142 | # Update running average of variance
143 | running_var *= momentum
144 | running_var += (1 - momentum) * var
145 | elif mode == "test":
146 | # Using running mean and variance to normalize
147 | std = np.sqrt(running_var + eps)
148 | xn = (x - running_mean) / std
149 | out = gamma * xn + beta
150 | cache = (mode, x, xn, gamma, beta, std)
151 | else:
152 | raise ValueError('Invalid forward batchnorm mode "%s"' % mode)
153 |
154 | # Store the updated running means back into bn_param
155 | bn_param["running_mean"] = running_mean
156 | bn_param["running_var"] = running_var
157 |
158 | return out, cache
159 |
160 |
161 | def batchnorm_backward(dout, cache):
162 | """
163 | Backward pass for batch normalization.
164 |
165 | For this implementation, you should write out a computation graph for
166 | batch normalization on paper and propagate gradients backward through
167 | intermediate nodes.
168 |
169 | Inputs:
170 | - dout: Upstream derivatives, of shape (N, D)
171 | - cache: Variable of intermediates from batchnorm_forward.
172 |
173 | Returns a tuple of:
174 | - dx: Gradient with respect to inputs x, of shape (N, D)
175 | - dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
176 | - dbeta: Gradient with respect to shift parameter beta, of shape (D,)
177 | """
178 | mode = cache[0]
179 | if mode == "train":
180 | mode, x, gamma, xc, std, xn, out = cache
181 |
182 | N = x.shape[0]
183 | dbeta = dout.sum(axis=0)
184 | dgamma = np.sum(xn * dout, axis=0)
185 | dxn = gamma * dout
186 | dxc = dxn / std
187 | dstd = -np.sum((dxn * xc) / (std * std), axis=0)
188 | dvar = 0.5 * dstd / std
189 | dxc += (2.0 / N) * xc * dvar
190 | dmu = np.sum(dxc, axis=0)
191 | dx = dxc - dmu / N
192 | elif mode == "test":
193 | mode, x, xn, gamma, beta, std = cache
194 | dbeta = dout.sum(axis=0)
195 | dgamma = np.sum(xn * dout, axis=0)
196 | dxn = gamma * dout
197 | dx = dxn / std
198 | else:
199 | raise ValueError(mode)
200 |
201 | return dx, dgamma, dbeta
202 |
203 |
204 | def spatial_batchnorm_forward(x, gamma, beta, bn_param):
205 | """
206 | Computes the forward pass for spatial batch normalization.
207 |
208 | Inputs:
209 | - x: Input data of shape (N, C, H, W)
210 | - gamma: Scale parameter, of shape (C,)
211 | - beta: Shift parameter, of shape (C,)
212 | - bn_param: Dictionary with the following keys:
213 | - mode: 'train' or 'test'; required
214 | - eps: Constant for numeric stability
215 | - momentum: Constant for running mean / variance. momentum=0 means that
216 | old information is discarded completely at every time step, while
217 | momentum=1 means that new information is never incorporated. The
218 | default of momentum=0.9 should work well in most situations.
219 |     - running_mean: Array of shape (C,) giving running mean of features
220 |     - running_var: Array of shape (C,) giving running variance of features
221 |
222 | Returns a tuple of:
223 | - out: Output data, of shape (N, C, H, W)
224 | - cache: Values needed for the backward pass
225 | """
226 | N, C, H, W = x.shape
227 | x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
228 | out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
229 | out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
230 | return out, cache
231 |
232 |
233 | def spatial_batchnorm_backward(dout, cache):
234 | """
235 | Computes the backward pass for spatial batch normalization.
236 |
237 | Inputs:
238 | - dout: Upstream derivatives, of shape (N, C, H, W)
239 | - cache: Values from the forward pass
240 |
241 | Returns a tuple of:
242 | - dx: Gradient with respect to inputs, of shape (N, C, H, W)
243 | - dgamma: Gradient with respect to scale parameter, of shape (C,)
244 | - dbeta: Gradient with respect to shift parameter, of shape (C,)
245 | """
246 | N, C, H, W = dout.shape
247 | dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
248 | dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
249 | dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
250 | return dx, dgamma, dbeta
251 |
252 |
253 | def svm_loss(x, y):
254 | """
255 |     Computes the loss and gradient for multiclass SVM classification.
256 |
257 | Inputs:
258 | - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
259 | for the ith input.
260 | - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
261 | 0 <= y[i] < C
262 |
263 | Returns a tuple of:
264 | - loss: Scalar giving the loss
265 | - dx: Gradient of the loss with respect to x
266 | """
267 | N = x.shape[0]
268 | correct_class_scores = x[np.arange(N), y]
269 | margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
270 | margins[np.arange(N), y] = 0
271 | loss = np.sum(margins) / N
272 | num_pos = np.sum(margins > 0, axis=1)
273 | dx = np.zeros_like(x)
274 | dx[margins > 0] = 1
275 | dx[np.arange(N), y] -= num_pos
276 | dx /= N
277 | return loss, dx
278 |
279 |
280 | def softmax_loss(x, y):
281 | """
282 | Computes the loss and gradient for softmax classification.
283 |
284 | Inputs:
285 | - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
286 | for the ith input.
287 | - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
288 | 0 <= y[i] < C
289 |
290 | Returns a tuple of:
291 | - loss: Scalar giving the loss
292 | - dx: Gradient of the loss with respect to x
293 | """
294 | probs = np.exp(x - np.max(x, axis=1, keepdims=True))
295 | probs /= np.sum(probs, axis=1, keepdims=True)
296 | N = x.shape[0]
297 | loss = -np.sum(np.log(probs[np.arange(N), y])) / N
298 | dx = probs.copy()
299 | dx[np.arange(N), y] -= 1
300 | dx /= N
301 | return loss, dx
302 |
--------------------------------------------------------------------------------
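
bn_param in batchnorm_forward doubles as the carrier for the running statistics: repeated 'train' calls update running_mean and running_var in place, and a subsequent 'test' call normalizes with them. A usage sketch, assuming the module is importable as cs231n.layers:

    import numpy as np
    from cs231n.layers import batchnorm_forward

    N, D = 100, 3
    gamma, beta = np.ones(D), np.zeros(D)
    bn_param = {"mode": "train"}
    for _ in range(50):  # running stats accumulate across calls
        x = 5.0 + 2.0 * np.random.randn(N, D)
        batchnorm_forward(x, gamma, beta, bn_param)

    bn_param["mode"] = "test"
    out, _ = batchnorm_forward(x, gamma, beta, bn_param)
    print(out.mean(axis=0), out.std(axis=0))  # roughly 0 and 1 per feature
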
/assignment3/cs231n/net_visualization_pytorch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import random
3 | import torchvision.transforms as T
4 | import numpy as np
5 | from .image_utils import SQUEEZENET_MEAN, SQUEEZENET_STD
6 | from scipy.ndimage.filters import gaussian_filter1d
7 |
8 | def compute_saliency_maps(X, y, model):
9 | """
10 | Compute a class saliency map using the model for images X and labels y.
11 |
12 | Input:
13 | - X: Input images; Tensor of shape (N, 3, H, W)
14 | - y: Labels for X; LongTensor of shape (N,)
15 | - model: A pretrained CNN that will be used to compute the saliency map.
16 |
17 | Returns:
18 | - saliency: A Tensor of shape (N, H, W) giving the saliency maps for the input
19 | images.
20 | """
21 | # Make sure the model is in "test" mode
22 | model.eval()
23 |
24 | # Make input tensor require gradient
25 | X.requires_grad_()
26 |
27 | saliency = None
28 | ##############################################################################
29 | # TODO: Implement this function. Perform a forward and backward pass through #
30 | # the model to compute the gradient of the correct class score with respect #
31 | # to each input image. You first want to compute the loss over the correct #
32 | # scores (we'll combine losses across a batch by summing), and then compute #
33 | # the gradients with a backward pass. #
34 | ##############################################################################
35 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
36 |
37 |     scores = model(X)
38 |     # Gather the score of the correct class for each example.
39 |     correct_scores = scores.gather(1, y.view(-1, 1)).squeeze()
40 |     # backward() needs an explicit gradient for non-scalar outputs, so pass a
41 |     # vector of ones (equivalent to summing the correct-class scores first).
42 |     # More info: https://discuss.pytorch.org/t/loss-backward-raises-error-grad-can-be-implicitly-created-only-for-scalar-outputs/12152
43 |     correct_scores.backward(torch.ones_like(correct_scores))
44 |     # The saliency map is the channelwise max of the absolute input gradient.
45 |     saliency, _ = torch.max(X.grad.data.abs(), dim=1)
46 |
47 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
48 | ##############################################################################
49 | # END OF YOUR CODE #
50 | ##############################################################################
51 | return saliency
52 |
53 | def make_fooling_image(X, target_y, model):
54 | """
55 | Generate a fooling image that is close to X, but that the model classifies
56 | as target_y.
57 |
58 | Inputs:
59 | - X: Input image; Tensor of shape (1, 3, 224, 224)
60 | - target_y: An integer in the range [0, 1000)
61 | - model: A pretrained CNN
62 |
63 | Returns:
64 |     - X_fooling: An image that is close to X, but that is classified as target_y
65 | by the model.
66 | """
67 | # Initialize our fooling image to the input image, and make it require gradient
68 | X_fooling = X.clone()
69 | X_fooling = X_fooling.requires_grad_()
70 |
71 | learning_rate = 1
72 | ##############################################################################
73 | # TODO: Generate a fooling image X_fooling that the model will classify as #
74 | # the class target_y. You should perform gradient ascent on the score of the #
75 | # target class, stopping when the model is fooled. #
76 | # When computing an update step, first normalize the gradient: #
77 | # dX = learning_rate * g / ||g||_2 #
78 | # #
79 | # You should write a training loop. #
80 | # #
81 | # HINT: For most examples, you should be able to generate a fooling image #
82 | # in fewer than 100 iterations of gradient ascent. #
83 | # You can print your progress over iterations to check your algorithm. #
84 | ##############################################################################
85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
86 |
87 |     for i in range(100):
88 |         scores = model(X_fooling)
89 |         _, pred_y = scores.max(dim=1)
90 |         if pred_y == target_y:
91 |             break
92 |         target_score = scores[0, target_y]
93 |         target_score.backward()
94 |         g = X_fooling.grad.data
95 |         # Normalized ascent step; Tensor has no .L2() method, so use .norm().
96 |         X_fooling.data += learning_rate * g / g.norm()
97 |         X_fooling.grad.zero_()
98 |
99 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
100 | ##############################################################################
101 | # END OF YOUR CODE #
102 | ##############################################################################
103 | return X_fooling
104 |
105 | def class_visualization_update_step(img, model, target_y, l2_reg, learning_rate):
106 | ########################################################################
107 | # TODO: Use the model to compute the gradient of the score for the #
108 | # class target_y with respect to the pixels of the image, and make a #
109 | # gradient step on the image using the learning rate. Don't forget the #
110 | # L2 regularization term! #
111 | # Be very careful about the signs of elements in your code. #
112 | ########################################################################
113 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
114 |
115 |     scores = model(img)
116 |     # Penalize the squared L2 norm of img itself; using img.data detaches the
117 |     # penalty from the graph and drops its gradient contribution.
118 |     objective = scores[0, target_y] - l2_reg * torch.sum(img * img)
119 |     objective.backward()
120 |     g = img.grad.data
121 |     img.data += learning_rate * g / torch.norm(g)
122 |     img.grad.zero_()
122 |
123 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
124 | ########################################################################
125 | # END OF YOUR CODE #
126 | ########################################################################
127 |
128 |
129 | def preprocess(img, size=224):
130 | transform = T.Compose([
131 | T.Resize(size),
132 | T.ToTensor(),
133 | T.Normalize(mean=SQUEEZENET_MEAN.tolist(),
134 | std=SQUEEZENET_STD.tolist()),
135 | T.Lambda(lambda x: x[None]),
136 | ])
137 | return transform(img)
138 |
139 | def deprocess(img, should_rescale=True):
140 | transform = T.Compose([
141 | T.Lambda(lambda x: x[0]),
142 | T.Normalize(mean=[0, 0, 0], std=(1.0 / SQUEEZENET_STD).tolist()),
143 | T.Normalize(mean=(-SQUEEZENET_MEAN).tolist(), std=[1, 1, 1]),
144 | T.Lambda(rescale) if should_rescale else T.Lambda(lambda x: x),
145 | T.ToPILImage(),
146 | ])
147 | return transform(img)
148 |
149 | def rescale(x):
150 | low, high = x.min(), x.max()
151 | x_rescaled = (x - low) / (high - low)
152 | return x_rescaled
153 |
154 | def blur_image(X, sigma=1):
155 | X_np = X.cpu().clone().numpy()
156 | X_np = gaussian_filter1d(X_np, sigma, axis=2)
157 | X_np = gaussian_filter1d(X_np, sigma, axis=3)
158 | X.copy_(torch.Tensor(X_np).type_as(X))
159 | return X
160 |
161 | def jitter(X, ox, oy):
162 | """
163 | Helper function to randomly jitter an image.
164 |
165 | Inputs
166 | - X: PyTorch Tensor of shape (N, C, H, W)
167 | - ox, oy: Integers giving number of pixels to jitter along W and H axes
168 |
169 | Returns: A new PyTorch Tensor of shape (N, C, H, W)
170 | """
171 | if ox != 0:
172 | left = X[:, :, :, :-ox]
173 | right = X[:, :, :, -ox:]
174 | X = torch.cat([right, left], dim=3)
175 | if oy != 0:
176 | top = X[:, :, :-oy]
177 | bottom = X[:, :, -oy:]
178 | X = torch.cat([bottom, top], dim=2)
179 | return X
180 |
--------------------------------------------------------------------------------
/assignment3/cs231n/net_visualization_tensorflow.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from scipy.ndimage.filters import gaussian_filter1d
4 |
5 | def compute_saliency_maps(X, y, model):
6 | """
7 | Compute a class saliency map using the model for images X and labels y.
8 |
9 | Input:
10 | - X: Input images, numpy array of shape (N, H, W, 3)
11 | - y: Labels for X, numpy of shape (N,)
12 | - model: A SqueezeNet model that will be used to compute the saliency map.
13 |
14 | Returns:
15 | - saliency: A numpy array of shape (N, H, W) giving the saliency maps for the
16 | input images.
17 | """
18 | saliency = None
19 | # Compute the score of the correct class for each example.
20 | # This gives a Tensor with shape [N], the number of examples.
21 | #
22 | # Note: this is equivalent to scores[np.arange(N), y] we used in NumPy
23 | # for computing vectorized losses.
24 |
25 | ###############################################################################
26 | # TODO: Produce the saliency maps over a batch of images. #
27 | # #
28 | # 1) Define a gradient tape object and watch input Image variable #
29 | # 2) Compute the “loss” for the batch of given input images. #
30 | # - get scores output by the model for the given batch of input images #
31 | # - use tf.gather_nd or tf.gather to get correct scores #
32 | # 3) Use the gradient() method of the gradient tape object to compute the #
33 | # gradient of the loss with respect to the image #
34 | # 4) Finally, process the returned gradient to compute the saliency map. #
35 | ###############################################################################
36 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
37 |
38 | pass
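39 |
40 |     # A possible TF2 implementation (a sketch mirroring the PyTorch version in
41 |     # net_visualization_pytorch.py; assumes `model` is callable on a batch and
42 |     # returns per-class scores):
43 |     X = tf.convert_to_tensor(X)
44 |     with tf.GradientTape() as tape:
45 |         tape.watch(X)
46 |         scores = model(X)
47 |         indices = tf.stack([tf.range(scores.shape[0]), tf.cast(y, tf.int32)], axis=1)
48 |         correct_scores = tf.gather_nd(scores, indices)
49 |     grads = tape.gradient(correct_scores, X)
50 |     # The saliency map is the channelwise max of the absolute gradient.
51 |     saliency = tf.reduce_max(tf.abs(grads), axis=3).numpy()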
39 |
40 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
41 | ##############################################################################
42 | # END OF YOUR CODE #
43 | ##############################################################################
44 | return saliency
45 |
46 | def make_fooling_image(X, target_y, model):
47 | """
48 | Generate a fooling image that is close to X, but that the model classifies
49 | as target_y.
50 |
51 | Inputs:
52 | - X: Input image, a numpy array of shape (1, 224, 224, 3)
53 | - target_y: An integer in the range [0, 1000)
54 | - model: Pretrained SqueezeNet model
55 |
56 | Returns:
57 |     - X_fooling: An image that is close to X, but that is classified as target_y
58 | by the model.
59 | """
60 |
61 | # Make a copy of the input that we will modify
62 | X_fooling = X.copy()
63 |
64 | # Step size for the update
65 | learning_rate = 1
66 |
67 | ##############################################################################
68 | # TODO: Generate a fooling image X_fooling that the model will classify as #
69 | # the class target_y. Use gradient *ascent* on the target class score, using #
70 | # the model.scores Tensor to get the class scores for the model.image. #
71 | # When computing an update step, first normalize the gradient: #
72 | # dX = learning_rate * g / ||g||_2 #
73 | # #
74 | # You should write a training loop, where in each iteration, you make an #
75 | # update to the input image X_fooling (don't modify X). The loop should #
76 | # stop when the predicted class for the input is the same as target_y. #
77 | # #
78 | # HINT: Use tf.GradientTape() to keep track of your gradients and #
79 | # use tape.gradient to get the actual gradient with respect to X_fooling. #
80 | # #
81 | # HINT 2: For most examples, you should be able to generate a fooling image #
82 | # in fewer than 100 iterations of gradient ascent. You can print your #
83 | # progress over iterations to check your algorithm. #
84 | ##############################################################################
85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
86 |
87 | pass
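88 |
89 |     # A possible TF2 implementation (a sketch mirroring the PyTorch version;
90 |     # assumes `model` is callable and returns class scores):
91 |     X_fooling = tf.convert_to_tensor(X_fooling)
92 |     for i in range(100):
93 |         with tf.GradientTape() as tape:
94 |             tape.watch(X_fooling)
95 |             scores = model(X_fooling)
96 |             target_score = scores[0, target_y]
97 |         if tf.argmax(scores[0]).numpy() == target_y:
98 |             break
99 |         g = tape.gradient(target_score, X_fooling)
100 |         # Normalized gradient ascent step: dX = learning_rate * g / ||g||_2
101 |         X_fooling = X_fooling + learning_rate * g / tf.norm(g)
102 |     X_fooling = X_fooling.numpy()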
88 |
89 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
90 | ##############################################################################
91 | # END OF YOUR CODE #
92 | ##############################################################################
93 | return X_fooling
94 |
95 | def class_visualization_update_step(X, model, target_y, l2_reg, learning_rate):
96 | ########################################################################
97 | # TODO: Compute the value of the gradient of the score for #
98 | # class target_y with respect to the pixels of the image, and make a #
99 | # gradient step on the image using the learning rate. You should use #
100 | # the tf.GradientTape() and tape.gradient to compute gradients. #
101 | # #
102 | # Be very careful about the signs of elements in your code. #
103 | ########################################################################
104 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
105 |
106 | pass
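107 |
108 |     # A possible TF2 implementation (a sketch; assumes X is a tf.Tensor and the
109 |     # regularizer is a squared-L2 penalty on the image):
110 |     with tf.GradientTape() as tape:
111 |         tape.watch(X)
112 |         scores = model(X)
113 |         objective = scores[0, target_y] - l2_reg * tf.reduce_sum(X * X)
114 |     g = tape.gradient(objective, X)
115 |     X = X + learning_rate * g / tf.norm(g)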
107 |
108 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
109 | ############################################################################
110 | # END OF YOUR CODE #
111 | ############################################################################
112 |
113 | return X
114 |
115 | def blur_image(X, sigma=1):
116 | X = gaussian_filter1d(X, sigma, axis=1)
117 | X = gaussian_filter1d(X, sigma, axis=2)
118 | return X
119 |
120 | def jitter(X, ox, oy):
121 | """
122 | Helper function to randomly jitter an image.
123 |
124 | Inputs
125 | - X: Tensor of shape (N, H, W, C)
126 | - ox, oy: Integers giving number of pixels to jitter along W and H axes
127 |
128 | Returns: A new Tensor of shape (N, H, W, C)
129 | """
130 | if ox != 0:
131 | left = X[:, :, :-ox]
132 | right = X[:, :, -ox:]
133 | X = tf.concat([right, left], axis=2)
134 | if oy != 0:
135 | top = X[:, :-oy]
136 | bottom = X[:, -oy:]
137 | X = tf.concat([bottom, top], axis=1)
138 | return X
139 |
--------------------------------------------------------------------------------
/assignment3/cs231n/optim.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | """
4 | This file implements various first-order update rules that are commonly used for
5 | training neural networks. Each update rule accepts current weights and the
6 | gradient of the loss with respect to those weights and produces the next set of
7 | weights. Each update rule has the same interface:
8 |
9 | def update(w, dw, config=None):
10 |
11 | Inputs:
12 | - w: A numpy array giving the current weights.
13 | - dw: A numpy array of the same shape as w giving the gradient of the
14 | loss with respect to w.
15 | - config: A dictionary containing hyperparameter values such as learning rate,
16 | momentum, etc. If the update rule requires caching values over many
17 | iterations, then config will also hold these cached values.
18 |
19 | Returns:
20 | - next_w: The next point after the update.
21 | - config: The config dictionary to be passed to the next iteration of the
22 | update rule.
23 |
24 | NOTE: For most update rules, the default learning rate will probably not perform
25 | well; however, the default values of the other hyperparameters should work well
26 | for a variety of different problems.
27 |
28 | For efficiency, update rules may perform in-place updates, mutating w and
29 | setting next_w equal to w.
30 | """
31 |
32 |
33 | def sgd(w, dw, config=None):
34 | """
35 | Performs vanilla stochastic gradient descent.
36 |
37 | config format:
38 | - learning_rate: Scalar learning rate.
39 | """
40 | if config is None:
41 | config = {}
42 | config.setdefault("learning_rate", 1e-2)
43 |
44 | w -= config["learning_rate"] * dw
45 | return w, config
46 |
47 |
48 | def adam(x, dx, config=None):
49 | """
50 | Uses the Adam update rule, which incorporates moving averages of both the
51 | gradient and its square and a bias correction term.
52 |
53 | config format:
54 | - learning_rate: Scalar learning rate.
55 | - beta1: Decay rate for moving average of first moment of gradient.
56 | - beta2: Decay rate for moving average of second moment of gradient.
57 | - epsilon: Small scalar used for smoothing to avoid dividing by zero.
58 | - m: Moving average of gradient.
59 | - v: Moving average of squared gradient.
60 | - t: Iteration number.
61 | """
62 | if config is None:
63 | config = {}
64 | config.setdefault("learning_rate", 1e-3)
65 | config.setdefault("beta1", 0.9)
66 | config.setdefault("beta2", 0.999)
67 | config.setdefault("epsilon", 1e-8)
68 | config.setdefault("m", np.zeros_like(x))
69 | config.setdefault("v", np.zeros_like(x))
70 | config.setdefault("t", 0)
71 |
72 | next_x = None
73 | beta1, beta2, eps = config["beta1"], config["beta2"], config["epsilon"]
74 | t, m, v = config["t"], config["m"], config["v"]
75 | m = beta1 * m + (1 - beta1) * dx
76 | v = beta2 * v + (1 - beta2) * (dx * dx)
77 | t += 1
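78 |     # Fold the bias corrections m_hat = m / (1 - beta1^t) and
79 |     # v_hat = v / (1 - beta2^t) into a single step size alpha, so that
80 |     # x -= alpha * m / (sqrt(v) + eps) matches the standard Adam update
81 |     # (up to where eps enters the denominator).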
78 | alpha = config["learning_rate"] * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
79 | x -= alpha * (m / (np.sqrt(v) + eps))
80 | config["t"] = t
81 | config["m"] = m
82 | config["v"] = v
83 | next_x = x
84 |
85 | return next_x, config
86 |
--------------------------------------------------------------------------------
/assignment3/cs231n/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 | import numpy
5 |
6 | extensions = [
7 | Extension(
8 | "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()]
9 | ),
10 | ]
11 |
12 | setup(ext_modules=cythonize(extensions),)
13 |
--------------------------------------------------------------------------------
/assignment3/cs231n/style_transfer_pytorch.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision
4 | import torchvision.transforms as T
5 | import PIL
6 |
7 | import numpy as np
8 |
9 | from .image_utils import SQUEEZENET_MEAN, SQUEEZENET_STD
10 |
11 | dtype = torch.FloatTensor
12 | # Uncomment the following line if you're on a machine with a GPU set up for PyTorch!
13 | # dtype = torch.cuda.FloatTensor
14 | def content_loss(content_weight, content_current, content_original):
15 | """
16 | Compute the content loss for style transfer.
17 |
18 | Inputs:
19 | - content_weight: Scalar giving the weighting for the content loss.
20 | - content_current: features of the current image; this is a PyTorch Tensor of shape
21 | (1, C_l, H_l, W_l).
22 |     - content_original: features of the content image, Tensor with shape (1, C_l, H_l, W_l).
23 |
24 | Returns:
25 | - scalar content loss
26 | """
27 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
28 |
29 | return content_weight * torch.sum(torch.pow(content_current - content_original, 2))
30 |
31 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
32 |
33 | def gram_matrix(features, normalize=True):
34 | """
35 | Compute the Gram matrix from features.
36 |
37 | Inputs:
38 | - features: PyTorch Tensor of shape (N, C, H, W) giving features for
39 | a batch of N images.
40 | - normalize: optional, whether to normalize the Gram matrix
41 | If True, divide the Gram matrix by the number of neurons (H * W * C)
42 |
43 | Returns:
44 | - gram: PyTorch Tensor of shape (N, C, C) giving the
45 | (optionally normalized) Gram matrices for the N input images.
46 | """
47 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
48 |
49 |     N, C, H, W = features.shape
50 |     # Batched matmul gives per-image Gram matrices; the old (N*C, H*W) reshape
51 |     # followed by mm produced a valid (N, C, C) result only when N == 1.
52 |     feats = features.view(N, C, H * W)
53 |     G = torch.bmm(feats, feats.transpose(1, 2))
54 |     if normalize:
55 |         G = G.div(C * H * W)
56 |     return G
55 |
56 |
57 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
58 |
59 | # Now put it together in the style_loss function...
60 | def style_loss(feats, style_layers, style_targets, style_weights):
61 | """
62 | Computes the style loss at a set of layers.
63 |
64 | Inputs:
65 | - feats: list of the features at every layer of the current image, as produced by
66 | the extract_features function.
67 | - style_layers: List of layer indices into feats giving the layers to include in the
68 | style loss.
69 | - style_targets: List of the same length as style_layers, where style_targets[i] is
70 | a PyTorch Tensor giving the Gram matrix of the source style image computed at
71 | layer style_layers[i].
72 | - style_weights: List of the same length as style_layers, where style_weights[i]
73 | is a scalar giving the weight for the style loss at layer style_layers[i].
74 |
75 | Returns:
76 | - style_loss: A PyTorch Tensor holding a scalar giving the style loss.
77 | """
78 | # Hint: you can do this with one for loop over the style layers, and should
79 | # not be very much code (~5 lines). You will need to use your gram_matrix function.
80 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
81 |
82 |     loss = 0
83 |     for i in range(len(style_layers)):
84 |         G = gram_matrix(feats[style_layers[i]])
85 |         loss += style_weights[i] * torch.sum((G - style_targets[i]) ** 2)
86 |     return loss
87 |
88 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
89 |
90 | def tv_loss(img, tv_weight):
91 | """
92 | Compute total variation loss.
93 |
94 | Inputs:
95 | - img: PyTorch Variable of shape (1, 3, H, W) holding an input image.
96 | - tv_weight: Scalar giving the weight w_t to use for the TV loss.
97 |
98 | Returns:
99 | - loss: PyTorch Variable holding a scalar giving the total variation loss
100 | for img weighted by tv_weight.
101 | """
102 | # Your implementation should be vectorized and not require any loops!
103 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
104 |
105 | a = torch.sum(torch.pow(img[:, :, :-1, :] - img[:, :, 1:, :], 2))
106 | b = torch.sum(torch.pow(img[:, :, :, :-1] - img[:, :, :, 1:], 2))
107 | return tv_weight * (a + b)
108 |
109 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
110 | def preprocess(img, size=512):
111 | """ Preprocesses a PIL JPG Image object to become a Pytorch tensor
112 | that is ready to be used as an input into the CNN model.
113 | Preprocessing steps:
114 | 1) Resize the image (preserving aspect ratio) until the shortest side is of length `size`.
115 | 2) Convert the PIL Image to a Pytorch Tensor.
116 | 3) Normalize the mean of the image pixel values to be SqueezeNet's expected mean, and
117 | the standard deviation to be SqueezeNet's expected std dev.
118 |     4) Add a batch dimension in the first position of the tensor: i.e., a tensor of shape
119 |        (C, H, W) (the layout produced by ToTensor) will become -> (1, C, H, W).
120 | """
121 | transform = T.Compose([
122 | T.Resize(size),
123 | T.ToTensor(),
124 | T.Normalize(mean=SQUEEZENET_MEAN.tolist(),
125 | std=SQUEEZENET_STD.tolist()),
126 | T.Lambda(lambda x: x[None]),
127 | ])
128 | return transform(img)
129 |
130 | def deprocess(img):
131 | """ De-processes a Pytorch tensor from the output of the CNN model to become
132 | a PIL JPG Image that we can display, save, etc.
133 | De-processing steps:
134 | 1) Remove the batch dimension at the first position by accessing the slice at index 0.
135 |        A tensor of dims (1, C, H, W) will become -> (C, H, W).
136 | 2) Normalize the standard deviation: multiply each channel of the output tensor by 1/s,
137 | scaling the elements back to before scaling by SqueezeNet's standard devs.
138 | No change to the mean.
139 | 3) Normalize the mean: subtract the mean (hence the -m) from each channel of the output tensor,
140 | centering the elements back to before centering on SqueezeNet's input mean.
141 | No change to the std dev.
142 | 4) Rescale all the values in the tensor so that they lie in the interval [0, 1] to prepare for
143 | transforming it into image pixel values.
144 | 5) Convert the Pytorch Tensor to a PIL Image.
145 | """
146 | transform = T.Compose([
147 | T.Lambda(lambda x: x[0]),
148 | T.Normalize(mean=[0, 0, 0], std=[1.0 / s for s in SQUEEZENET_STD.tolist()]),
149 | T.Normalize(mean=[-m for m in SQUEEZENET_MEAN.tolist()], std=[1, 1, 1]),
150 | T.Lambda(rescale),
151 | T.ToPILImage(),
152 | ])
153 | return transform(img)
154 |
155 | def rescale(x):
156 | """ A function used internally inside `deprocess`.
157 | Rescale elements of x linearly to be in the interval [0, 1]
158 | with the minimum element(s) mapped to 0, and the maximum element(s)
159 | mapped to 1.
160 | """
161 | low, high = x.min(), x.max()
162 | x_rescaled = (x - low) / (high - low)
163 | return x_rescaled
164 |
165 | def rel_error(x,y):
166 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
167 |
168 | # We provide this helper code which takes an image, a model (cnn), and returns a list of
169 | # feature maps, one per layer.
170 | def extract_features(x, cnn):
171 | """
172 | Use the CNN to extract features from the input image x.
173 |
174 | Inputs:
175 | - x: A PyTorch Tensor of shape (N, C, H, W) holding a minibatch of images that
176 | will be fed to the CNN.
177 | - cnn: A PyTorch model that we will use to extract features.
178 |
179 | Returns:
180 | - features: A list of feature for the input images x extracted using the cnn model.
181 | features[i] is a PyTorch Tensor of shape (N, C_i, H_i, W_i); recall that features
182 | from different layers of the network may have different numbers of channels (C_i) and
183 | spatial dimensions (H_i, W_i).
184 | """
185 | features = []
186 | prev_feat = x
187 | for i, module in enumerate(cnn._modules.values()):
188 | next_feat = module(prev_feat)
189 | features.append(next_feat)
190 | prev_feat = next_feat
191 | return features
192 |
193 | # Please disregard warnings about initialization.
194 | def features_from_img(imgpath, imgsize, cnn):
195 | img = preprocess(PIL.Image.open(imgpath), size=imgsize)
196 | img_var = img.type(dtype)
197 | return extract_features(img_var, cnn), img_var
198 |
199 |
200 |
201 |
202 |
--------------------------------------------------------------------------------
/assignment3/cs231n/style_transfer_tensorflow.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | def tv_loss(img, tv_weight):
5 | """
6 | Compute total variation loss.
7 |
8 | Inputs:
9 | - img: Tensor of shape (1, H, W, 3) holding an input image.
10 | - tv_weight: Scalar giving the weight w_t to use for the TV loss.
11 |
12 | Returns:
13 | - loss: Tensor holding a scalar giving the total variation loss
14 | for img weighted by tv_weight.
15 | """
16 | # Your implementation should be vectorized and not require any loops!
17 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
18 |
19 | pass
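20 |
21 |     # A possible implementation (a sketch mirroring tv_loss in
22 |     # style_transfer_pytorch.py; img is in NHWC layout):
23 |     h_var = tf.reduce_sum((img[:, :-1, :, :] - img[:, 1:, :, :]) ** 2)
24 |     w_var = tf.reduce_sum((img[:, :, :-1, :] - img[:, :, 1:, :]) ** 2)
25 |     return tv_weight * (h_var + w_var)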
20 |
21 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
22 |
23 | def style_loss(feats, style_layers, style_targets, style_weights):
24 | """
25 | Computes the style loss at a set of layers.
26 |
27 | Inputs:
28 | - feats: list of the features at every layer of the current image, as produced by
29 | the extract_features function.
30 | - style_layers: List of layer indices into feats giving the layers to include in the
31 | style loss.
32 | - style_targets: List of the same length as style_layers, where style_targets[i] is
33 | a Tensor giving the Gram matrix of the source style image computed at
34 | layer style_layers[i].
35 | - style_weights: List of the same length as style_layers, where style_weights[i]
36 | is a scalar giving the weight for the style loss at layer style_layers[i].
37 |
38 | Returns:
39 | - style_loss: A Tensor containing the scalar style loss.
40 | """
41 |     # Hint: you can do this with one for loop over the style layers, and it should
42 |     # not be very much code (~5 lines). You will need to use your gram_matrix function.
43 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
44 |
45 | pass
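46 |
47 |     # A possible implementation (a sketch using the gram_matrix function below):
48 |     loss = 0
49 |     for i in range(len(style_layers)):
50 |         G = gram_matrix(feats[style_layers[i]])
51 |         loss += style_weights[i] * tf.reduce_sum((G - style_targets[i]) ** 2)
52 |     return loss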
46 |
47 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
48 |
49 | def gram_matrix(features, normalize=True):
50 | """
51 | Compute the Gram matrix from features.
52 |
53 | Inputs:
54 | - features: Tensor of shape (1, H, W, C) giving features for
55 | a single image.
56 | - normalize: optional, whether to normalize the Gram matrix
57 | If True, divide the Gram matrix by the number of neurons (H * W * C)
58 |
59 | Returns:
60 | - gram: Tensor of shape (C, C) giving the (optionally normalized)
61 | Gram matrices for the input image.
62 | """
63 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
64 |
65 | pass
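66 |
67 |     # A possible implementation (a sketch for a single NHWC image):
68 |     _, H, W, C = features.shape
69 |     F = tf.reshape(features, (H * W, C))
70 |     gram = tf.matmul(F, F, transpose_a=True)  # (C, C)
71 |     if normalize:
72 |         gram = gram / tf.cast(H * W * C, gram.dtype)
73 |     return gram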
66 |
67 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
68 |
69 | def content_loss(content_weight, content_current, content_original):
70 | """
71 | Compute the content loss for style transfer.
72 |
73 | Inputs:
74 | - content_weight: scalar constant we multiply the content_loss by.
75 | - content_current: features of the current image, Tensor with shape [1, height, width, channels]
76 |     - content_original: features of the content image, Tensor with shape [1, height, width, channels]
77 |
78 | Returns:
79 | - scalar content loss
80 | """
81 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
82 |
83 | pass
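84 |
85 |     # A possible implementation (a sketch mirroring the PyTorch version above):
86 |     return content_weight * tf.reduce_sum((content_current - content_original) ** 2)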
84 |
85 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
86 |
87 | # We provide this helper code which takes an image, a model (cnn), and returns a list of
88 | # feature maps, one per layer.
89 | def extract_features(x, cnn):
90 | """
91 | Use the CNN to extract features from the input image x.
92 |
93 | Inputs:
94 | - x: A Tensor of shape (N, H, W, C) holding a minibatch of images that
95 | will be fed to the CNN.
96 | - cnn: A Tensorflow model that we will use to extract features.
97 |
98 | Returns:
99 | - features: A list of feature for the input images x extracted using the cnn model.
100 | features[i] is a Tensor of shape (N, H_i, W_i, C_i); recall that features
101 | from different layers of the network may have different numbers of channels (C_i) and
102 | spatial dimensions (H_i, W_i).
103 | """
104 | features = []
105 | prev_feat = x
106 | for i, layer in enumerate(cnn.net.layers[:-2]):
107 | next_feat = layer(prev_feat)
108 | features.append(next_feat)
109 | prev_feat = next_feat
110 | return features
111 |
112 | def rel_error(x,y):
113 | return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
114 |
--------------------------------------------------------------------------------
/assignment3/example_styletransfer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/example_styletransfer.png
--------------------------------------------------------------------------------
/assignment3/frameworkpython:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # what real Python executable to use
4 | #PYVER=2.7
5 | #PATHTOPYTHON=/usr/local/bin/
6 | #PYTHON=${PATHTOPYTHON}python${PYVER}
7 |
8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3
9 |
10 | # find the root of the virtualenv, it should be the parent of the dir this script is in
11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"`
12 |
13 | # now run Python with the virtualenv set as Python's HOME
14 | export PYTHONHOME=$ENV
15 | exec $PYTHON "$@"
16 |
--------------------------------------------------------------------------------
/assignment3/gan-checks-tf.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan-checks-tf.npz
--------------------------------------------------------------------------------
/assignment3/gan_outputs_pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan_outputs_pytorch.png
--------------------------------------------------------------------------------
/assignment3/gan_outputs_tf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/gan_outputs_tf.png
--------------------------------------------------------------------------------
/assignment3/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/kitten.jpg
--------------------------------------------------------------------------------
/assignment3/makepdf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import subprocess
4 |
5 | try:
6 | from PyPDF2 import PdfFileMerger
7 |
8 | MERGE = True
9 | except ImportError:
10 | print("Could not find PyPDF2. Leaving pdf files unmerged.")
11 | MERGE = False
12 |
13 |
14 | def main(files):
15 | os_args = [
16 | "jupyter",
17 | "nbconvert",
18 | "--log-level",
19 | "CRITICAL",
20 | "--to",
21 | "pdf",
22 | ]
23 | for f in files:
24 | os_args.append(f)
25 | subprocess.run(os_args)
26 | print("Created PDF {}.".format(f))
27 | if MERGE:
28 | pdfs = [f.split(".")[0] + ".pdf" for f in files]
29 | merger = PdfFileMerger()
30 | for pdf in pdfs:
31 | merger.append(pdf)
32 | merger.write("assignment.pdf")
33 | merger.close()
34 | for pdf in pdfs:
35 | os.remove(pdf)
36 |
37 |
38 | if __name__ == "__main__":
39 | parser = argparse.ArgumentParser()
40 | # we pass in explicit notebook arg so that we can provide
41 | # an ordered list and produce an ordered pdf
42 | parser.add_argument("--notebooks", type=str, nargs="+", required=True)
43 | args = parser.parse_args()
44 | main(args.notebooks)
45 |
--------------------------------------------------------------------------------
/assignment3/requirements.txt:
--------------------------------------------------------------------------------
1 | attrs==19.1.0
2 | backcall==0.1.0
3 | bleach==3.1.0
4 | certifi==2019.3.9
5 | chardet==3.0.4
6 | colorama==0.4.1
7 | cycler==0.10.0
8 | Cython==0.29.16
9 | decorator==4.4.0
10 | defusedxml==0.5.0
11 | entrypoints==0.3
12 | future==0.17.1
13 | gitdb2==2.0.5
14 | GitPython==2.1.11
15 | idna==2.8
16 | ipykernel==5.1.0
17 | ipython==7.4.0
18 | ipython-genutils==0.2.0
19 | ipywidgets==7.4.2
20 | imageio==2.8.0
21 | jedi==0.13.3
22 | Jinja2==2.10
23 | jsonschema==3.0.1
24 | jupyter==1.0.0
25 | jupyter-client==5.2.4
26 | jupyter-console==6.0.0
27 | jupyter-core==4.4.0
28 | jupyterlab==0.35.4
29 | jupyterlab-server==0.2.0
30 | kiwisolver==1.0.1
31 | MarkupSafe==1.1.1
32 | matplotlib==3.0.3
33 | mistune==0.8.4
34 | nbconvert==5.4.1
35 | nbdime==1.0.5
36 | nbformat==4.4.0
37 | notebook==5.7.8
38 | numpy==1.18.4
39 | pandocfilters==1.4.2
40 | parso==0.3.4
41 | pexpect==4.6.0
42 | pickleshare==0.7.5
43 | Pillow==6.0.0
44 | prometheus-client==0.6.0
45 | prompt-toolkit==2.0.9
46 | ptyprocess==0.6.0
47 | Pygments==2.3.1
48 | pyparsing==2.3.1
49 | pyrsistent==0.14.11
50 | python-dateutil==2.8.0
51 | pyzmq==18.0.1
52 | qtconsole==4.4.3
53 | requests==2.21.0
54 | scipy==1.2.1
55 | Send2Trash==1.5.0
56 | six==1.12.0
57 | smmap2==2.0.5
58 | terminado==0.8.2
59 | testpath==0.4.2
60 | tornado==6.0.2
61 | traitlets==4.3.2
62 | urllib3==1.24.1
63 | wcwidth==0.1.7
64 | webencodings==0.5.1
65 | widgetsnbextension==3.4.2
66 |
--------------------------------------------------------------------------------
/assignment3/sky.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/sky.jpg
--------------------------------------------------------------------------------
/assignment3/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 |
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 |
--------------------------------------------------------------------------------
/assignment3/style-transfer-checks-tf.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style-transfer-checks-tf.npz
--------------------------------------------------------------------------------
/assignment3/style-transfer-checks.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style-transfer-checks.npz
--------------------------------------------------------------------------------
/assignment3/style_stransfer.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style_stransfer.gif
--------------------------------------------------------------------------------
/assignment3/style_stransfer2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/style_stransfer2.gif
--------------------------------------------------------------------------------
/assignment3/styles/composition_vii.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/composition_vii.jpg
--------------------------------------------------------------------------------
/assignment3/styles/muse.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/muse.jpg
--------------------------------------------------------------------------------
/assignment3/styles/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/starry_night.jpg
--------------------------------------------------------------------------------
/assignment3/styles/the_scream.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/the_scream.jpg
--------------------------------------------------------------------------------
/assignment3/styles/tubingen.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bingcheng1998/CS231n-2020-spring-assignment-solution/56b459a488abb25bcb31d3916e361400efb426aa/assignment3/styles/tubingen.jpg
--------------------------------------------------------------------------------