├── README.md ├── cs231n ├── assignment1 │ ├── README.md │ ├── cs231n │ │ ├── __init__.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── k_nearest_neighbor.py │ │ │ ├── linear_classifier.py │ │ │ ├── linear_svm.py │ │ │ ├── neural_net.py │ │ │ └── softmax.py │ │ ├── data_utils.py │ │ ├── datasets │ │ │ └── get_datasets.sh │ │ ├── features.py │ │ ├── gradient_check.py │ │ └── vis_utils.py │ ├── features.ipynb │ ├── knn.ipynb │ ├── requirements.txt │ ├── softmax.ipynb │ ├── svm.ipynb │ └── two_layer_net.ipynb ├── assignment2 │ ├── BatchNormalization.ipynb │ ├── ConvolutionalNetworks.ipynb │ ├── Dropout.ipynb │ ├── FullyConnectedNets.ipynb │ ├── PyTorch.ipynb │ ├── README.md │ ├── cs231n │ │ ├── __init__.py │ │ ├── build │ │ │ └── temp.linux-x86_64-3.6 │ │ │ │ └── im2col_cython.o │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── cnn.py │ │ │ └── fc_net.py │ │ ├── data_utils.py │ │ ├── datasets │ │ │ └── get_datasets.sh │ │ ├── fast_layers.py │ │ ├── gradient_check.py │ │ ├── im2col.py │ │ ├── im2col_cython.c │ │ ├── im2col_cython.cpython-36m-x86_64-linux-gnu.so │ │ ├── im2col_cython.pyx │ │ ├── layer_utils.py │ │ ├── layers.py │ │ ├── notebook_images │ │ │ ├── batchnorm_graph.png │ │ │ ├── kitten.jpg │ │ │ ├── normalization.png │ │ │ └── puppy.jpg │ │ ├── optim.py │ │ ├── setup.py │ │ ├── solver.py │ │ └── vis_utils.py │ └── requirements.txt └── assignment3 │ ├── Generative_Adversarial_Networks_PyTorch.ipynb │ ├── LSTM_Captioning.ipynb │ ├── NetworkVisualization-PyTorch.ipynb │ ├── README.md │ ├── RNN_Captioning.ipynb │ ├── StyleTransfer-PyTorch.ipynb │ ├── cs231n │ ├── __init__.py │ ├── captioning_solver.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── rnn.py │ │ └── squeezenet.py │ ├── coco_utils.py │ ├── data_utils.py │ ├── datasets │ │ ├── get_assignment3_data.sh │ │ ├── get_coco_captioning.sh │ │ ├── get_dataset.sh │ │ ├── get_datasets.sh │ │ ├── get_imagenet_val.sh │ │ └── get_squeezenet_tf.sh │ ├── fast_layers.py │ ├── gan_pytorch.py │ ├── gradient_check.py │ ├── im2col.py │ ├── im2col_cython.pyx │ ├── image_utils.py │ ├── layer_utils.py │ ├── layers.py │ ├── net_visualization_pytorch.py │ ├── optim.py │ ├── rnn_layers.py │ ├── setup.py │ └── style_transfer_pytorch.py │ ├── example_styletransfer.png │ ├── gan_outputs_pytorch.png │ ├── requirements.txt │ ├── style-transfer-checks.npz │ └── styles │ ├── bicentennial_print.jpg │ ├── composition_vii.jpg │ ├── horses_seashore.jpg │ ├── muse.jpg │ ├── ritmo_plastico.jpg │ ├── starry_night.jpg │ ├── the_scream.jpg │ └── tubingen.jpg ├── eecs498-007 ├── A4 │ ├── README.md │ ├── a4_helper.py │ ├── adversarial_attacks_results.jpg │ ├── class_viz_result.jpg │ ├── eecs598 │ │ ├── __init__.py │ │ ├── data.py │ │ ├── grad.py │ │ ├── solver.py │ │ ├── submit.py │ │ ├── utils.py │ │ └── vis.py │ ├── feature_inversion_result.jpg │ ├── network_visualization.ipynb │ ├── network_visualization.py │ ├── pytorch_autograd_and_nn.ipynb │ ├── pytorch_autograd_and_nn.pkl │ ├── pytorch_autograd_and_nn.py │ ├── rnn_lstm_attention_captioning.ipynb │ ├── rnn_lstm_attention_captioning.py │ ├── rnn_lstm_attention_submission.pkl │ ├── saliency_maps_results.jpg │ ├── style_transfer.ipynb │ ├── style_transfer.py │ └── style_transfer_result.jpg ├── A5 │ ├── README.md │ ├── a5_helper.py │ ├── eecs598 │ │ ├── __init__.py │ │ ├── data.py │ │ ├── grad.py │ │ ├── solver.py │ │ ├── submit.py │ │ ├── utils.py │ │ └── vis.py │ ├── frcnn_detector.pt │ ├── single_stage_detector.py │ ├── single_stage_detector_yolo.ipynb │ ├── two_stage_detector.py │ ├── 
two_stage_detector_faster_rcnn.ipynb │ └── yolo_detector.pt └── A6 │ ├── README.md │ ├── a6_helper.py │ ├── conditional_vae_generation.jpg │ ├── dc_gan_results.jpg │ ├── eecs598 │ ├── __init__.py │ ├── data.py │ ├── grad.py │ ├── solver.py │ ├── submit.py │ ├── utils.py │ └── vis.py │ ├── fc_gan_results.jpg │ ├── gan.py │ ├── generative_adversarial_networks.ipynb │ ├── ls_gan_results.jpg │ ├── vae.py │ ├── vae_generation.jpg │ └── variational_autoencoders.ipynb └── examples ├── analog_clock.gif ├── dcgan.gif ├── dining_table.gif ├── kit_fox.gif ├── style_transfer.png ├── tarantula.gif └── vanilla_gan.gif /cs231n/assignment1/README.md: -------------------------------------------------------------------------------- 1 |
2 | # CS231n: Convolutional Neural Networks for Visual Recognition
3 | ## Assignment 1 (2020)
4 |
5 | 6 | # Goals 7 | 8 | In this assignment you will practice putting together a simple image classification pipeline based on the k-Nearest Neighbor or the SVM/Softmax classifier. The goals of this assignment are as follows: 9 | 10 | - Understand the basic **Image Classification pipeline** and the data-driven approach (train/predict stages). 11 | - Understand the train/val/test **splits** and the use of validation data for **hyperparameter tuning**. 12 | - Develop proficiency in writing efficient **vectorized** code with numpy. 13 | - Implement and apply a k-Nearest Neighbor (**kNN**) classifier. 14 | - Implement and apply a Multiclass Support Vector Machine (**SVM**) classifier. 15 | - Implement and apply a **Softmax** classifier. 16 | - Implement and apply a **Two layer neural network** classifier. 17 | - Understand the differences and tradeoffs between these classifiers. 18 | - Get a basic understanding of performance improvements from using **higher-level representations** as opposed to raw pixels, e.g. color histograms, Histogram of Gradient (HOG) features, etc. 19 | 20 | # Questions 21 | 22 | ## Q1: k-Nearest Neighbor classifier 23 | 24 | The notebook [``knn.ipynb``](knn.ipynb) will walk you through implementing the kNN classifier. 25 | 26 | ## Q2: Training a Support Vector Machine 27 | 28 | The notebook [``svm.ipynb``](svm.ipynb) will walk you through implementing the SVM classifier. 29 | 30 | ## Q3: Implement a Softmax classifier 31 | 32 | The notebook [``softmax.ipynb``](softmax.ipynb) will walk you through implementing the Softmax classifier. 33 | 34 | ## Q4: Two-Layer Neural Network 35 | 36 | The notebook [``two_layer_net.ipynb``](two_layer_net.ipynb) will walk you through the implementation of a two-layer neural network classifier. 37 | 38 | ## Q5: Higher Level Representations: Image Features 39 | 40 | The notebook [``features.ipynb``](features.ipynb) will examine the improvements gained by using higher-level representations as opposed to using raw pixel values. 41 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment1/cs231n/__init__.py -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from cs231n.classifiers.k_nearest_neighbor import * 2 | from cs231n.classifiers.linear_classifier import * 3 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/classifiers/linear_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from builtins import object 5 | import numpy as np 6 | from cs231n.classifiers.linear_svm import * 7 | from cs231n.classifiers.softmax import * 8 | from past.builtins import xrange 9 | 10 | 11 | class LinearClassifier(object): 12 | 13 | def __init__(self): 14 | self.W = None 15 | 16 | def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, 17 | batch_size=200, verbose=False): 18 | """ 19 | Train this linear classifier using stochastic gradient descent. 
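        A minimal usage sketch (illustrative only, not part of the assignment
        handout): `X_train` and `y_train` stand in for a flattened CIFAR-10
        training set loaded elsewhere, and the hyperparameter values are
        placeholders.

            clf = LinearSVM()                      # subclass defined below
            loss_hist = clf.train(X_train, y_train, learning_rate=1e-7,
                                  reg=2.5e4, num_iters=1500, verbose=True)
            y_train_pred = clf.predict(X_train)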
20 | 21 | Inputs: 22 | - X: A numpy array of shape (N, D) containing training data; there are N 23 | training samples each of dimension D. 24 | - y: A numpy array of shape (N,) containing training labels; y[i] = c 25 | means that X[i] has label 0 <= c < C for C classes. 26 | - learning_rate: (float) learning rate for optimization. 27 | - reg: (float) regularization strength. 28 | - num_iters: (integer) number of steps to take when optimizing 29 | - batch_size: (integer) number of training examples to use at each step. 30 | - verbose: (boolean) If true, print progress during optimization. 31 | 32 | Outputs: 33 | A list containing the value of the loss function at each training iteration. 34 | """ 35 | num_train, dim = X.shape 36 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 37 | if self.W is None: 38 | # lazily initialize W 39 | self.W = 0.001 * np.random.randn(dim, num_classes) 40 | 41 | # Run stochastic gradient descent to optimize W 42 | loss_history = [] 43 | for it in range(num_iters): 44 | X_batch = None 45 | y_batch = None 46 | 47 | ######################################################################### 48 | # TODO: # 49 | # Sample batch_size elements from the training data and their # 50 | # corresponding labels to use in this round of gradient descent. # 51 | # Store the data in X_batch and their corresponding labels in # 52 | # y_batch; after sampling X_batch should have shape (batch_size, dim) # 53 | # and y_batch should have shape (batch_size,) # 54 | # # 55 | # Hint: Use np.random.choice to generate indices. Sampling with # 56 | # replacement is faster than sampling without replacement. # 57 | ######################################################################### 58 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | 60 | batch_indexes = np.random.choice(num_train, batch_size) 61 | X_batch = X[batch_indexes] 62 | y_batch = y[batch_indexes] 63 | 64 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 65 | 66 | # evaluate loss and gradient 67 | loss, grad = self.loss(X_batch, y_batch, reg) 68 | loss_history.append(loss) 69 | 70 | # perform parameter update 71 | ######################################################################### 72 | # TODO: # 73 | # Update the weights using the gradient and the learning rate. # 74 | ######################################################################### 75 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 76 | 77 | self.W += - learning_rate * grad 78 | 79 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 80 | 81 | if verbose and it % 100 == 0: 82 | print('iteration %d / %d: loss %f' % (it, num_iters, loss)) 83 | 84 | return loss_history 85 | 86 | def predict(self, X): 87 | """ 88 | Use the trained weights of this linear classifier to predict labels for 89 | data points. 90 | 91 | Inputs: 92 | - X: A numpy array of shape (N, D) containing training data; there are N 93 | training samples each of dimension D. 94 | 95 | Returns: 96 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional 97 | array of length N, and each element is an integer giving the predicted 98 | class. 99 | """ 100 | y_pred = np.zeros(X.shape[0]) 101 | ########################################################################### 102 | # TODO: # 103 | # Implement this method. Store the predicted labels in y_pred. 
# 104 | ########################################################################### 105 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 106 | 107 | predict_matrix = X.dot(self.W) 108 | y_pred = predict_matrix.argmax(axis=1) 109 | 110 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 111 | return y_pred 112 | 113 | def loss(self, X_batch, y_batch, reg): 114 | """ 115 | Compute the loss function and its derivative. 116 | Subclasses will override this. 117 | 118 | Inputs: 119 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N 120 | data points; each point has dimension D. 121 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch. 122 | - reg: (float) regularization strength. 123 | 124 | Returns: A tuple containing: 125 | - loss as a single float 126 | - gradient with respect to self.W; an array of the same shape as W 127 | """ 128 | pass 129 | 130 | 131 | class LinearSVM(LinearClassifier): 132 | """ A subclass that uses the Multiclass SVM loss function """ 133 | 134 | def loss(self, X_batch, y_batch, reg): 135 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg) 136 | 137 | 138 | class Softmax(LinearClassifier): 139 | """ A subclass that uses the Softmax + Cross-entropy loss function """ 140 | 141 | def loss(self, X_batch, y_batch, reg): 142 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) 143 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/classifiers/linear_svm.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | from random import shuffle 4 | from past.builtins import xrange 5 | 6 | def svm_loss_naive(W, X, y, reg): 7 | """ 8 | Structured SVM loss function, naive implementation (with loops). 9 | 10 | Inputs have dimension D, there are C classes, and we operate on minibatches 11 | of N examples. 12 | 13 | Inputs: 14 | - W: A numpy array of shape (D, C) containing weights. 15 | - X: A numpy array of shape (N, D) containing a minibatch of data. 16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 17 | that X[i] has label c, where 0 <= c < C. 18 | - reg: (float) regularization strength 19 | 20 | Returns a tuple of: 21 | - loss as single float 22 | - gradient with respect to weights W; an array of same shape as W 23 | """ 24 | dW = np.zeros(W.shape) # initialize the gradient as zero 25 | 26 | # compute the loss and the gradient 27 | num_classes = W.shape[1] 28 | num_train = X.shape[0] 29 | loss = 0.0 30 | for i in range(num_train): 31 | scores = X[i].dot(W) 32 | correct_class_score = scores[y[i]] 33 | no_meet_margin = 0 34 | for j in range(num_classes): 35 | if j == y[i]: 36 | continue 37 | margin = scores[j] - correct_class_score + 1 # note delta = 1 38 | if margin > 0: 39 | loss += margin 40 | no_meet_margin += 1 41 | dW[:, j] += X[i] 42 | dW[:, y[i]] -= no_meet_margin * X[i] 43 | 44 | # Right now the loss is a sum over all training examples, but we want it 45 | # to be an average instead so we divide by num_train. 46 | loss /= num_train 47 | 48 | # Add regularization to the loss. 49 | loss += reg * np.sum(W * W) 50 | 51 | ############################################################################# 52 | # TODO: # 53 | # Compute the gradient of the loss function and store it dW. 
# 54 | # Rather than first computing the loss and then computing the derivative, # 55 | # it may be simpler to compute the derivative at the same time that the # 56 | # loss is being computed. As a result you may need to modify some of the # 57 | # code above to compute the gradient. # 58 | ############################################################################# 59 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 60 | 61 | dW /= num_train 62 | 63 | # Add the regularization to 'dW' (derivate of L2 norm) 64 | dW += 2 * reg * W 65 | 66 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 67 | 68 | return loss, dW 69 | 70 | 71 | 72 | def svm_loss_vectorized(W, X, y, reg): 73 | """ 74 | Structured SVM loss function, vectorized implementation. 75 | 76 | Inputs and outputs are the same as svm_loss_naive. 77 | """ 78 | loss = 0.0 79 | dW = np.zeros(W.shape) # initialize the gradient as zero 80 | 81 | ############################################################################# 82 | # TODO: # 83 | # Implement a vectorized version of the structured SVM loss, storing the # 84 | # result in loss. # 85 | ############################################################################# 86 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 87 | 88 | scores = X.dot(W) 89 | 90 | correct_class_score = scores[np.arange(len(scores)), y] 91 | 92 | scores_check_margin = scores - correct_class_score.reshape(-1, 1) + 1 93 | scores_check_margin[np.arange(len(scores_check_margin)), y] = 0 94 | 95 | num_meet_margin = - np.sum(scores_check_margin > 0, axis=1) 96 | 97 | scores_check_margin[scores_check_margin < 0] = 0 98 | 99 | loss = scores_check_margin.sum() 100 | loss /= X.shape[0] 101 | loss += reg * np.sum(W * W) 102 | 103 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 104 | 105 | ############################################################################# 106 | # TODO: # 107 | # Implement a vectorized version of the gradient for the structured SVM # 108 | # loss, storing the result in dW. # 109 | # # 110 | # Hint: Instead of computing the gradient from scratch, it may be easier # 111 | # to reuse some of the intermediate values that you used to compute the # 112 | # loss. # 113 | ############################################################################# 114 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 115 | 116 | scores_check_margin[scores_check_margin > 0] = 1 117 | 118 | scores_check_margin[np.arange(len(scores_check_margin)), y] = num_meet_margin 119 | 120 | dW = X.T.dot(scores_check_margin) 121 | 122 | dW /= X.shape[0] 123 | dW += reg * 2 * W # Derivate of L2 norm 124 | 125 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 126 | 127 | return loss, dW 128 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/classifiers/softmax.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | from random import shuffle 4 | from past.builtins import xrange 5 | 6 | def softmax_loss_naive(W, X, y, reg): 7 | """ 8 | Softmax loss function, naive implementation (with loops) 9 | 10 | Inputs have dimension D, there are C classes, and we operate on minibatches 11 | of N examples. 12 | 13 | Inputs: 14 | - W: A numpy array of shape (D, C) containing weights. 15 | - X: A numpy array of shape (N, D) containing a minibatch of data. 
16 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 17 | that X[i] has label c, where 0 <= c < C. 18 | - reg: (float) regularization strength 19 | 20 | Returns a tuple of: 21 | - loss as single float 22 | - gradient with respect to weights W; an array of same shape as W 23 | """ 24 | # Initialize the loss and gradient to zero. 25 | loss = 0.0 26 | dW = np.zeros_like(W) 27 | 28 | ############################################################################# 29 | # TODO: Compute the softmax loss and its gradient using explicit loops. # 30 | # Store the loss in loss and the gradient in dW. If you are not careful # 31 | # here, it is easy to run into numeric instability. Don't forget the # 32 | # regularization! # 33 | ############################################################################# 34 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 35 | 36 | # Gradient for the Softmax function. 37 | # Check: https://stackoverflow.com/a/53972798 38 | 39 | num_classes = W.shape[1] 40 | num_train = X.shape[0] 41 | for i in range(num_train): 42 | scores = X[i].dot(W) 43 | 44 | correct_class_score = scores[y[i]] 45 | numerator = np.exp(correct_class_score) 46 | 47 | denominator = 0.0 48 | for j in range(num_classes): 49 | denominator += np.exp(scores[j]) 50 | 51 | loss += - np.log(numerator / denominator) 52 | 53 | dW[:, y[i]] += (numerator / denominator - 1) * X[i] 54 | for j in range(num_classes): 55 | if j != y[i]: 56 | dW[:, j] += (np.exp(scores[j]) / denominator) * X[i] 57 | 58 | loss /= num_train 59 | loss += reg * np.sum(W * W) 60 | 61 | dW /= num_train 62 | dW += reg * 2 * W # Derivate of L2 norm 63 | 64 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 65 | 66 | return loss, dW 67 | 68 | 69 | def softmax_loss_vectorized(W, X, y, reg): 70 | """ 71 | Softmax loss function, vectorized version. 72 | 73 | Inputs and outputs are the same as softmax_loss_naive. 74 | """ 75 | # Initialize the loss and gradient to zero. 76 | loss = 0.0 77 | dW = np.zeros_like(W) 78 | 79 | ############################################################################# 80 | # TODO: Compute the softmax loss and its gradient using no explicit loops. # 81 | # Store the loss in loss and the gradient in dW. If you are not careful # 82 | # here, it is easy to run into numeric instability. Don't forget the # 83 | # regularization! 
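    # (Added note, not part of the original handout: the vectorized code below
    #  follows the standard softmax/cross-entropy gradient. With probabilities
    #  p_j = exp(s_j) / sum_k exp(s_k) and per-example loss L_i = -log(p_{y_i}),
    #  the score gradient is dL_i/ds_j = p_j - 1{j == y_i}; chaining through
    #  s = x.dot(W) gives dW = X.T.dot(P - Y_onehot) / N plus the L2 term.)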
# 84 | ############################################################################# 85 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 86 | 87 | scores = np.exp(X.dot(W)) 88 | 89 | sum_scores = scores.sum(axis=1) 90 | 91 | y_scores = scores[np.arange(len(scores)), y] 92 | 93 | loss = - np.log(y_scores / sum_scores) 94 | loss = np.mean(loss) 95 | loss += reg * np.sum(W * W) 96 | 97 | scores /= sum_scores.reshape(-1, 1) 98 | scores[np.arange(len(scores)), y] -= 1 99 | 100 | dW = X.T.dot(scores) 101 | dW /= X.shape[0] 102 | dW += reg * 2 * W # Derivate of L2 norm 103 | 104 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 105 | 106 | return loss, dW 107 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/features.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import zip 3 | from builtins import range 4 | from past.builtins import xrange 5 | 6 | import matplotlib 7 | import numpy as np 8 | from scipy.ndimage import uniform_filter 9 | 10 | 11 | def extract_features(imgs, feature_fns, verbose=False): 12 | """ 13 | Given pixel data for images and several feature functions that can operate on 14 | single images, apply all feature functions to all images, concatenating the 15 | feature vectors for each image and storing the features for all images in 16 | a single matrix. 17 | 18 | Inputs: 19 | - imgs: N x H X W X C array of pixel data for N images. 20 | - feature_fns: List of k feature functions. The ith feature function should 21 | take as input an H x W x D array and return a (one-dimensional) array of 22 | length F_i. 23 | - verbose: Boolean; if true, print progress. 24 | 25 | Returns: 26 | An array of shape (N, F_1 + ... + F_k) where each column is the concatenation 27 | of all features for a single image. 28 | """ 29 | num_images = imgs.shape[0] 30 | if num_images == 0: 31 | return np.array([]) 32 | 33 | # Use the first image to determine feature dimensions 34 | feature_dims = [] 35 | first_image_features = [] 36 | for feature_fn in feature_fns: 37 | feats = feature_fn(imgs[0].squeeze()) 38 | assert len(feats.shape) == 1, 'Feature functions must be one-dimensional' 39 | feature_dims.append(feats.size) 40 | first_image_features.append(feats) 41 | 42 | # Now that we know the dimensions of the features, we can allocate a single 43 | # big array to store all features as columns. 44 | total_feature_dim = sum(feature_dims) 45 | imgs_features = np.zeros((num_images, total_feature_dim)) 46 | imgs_features[0] = np.hstack(first_image_features).T 47 | 48 | # Extract features for the rest of the images. 
49 | for i in range(1, num_images): 50 | idx = 0 51 | for feature_fn, feature_dim in zip(feature_fns, feature_dims): 52 | next_idx = idx + feature_dim 53 | imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze()) 54 | idx = next_idx 55 | if verbose and i % 1000 == 999: 56 | print('Done extracting features for %d / %d images' % (i+1, num_images)) 57 | 58 | return imgs_features 59 | 60 | 61 | def rgb2gray(rgb): 62 | """Convert RGB image to grayscale 63 | 64 | Parameters: 65 | rgb : RGB image 66 | 67 | Returns: 68 | gray : grayscale image 69 | 70 | """ 71 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.144]) 72 | 73 | 74 | def hog_feature(im): 75 | """Compute Histogram of Gradient (HOG) feature for an image 76 | 77 | Modified from skimage.feature.hog 78 | http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog 79 | 80 | Reference: 81 | Histograms of Oriented Gradients for Human Detection 82 | Navneet Dalal and Bill Triggs, CVPR 2005 83 | 84 | Parameters: 85 | im : an input grayscale or rgb image 86 | 87 | Returns: 88 | feat: Histogram of Gradient (HOG) feature 89 | 90 | """ 91 | 92 | # convert rgb to grayscale if needed 93 | if im.ndim == 3: 94 | image = rgb2gray(im) 95 | else: 96 | image = np.at_least_2d(im) 97 | 98 | sx, sy = image.shape # image size 99 | orientations = 9 # number of gradient bins 100 | cx, cy = (8, 8) # pixels per cell 101 | 102 | gx = np.zeros(image.shape) 103 | gy = np.zeros(image.shape) 104 | gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction 105 | gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction 106 | grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude 107 | grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation 108 | 109 | n_cellsx = int(np.floor(sx / cx)) # number of cells in x 110 | n_cellsy = int(np.floor(sy / cy)) # number of cells in y 111 | # compute orientations integral images 112 | orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations)) 113 | for i in range(orientations): 114 | # create new integral image for this orientation 115 | # isolate orientations in this range 116 | temp_ori = np.where(grad_ori < 180 / orientations * (i + 1), 117 | grad_ori, 0) 118 | temp_ori = np.where(grad_ori >= 180 / orientations * i, 119 | temp_ori, 0) 120 | # select magnitudes for those orientations 121 | cond2 = temp_ori > 0 122 | temp_mag = np.where(cond2, grad_mag, 0) 123 | orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[round(cx/2)::cx, round(cy/2)::cy].T 124 | 125 | return orientation_histogram.ravel() 126 | 127 | 128 | def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True): 129 | """ 130 | Compute color histogram for an image using hue. 131 | 132 | Inputs: 133 | - im: H x W x C array of pixel data for an RGB image. 134 | - nbin: Number of histogram bins. (default: 10) 135 | - xmin: Minimum pixel value (default: 0) 136 | - xmax: Maximum pixel value (default: 255) 137 | - normalized: Whether to normalize the histogram (default: True) 138 | 139 | Returns: 140 | 1D vector of length nbin giving the color histogram over the hue of the 141 | input image. 
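    Usage sketch (illustrative; `img` is assumed to be an H x W x 3 RGB array
    with values in [0, 255], e.g. a single CIFAR-10 image):

        hist = color_histogram_hsv(img, nbin=10)  # 1D array of length 10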
142 | """ 143 | ndim = im.ndim 144 | bins = np.linspace(xmin, xmax, nbin+1) 145 | hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax 146 | imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized) 147 | imhist = imhist * np.diff(bin_edges) 148 | 149 | # return histogram 150 | return imhist 151 | 152 | 153 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 154 | 155 | pass 156 | 157 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 158 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 40 | """ 41 | Evaluate a numeric gradient for a function that accepts a numpy 42 | array and returns a numpy array. 43 | """ 44 | grad = np.zeros_like(x) 45 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 46 | while not it.finished: 47 | ix = it.multi_index 48 | 49 | oldval = x[ix] 50 | x[ix] = oldval + h 51 | pos = f(x).copy() 52 | x[ix] = oldval - h 53 | neg = f(x).copy() 54 | x[ix] = oldval 55 | 56 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 57 | it.iternext() 58 | return grad 59 | 60 | 61 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 62 | """ 63 | Compute numeric gradients for a function that operates on input 64 | and output blobs. 65 | 66 | We assume that f accepts several input blobs as arguments, followed by a 67 | blob where outputs will be written. For example, f might be called like: 68 | 69 | f(x, w, out) 70 | 71 | where x and w are input Blobs, and the result of f will be written to out. 
72 | 73 | Inputs: 74 | - f: function 75 | - inputs: tuple of input blobs 76 | - output: output blob 77 | - h: step size 78 | """ 79 | numeric_diffs = [] 80 | for input_blob in inputs: 81 | diff = np.zeros_like(input_blob.diffs) 82 | it = np.nditer(input_blob.vals, flags=['multi_index'], 83 | op_flags=['readwrite']) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs(lambda *args: net.forward(), 105 | inputs, output, h=h) 106 | 107 | 108 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 109 | """ 110 | sample a few random elements and only return numerical 111 | in this dimensions. 112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # increment by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /cs231n/assignment1/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | from past.builtins import xrange 3 | 4 | from math import sqrt, ceil 5 | import numpy as np 6 | 7 | def visualize_grid(Xs, ubound=255.0, padding=1): 8 | """ 9 | Reshape a 4D tensor of image data to a grid for easy visualization. 
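    Usage sketch (illustrative; assumes `Xs` is an (N, H, W, C) array, e.g.
    learned first-layer weights reshaped to image form, and matplotlib.pyplot
    imported as plt):

        grid = visualize_grid(Xs, ubound=255.0, padding=3)
        plt.imshow(grid.astype('uint8'))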
10 | 11 | Inputs: 12 | - Xs: Data of shape (N, H, W, C) 13 | - ubound: Output grid will have values scaled to the range [0, ubound] 14 | - padding: The number of blank pixels between elements of the grid 15 | """ 16 | (N, H, W, C) = Xs.shape 17 | grid_size = int(ceil(sqrt(N))) 18 | grid_height = H * grid_size + padding * (grid_size - 1) 19 | grid_width = W * grid_size + padding * (grid_size - 1) 20 | grid = np.zeros((grid_height, grid_width, C)) 21 | next_idx = 0 22 | y0, y1 = 0, H 23 | for y in range(grid_size): 24 | x0, x1 = 0, W 25 | for x in range(grid_size): 26 | if next_idx < N: 27 | img = Xs[next_idx] 28 | low, high = np.min(img), np.max(img) 29 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 30 | # grid[y0:y1, x0:x1] = Xs[next_idx] 31 | next_idx += 1 32 | x0 += W + padding 33 | x1 += W + padding 34 | y0 += H + padding 35 | y1 += H + padding 36 | # grid_max = np.max(grid) 37 | # grid_min = np.min(grid) 38 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 39 | return grid 40 | 41 | def vis_grid(Xs): 42 | """ visualize a grid of images """ 43 | (N, H, W, C) = Xs.shape 44 | A = int(ceil(sqrt(N))) 45 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 46 | G *= np.min(Xs) 47 | n = 0 48 | for y in range(A): 49 | for x in range(A): 50 | if n < N: 51 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 52 | n += 1 53 | # normalize to [0,1] 54 | maxg = G.max() 55 | ming = G.min() 56 | G = (G - ming)/(maxg-ming) 57 | return G 58 | 59 | def vis_nn(rows): 60 | """ visualize array of arrays of images """ 61 | N = len(rows) 62 | D = len(rows[0]) 63 | H,W,C = rows[0][0].shape 64 | Xs = rows[0][0] 65 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 66 | for y in range(N): 67 | for x in range(D): 68 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 69 | # normalize to [0,1] 70 | maxg = G.max() 71 | ming = G.min() 72 | G = (G - ming)/(maxg-ming) 73 | return G 74 | -------------------------------------------------------------------------------- /cs231n/assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.0 4 | certifi==2019.3.9 5 | chardet==3.0.4 6 | colorama==0.4.1 7 | cycler==0.10.0 8 | decorator==4.4.0 9 | defusedxml==0.5.0 10 | entrypoints==0.3 11 | future==0.17.1 12 | gitdb2==2.0.5 13 | GitPython==2.1.11 14 | idna==2.8 15 | ipykernel==5.1.0 16 | ipython==7.4.0 17 | ipython-genutils==0.2.0 18 | ipywidgets==7.4.2 19 | imageio==2.8.0 20 | jedi==0.13.3 21 | Jinja2==2.10 22 | jsonschema==3.0.1 23 | jupyter==1.0.0 24 | jupyter-client==5.2.4 25 | jupyter-console==6.0.0 26 | jupyter-core==4.4.0 27 | jupyterlab==0.35.4 28 | jupyterlab-server==0.2.0 29 | kiwisolver==1.0.1 30 | MarkupSafe==1.1.1 31 | matplotlib==3.0.3 32 | mistune==0.8.4 33 | nbconvert==5.4.1 34 | nbdime==1.0.5 35 | nbformat==4.4.0 36 | notebook==5.7.8 37 | numpy==1.16.2 38 | pandocfilters==1.4.2 39 | parso==0.3.4 40 | pexpect==4.6.0 41 | pickleshare==0.7.5 42 | Pillow==6.0.0 43 | prometheus-client==0.6.0 44 | prompt-toolkit==2.0.9 45 | ptyprocess==0.6.0 46 | Pygments==2.3.1 47 | pyparsing==2.3.1 48 | pyrsistent==0.14.11 49 | python-dateutil==2.8.0 50 | pyzmq==18.0.1 51 | qtconsole==4.4.3 52 | requests==2.21.0 53 | scipy==1.2.1 54 | Send2Trash==1.5.0 55 | six==1.12.0 56 | smmap2==2.0.5 57 | terminado==0.8.2 58 | testpath==0.4.2 59 | tornado==6.0.2 60 | traitlets==4.3.2 61 | urllib3==1.24.1 62 | wcwidth==0.1.7 63 | webencodings==0.5.1 64 | widgetsnbextension==3.4.2 65 | 
-------------------------------------------------------------------------------- /cs231n/assignment2/README.md: -------------------------------------------------------------------------------- 1 |
2 | # CS231n: Convolutional Neural Networks for Visual Recognition
3 | ## Assignment 2 (2020)
4 |
5 | 6 | # Goals 7 | 8 | In this assignment you will practice writing backpropagation code, and training Neural Networks and Convolutional Neural Networks. The goals of this assignment are as follows: 9 | 10 | - Understand **Neural Networks** and how they are arranged in layered architectures. 11 | - Understand and be able to implement (vectorized) **backpropagation**. 12 | - Implement various **update rules** used to optimize Neural Networks. 13 | - Implement **Batch Normalization** and **Layer Normalization** for training deep networks. 14 | - Implement **Dropout** to regularize networks. 15 | - Understand the architecture of **Convolutional Neural Networks** and get practice with training them. 16 | - Gain experience with a major deep learning framework: **PyTorch**. 17 | 18 | # Questions 19 | 20 | ## Q1: Fully-connected Neural Network 21 | 22 | The notebook [``FullyConnectedNets.ipynb``](FullyConnectedNets.ipynb) will introduce you to our modular layer design, and then use those layers to implement fully-connected networks of arbitrary depth. To optimize these models you will implement several popular update rules. 23 | 24 | ## Q2: Batch Normalization 25 | 26 | In notebook [``BatchNormalization.ipynb``](BatchNormalization.ipynb) you will implement batch normalization, and use it to train deep fully-connected networks. 27 | 28 | ## Q3: Dropout 29 | 30 | The notebook [``Dropout.ipynb``](Dropout.ipynb) will help you implement Dropout and explore its effects on model generalization. 31 | 32 | ## Q4: Convolutional Networks 33 | 34 | In the IPython Notebook [``ConvolutionalNetworks.ipynb``](ConvolutionalNetworks.ipynb) you will implement several new layers that are commonly used in convolutional networks. 35 | 36 | ## Q5: PyTorch on CIFAR-10 37 | 38 | For this last part, you will be working in PyTorch, a popular and powerful deep learning framework. Open up [``PyTorch.ipynb``](PyTorch.ipynb). There, you will learn how the framework works, culminating in training a convolutional network of your own design on CIFAR-10 to get the best performance you can. 39 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/__init__.py -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/build/temp.linux-x86_64-3.6/im2col_cython.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/build/temp.linux-x86_64-3.6/im2col_cython.o -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | if [ ! 
-d "cifar-10-batches-py" ]; then 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | fi 6 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | 9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 10 | """ 11 | a naive implementation of numerical gradient of f at x 12 | - f should be a function that takes a single argument 13 | - x is the point (numpy array) to evaluate the gradient at 14 | """ 15 | 16 | fx = f(x) # evaluate function value at original point 17 | grad = np.zeros_like(x) 18 | # iterate over all indexes in x 19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 20 | while not it.finished: 21 | 22 | # evaluate function at x+h 23 | ix = it.multi_index 24 | oldval = x[ix] 25 | x[ix] = oldval + h # increment by h 26 | fxph = f(x) # evalute f(x + h) 27 | x[ix] = oldval - h 28 | fxmh = f(x) # evaluate f(x - h) 29 | x[ix] = oldval # restore 30 | 31 | # compute the partial derivative with centered formula 32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 33 | if verbose: 34 | print(ix, grad[ix]) 35 | it.iternext() # step to next dimension 36 | 37 | return grad 38 | 39 | 40 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 41 | """ 42 | Evaluate a numeric gradient for a function that accepts a numpy 43 | array and returns a numpy array. 44 | """ 45 | grad = np.zeros_like(x) 46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 47 | while not it.finished: 48 | ix = it.multi_index 49 | 50 | oldval = x[ix] 51 | x[ix] = oldval + h 52 | pos = f(x).copy() 53 | x[ix] = oldval - h 54 | neg = f(x).copy() 55 | x[ix] = oldval 56 | 57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 58 | it.iternext() 59 | return grad 60 | 61 | 62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 63 | """ 64 | Compute numeric gradients for a function that operates on input 65 | and output blobs. 66 | 67 | We assume that f accepts several input blobs as arguments, followed by a 68 | blob where outputs will be written. For example, f might be called like: 69 | 70 | f(x, w, out) 71 | 72 | where x and w are input Blobs, and the result of f will be written to out. 
73 | 74 | Inputs: 75 | - f: function 76 | - inputs: tuple of input blobs 77 | - output: output blob 78 | - h: step size 79 | """ 80 | numeric_diffs = [] 81 | for input_blob in inputs: 82 | diff = np.zeros_like(input_blob.diffs) 83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"]) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs( 105 | lambda *args: net.forward(), inputs, output, h=h 106 | ) 107 | 108 | 109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 110 | """ 111 | sample a few random elements and only return numerical 112 | in this dimensions. 113 | """ 114 | 115 | for i in range(num_checks): 116 | ix = tuple([randrange(m) for m in x.shape]) 117 | 118 | oldval = x[ix] 119 | x[ix] = oldval + h # increment by h 120 | fxph = f(x) # evaluate f(x + h) 121 | x[ix] = oldval - h # increment by h 122 | fxmh = f(x) # evaluate f(x - h) 123 | x[ix] = oldval # reset 124 | 125 | grad_numerical = (fxph - fxmh) / (2 * h) 126 | grad_analytic = analytic_grad[ix] 127 | rel_error = abs(grad_numerical - grad_analytic) / ( 128 | abs(grad_numerical) + abs(grad_analytic) 129 | ) 130 | print( 131 | "numerical: %f analytic: %f, relative error: %e" 132 | % (grad_numerical, grad_analytic, rel_error) 133 | ) 134 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_height) % stride == 0 10 | out_height = (H + 2 * padding - field_height) / stride + 1 11 | out_width = (W + 2 * padding - field_width) / stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant") 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride) 33 | 34 | cols = x_padded[:, k, i, j] 35 | C = x.shape[1] 36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 37 | return cols 38 | 39 | 40 | def col2im_indices(cols, x_shape, field_height=3, 
field_width=3, padding=1, stride=1): 41 | """ An implementation of col2im based on fancy indexing and np.add.at """ 42 | N, C, H, W = x_shape 43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride) 46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 49 | if padding == 0: 50 | return x_padded 51 | return x_padded[:, :, padding:-padding, padding:-padding] 52 | 53 | 54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 55 | 56 | pass 57 | 58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/im2col_cython.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/im2col_cython.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/im2col_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # DTYPE = np.float64 6 | # ctypedef np.float64_t DTYPE_t 7 | 8 | ctypedef fused DTYPE_t: 9 | np.float32_t 10 | np.float64_t 11 | 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 13 | int field_width, int padding, int stride): 14 | cdef int N = x.shape[0] 15 | cdef int C = x.shape[1] 16 | cdef int H = x.shape[2] 17 | cdef int W = x.shape[3] 18 | 19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 21 | 22 | cdef int p = padding 23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 25 | 26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 27 | (C * field_height * field_width, N * HH * WW), 28 | dtype=x.dtype) 29 | 30 | # Moving the inner loop to a C function with no bounds checking works, but does 31 | # not seem to help performance in any measurable way. 32 | 33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 34 | field_height, field_width, padding, stride) 35 | return cols 36 | 37 | 38 | @cython.boundscheck(False) 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 40 | np.ndarray[DTYPE_t, ndim=4] x_padded, 41 | int N, int C, int H, int W, int HH, int WW, 42 | int field_height, int field_width, int padding, int stride) except? 
-1: 43 | cdef int c, ii, jj, row, yy, xx, i, col 44 | 45 | for c in range(C): 46 | for yy in range(HH): 47 | for xx in range(WW): 48 | for ii in range(field_height): 49 | for jj in range(field_width): 50 | row = c * field_width * field_height + ii * field_height + jj 51 | for i in range(N): 52 | col = yy * WW * N + xx * N + i 53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 54 | 55 | 56 | 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 58 | int field_height, int field_width, int padding, int stride): 59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 63 | dtype=cols.dtype) 64 | 65 | # Moving the inner loop to a C-function with no bounds checking improves 66 | # performance quite a bit for col2im. 67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 119 | if pad > 0: 120 | return x_padded[:, :, pad:-pad, pad:-pad] 121 | return x_padded 122 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/layer_utils.py: -------------------------------------------------------------------------------- 1 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 2 | 3 | pass 4 | 5 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 6 | from .layers import * 7 | from .fast_layers import * 8 | 9 | 10 | def 
affine_relu_forward(x, w, b): 11 | """ 12 | Convenience layer that perorms an affine transform followed by a ReLU 13 | 14 | Inputs: 15 | - x: Input to the affine layer 16 | - w, b: Weights for the affine layer 17 | 18 | Returns a tuple of: 19 | - out: Output from the ReLU 20 | - cache: Object to give to the backward pass 21 | """ 22 | a, fc_cache = affine_forward(x, w, b) 23 | out, relu_cache = relu_forward(a) 24 | cache = (fc_cache, relu_cache) 25 | return out, cache 26 | 27 | 28 | def affine_relu_backward(dout, cache): 29 | """ 30 | Backward pass for the affine-relu convenience layer 31 | """ 32 | fc_cache, relu_cache = cache 33 | da = relu_backward(dout, relu_cache) 34 | dx, dw, db = affine_backward(da, fc_cache) 35 | return dx, dw, db 36 | 37 | 38 | def conv_relu_forward(x, w, b, conv_param): 39 | """ 40 | A convenience layer that performs a convolution followed by a ReLU. 41 | 42 | Inputs: 43 | - x: Input to the convolutional layer 44 | - w, b, conv_param: Weights and parameters for the convolutional layer 45 | 46 | Returns a tuple of: 47 | - out: Output from the ReLU 48 | - cache: Object to give to the backward pass 49 | """ 50 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 51 | out, relu_cache = relu_forward(a) 52 | cache = (conv_cache, relu_cache) 53 | return out, cache 54 | 55 | 56 | def conv_relu_backward(dout, cache): 57 | """ 58 | Backward pass for the conv-relu convenience layer. 59 | """ 60 | conv_cache, relu_cache = cache 61 | da = relu_backward(dout, relu_cache) 62 | dx, dw, db = conv_backward_fast(da, conv_cache) 63 | return dx, dw, db 64 | 65 | 66 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 67 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 68 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 69 | out, relu_cache = relu_forward(an) 70 | cache = (conv_cache, bn_cache, relu_cache) 71 | return out, cache 72 | 73 | 74 | def conv_bn_relu_backward(dout, cache): 75 | conv_cache, bn_cache, relu_cache = cache 76 | dan = relu_backward(dout, relu_cache) 77 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 78 | dx, dw, db = conv_backward_fast(da, conv_cache) 79 | return dx, dw, db, dgamma, dbeta 80 | 81 | 82 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 83 | """ 84 | Convenience layer that performs a convolution, a ReLU, and a pool. 
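    Usage sketch (illustrative; `x`, `w`, `b` and `dout` are assumed to be
    arrays with the usual layers.py shapes, x: (N, C, H, W), w: (F, C, HH, WW),
    b: (F,), and the fast layers need the Cython extension built via setup.py):

        conv_param = {'stride': 1, 'pad': 1}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
        out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
        dx, dw, db = conv_relu_pool_backward(dout, cache)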
85 | 86 | Inputs: 87 | - x: Input to the convolutional layer 88 | - w, b, conv_param: Weights and parameters for the convolutional layer 89 | - pool_param: Parameters for the pooling layer 90 | 91 | Returns a tuple of: 92 | - out: Output from the pooling layer 93 | - cache: Object to give to the backward pass 94 | """ 95 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 96 | s, relu_cache = relu_forward(a) 97 | out, pool_cache = max_pool_forward_fast(s, pool_param) 98 | cache = (conv_cache, relu_cache, pool_cache) 99 | return out, cache 100 | 101 | 102 | def conv_relu_pool_backward(dout, cache): 103 | """ 104 | Backward pass for the conv-relu-pool convenience layer 105 | """ 106 | conv_cache, relu_cache, pool_cache = cache 107 | ds = max_pool_backward_fast(dout, pool_cache) 108 | da = relu_backward(ds, relu_cache) 109 | dx, dw, db = conv_backward_fast(da, conv_cache) 110 | return dx, dw, db 111 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/notebook_images/batchnorm_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/notebook_images/batchnorm_graph.png -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/notebook_images/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/notebook_images/kitten.jpg -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/notebook_images/normalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/notebook_images/normalization.png -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/notebook_images/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment2/cs231n/notebook_images/puppy.jpg -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used 5 | for training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning 16 | rate, momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 
21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not 25 | perform well; however the default values of the other hyperparameters should 26 | work well for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: 41 | config = {} 42 | config.setdefault("learning_rate", 1e-2) 43 | 44 | w -= config["learning_rate"] * dw 45 | return w, config 46 | 47 | 48 | def sgd_momentum(w, dw, config=None): 49 | """ 50 | Performs stochastic gradient descent with momentum. 51 | 52 | config format: 53 | - learning_rate: Scalar learning rate. 54 | - momentum: Scalar between 0 and 1 giving the momentum value. 55 | Setting momentum = 0 reduces to sgd. 56 | - velocity: A numpy array of the same shape as w and dw used to store a 57 | moving average of the gradients. 58 | """ 59 | if config is None: 60 | config = {} 61 | config.setdefault("learning_rate", 1e-2) 62 | config.setdefault("momentum", 0.9) 63 | v = config.get("velocity", np.zeros_like(w)) 64 | 65 | next_w = None 66 | ########################################################################### 67 | # TODO: Implement the momentum update formula. Store the updated value in # 68 | # the next_w variable. You should also use and update the velocity v. # 69 | ########################################################################### 70 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 71 | 72 | v = config["momentum"] * v - config["learning_rate"] * dw 73 | next_w = w + v 74 | 75 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 76 | ########################################################################### 77 | # END OF YOUR CODE # 78 | ########################################################################### 79 | config["velocity"] = v 80 | 81 | return next_w, config 82 | 83 | 84 | def rmsprop(w, dw, config=None): 85 | """ 86 | Uses the RMSProp update rule, which uses a moving average of squared 87 | gradient values to set adaptive per-parameter learning rates. 88 | 89 | config format: 90 | - learning_rate: Scalar learning rate. 91 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 92 | gradient cache. 93 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 94 | - cache: Moving average of second moments of gradients. 95 | """ 96 | if config is None: 97 | config = {} 98 | config.setdefault("learning_rate", 1e-2) 99 | config.setdefault("decay_rate", 0.99) 100 | config.setdefault("epsilon", 1e-8) 101 | config.setdefault("cache", np.zeros_like(w)) 102 | 103 | next_w = None 104 | ########################################################################### 105 | # TODO: Implement the RMSprop update formula, storing the next value of w # 106 | # in the next_w variable. Don't forget to update cache value stored in # 107 | # config['cache']. # 108 | ########################################################################### 109 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 110 | 111 | # Each equation was written in two lines (note "\" in the end) for readability. 
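# RMSProp keeps a decaying average of squared gradients and scales each
# parameter's step by the inverse square root of that average:
#   cache <- decay_rate * cache + (1 - decay_rate) * dw**2
#   w     <- w - learning_rate * dw / (sqrt(cache) + epsilon)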
112 | config["cache"] = config["decay_rate"] * config["cache"] \ 113 | + (1 - config["decay_rate"]) * dw**2 114 | 115 | next_w = w - config["learning_rate"] * dw \ 116 | / (np.sqrt(config["cache"]) + config["epsilon"]) 117 | 118 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 119 | ########################################################################### 120 | # END OF YOUR CODE # 121 | ########################################################################### 122 | 123 | return next_w, config 124 | 125 | 126 | def adam(w, dw, config=None): 127 | """ 128 | Uses the Adam update rule, which incorporates moving averages of both the 129 | gradient and its square and a bias correction term. 130 | 131 | config format: 132 | - learning_rate: Scalar learning rate. 133 | - beta1: Decay rate for moving average of first moment of gradient. 134 | - beta2: Decay rate for moving average of second moment of gradient. 135 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 136 | - m: Moving average of gradient. 137 | - v: Moving average of squared gradient. 138 | - t: Iteration number. 139 | """ 140 | if config is None: 141 | config = {} 142 | config.setdefault("learning_rate", 1e-3) 143 | config.setdefault("beta1", 0.9) 144 | config.setdefault("beta2", 0.999) 145 | config.setdefault("epsilon", 1e-8) 146 | config.setdefault("m", np.zeros_like(w)) 147 | config.setdefault("v", np.zeros_like(w)) 148 | config.setdefault("t", 0) 149 | 150 | next_w = None 151 | ########################################################################### 152 | # TODO: Implement the Adam update formula, storing the next value of w in # 153 | # the next_w variable. Don't forget to update the m, v, and t variables # 154 | # stored in config. # 155 | # # 156 | # NOTE: In order to match the reference output, please modify t _before_ # 157 | # using it in any calculations. # 158 | ########################################################################### 159 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 160 | 161 | # Increment iteration counter. 
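# (t must be updated before the bias-correction terms below are computed.)
# Adam keeps exponential moving averages of the gradient (m) and its square (v),
# then bias-corrects both, since they are initialized at zero:
#   m <- beta1 * m + (1 - beta1) * dw,      mt = m / (1 - beta1**t)
#   v <- beta2 * v + (1 - beta2) * dw**2,   vt = v / (1 - beta2**t)
#   w <- w - learning_rate * mt / (sqrt(vt) + epsilon)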
162 | config["t"] += 1 163 | 164 | config["m"] = config["beta1"]*config["m"] + (1-config["beta1"])*dw 165 | mt = config["m"] / (1-config["beta1"]**config["t"]) 166 | config["v"] = config["beta2"]*config["v"] + (1-config["beta2"])*(dw**2) 167 | vt = config["v"] / (1-config["beta2"]**config["t"]) 168 | next_w = w - config["learning_rate"] * mt / (np.sqrt(vt) + config["epsilon"]) 169 | 170 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 171 | ########################################################################### 172 | # END OF YOUR CODE # 173 | ########################################################################### 174 | 175 | return next_w, config 176 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension( 8 | "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup(ext_modules=cythonize(extensions),) 13 | -------------------------------------------------------------------------------- /cs231n/assignment2/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | from past.builtins import xrange 3 | 4 | from math import sqrt, ceil 5 | import numpy as np 6 | 7 | 8 | def visualize_grid(Xs, ubound=255.0, padding=1): 9 | """ 10 | Reshape a 4D tensor of image data to a grid for easy visualization. 11 | 12 | Inputs: 13 | - Xs: Data of shape (N, H, W, C) 14 | - ubound: Output grid will have values scaled to the range [0, ubound] 15 | - padding: The number of blank pixels between elements of the grid 16 | """ 17 | (N, H, W, C) = Xs.shape 18 | grid_size = int(ceil(sqrt(N))) 19 | grid_height = H * grid_size + padding * (grid_size - 1) 20 | grid_width = W * grid_size + padding * (grid_size - 1) 21 | grid = np.zeros((grid_height, grid_width, C)) 22 | next_idx = 0 23 | y0, y1 = 0, H 24 | for y in range(grid_size): 25 | x0, x1 = 0, W 26 | for x in range(grid_size): 27 | if next_idx < N: 28 | img = Xs[next_idx] 29 | low, high = np.min(img), np.max(img) 30 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 31 | # grid[y0:y1, x0:x1] = Xs[next_idx] 32 | next_idx += 1 33 | x0 += W + padding 34 | x1 += W + padding 35 | y0 += H + padding 36 | y1 += H + padding 37 | # grid_max = np.max(grid) 38 | # grid_min = np.min(grid) 39 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 40 | return grid 41 | 42 | 43 | def vis_grid(Xs): 44 | """ visualize a grid of images """ 45 | (N, H, W, C) = Xs.shape 46 | A = int(ceil(sqrt(N))) 47 | G = np.ones((A * H + A, A * W + A, C), Xs.dtype) 48 | G *= np.min(Xs) 49 | n = 0 50 | for y in range(A): 51 | for x in range(A): 52 | if n < N: 53 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = Xs[ 54 | n, :, :, : 55 | ] 56 | n += 1 57 | # normalize to [0,1] 58 | maxg = G.max() 59 | ming = G.min() 60 | G = (G - ming) / (maxg - ming) 61 | return G 62 | 63 | 64 | def vis_nn(rows): 65 | """ visualize array of arrays of images """ 66 | N = len(rows) 67 | D = len(rows[0]) 68 | H, W, C = rows[0][0].shape 69 | Xs = rows[0][0] 70 | G = np.ones((N * H + N, D * W + D, C), Xs.dtype) 71 | for y in range(N): 72 | for x in range(D): 73 | G[y * H + y : (y + 1) * H + y, x * W + x : (x + 1) * W + x, :] = 
rows[y][x] 74 | # normalize to [0,1] 75 | maxg = G.max() 76 | ming = G.min() 77 | G = (G - ming) / (maxg - ming) 78 | return G 79 | -------------------------------------------------------------------------------- /cs231n/assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.0 4 | certifi==2019.3.9 5 | chardet==3.0.4 6 | colorama==0.4.1 7 | cycler==0.10.0 8 | Cython==0.29.16 9 | decorator==4.4.0 10 | defusedxml==0.5.0 11 | entrypoints==0.3 12 | future==0.17.1 13 | gitdb2==2.0.5 14 | GitPython==2.1.11 15 | idna==2.8 16 | ipykernel==5.1.0 17 | ipython==7.4.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.4.2 20 | imageio==2.8.0 21 | jedi==0.13.3 22 | Jinja2==2.10 23 | jsonschema==3.0.1 24 | jupyter==1.0.0 25 | jupyter-client==5.2.4 26 | jupyter-console==6.0.0 27 | jupyter-core==4.4.0 28 | jupyterlab==0.35.4 29 | jupyterlab-server==0.2.0 30 | kiwisolver==1.0.1 31 | MarkupSafe==1.1.1 32 | matplotlib==3.0.3 33 | mistune==0.8.4 34 | nbconvert==5.4.1 35 | nbdime==1.0.5 36 | nbformat==4.4.0 37 | notebook==5.7.8 38 | numpy==1.16.2 39 | pandocfilters==1.4.2 40 | parso==0.3.4 41 | pexpect==4.6.0 42 | pickleshare==0.7.5 43 | Pillow==6.0.0 44 | prometheus-client==0.6.0 45 | prompt-toolkit==2.0.9 46 | ptyprocess==0.6.0 47 | Pygments==2.3.1 48 | pyparsing==2.3.1 49 | pyrsistent==0.14.11 50 | python-dateutil==2.8.0 51 | pyzmq==18.0.1 52 | qtconsole==4.4.3 53 | requests==2.21.0 54 | scipy==1.2.1 55 | Send2Trash==1.5.0 56 | six==1.12.0 57 | smmap2==2.0.5 58 | terminado==0.8.2 59 | testpath==0.4.2 60 | tornado==6.0.2 61 | traitlets==4.3.2 62 | urllib3==1.24.1 63 | wcwidth==0.1.7 64 | webencodings==0.5.1 65 | widgetsnbextension==3.4.2 66 | -------------------------------------------------------------------------------- /cs231n/assignment3/README.md: -------------------------------------------------------------------------------- 1 |
2 | CS231n: Convolutional Neural Networks for Visual Recognition 3 | Assignment 3 (2020) 4 |
5 | 6 | # Goals 7 | 8 | In this assignment, you will implement recurrent neural networks and apply them to image captioning on the Microsoft COCO data. You will also explore methods for visualizing the features of a pretrained model on ImageNet, and use this model to implement Style Transfer. Finally, you will train a Generative Adversarial Network to generate images that look like a training dataset! 9 | 10 | The goals of this assignment are as follows: 11 | 12 | - Understand the architecture of **recurrent neural networks (RNNs)** and how they operate on sequences by sharing weights over time. 13 | - Understand and implement both **Vanilla RNNs** and **Long-Short Term Memory (LSTM)** networks. 14 | - Understand how to combine convolutional neural nets and recurrent nets to implement an **image captioning** system. 15 | - Explore various applications of **image gradients**, including **saliency maps**, **fooling images**, **class visualizations**. 16 | - Understand and implement techniques for image **style transfer**. 17 | - Understand how to train and implement a **Generative Adversarial Network (GAN)** to produce images that resemble samples from a dataset. 18 | 19 | # Questions 20 | 21 | ## Q1: Image Captioning with Vanilla RNNs 22 | 23 | The notebook [``RNN_Captioning.ipynb``](RNN_Captioning.ipynb) will walk you through the implementation of an image captioning system on MS-COCO using vanilla recurrent networks. 24 | 25 | ## Q2: Image Captioning with LSTMs 26 | 27 | The notebook [``LSTM_Captioning.ipynb``](LSTM_Captioning.ipynb) will walk you through the implementation of Long-Short Term Memory (LSTM) RNNs, and apply them to image captioning on MS-COCO. 28 | 29 | ## Q3: Network Visualization: Saliency maps, Class Visualization, and Fooling Images 30 | 31 | The notebook [``NetworkVisualization-PyTorch.ipynb``](NetworkVisualization-PyTorch.ipynb) will introduce the pretrained SqueezeNet model, compute gradients with respect to images, and use them to produce saliency maps and fooling images. 32 | 33 | ## Q4: Style Transfer 34 | 35 | In the notebook [``StyleTransfer-PyTorch.ipynb``](StyleTransfer-PyTorch.ipynb) you will learn how to create images with the content of one image but the style of another. 36 | 37 | ## Q5: Generative Adversarial Networks 38 | 39 | In the notebook [``Generative_Adversarial_Networks_PyTorch.ipynb``](Generative_Adversarial_Networks_PyTorch.ipynb) you will learn how to generate images that match a training dataset, and use these models to improve classifier performance when training on a large amount of unlabeled data and a small amount of labeled data. 40 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/cs231n/__init__.py -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/captioning_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from . 
import optim 7 | from .coco_utils import sample_coco_minibatch 8 | 9 | 10 | class CaptioningSolver(object): 11 | """ 12 | A CaptioningSolver encapsulates all the logic necessary for training 13 | image captioning models. The CaptioningSolver performs stochastic gradient 14 | descent using different update rules defined in optim.py. 15 | 16 | The solver accepts both training and validation data and labels so it can 17 | periodically check classification accuracy on both training and validation 18 | data to watch out for overfitting. 19 | 20 | To train a model, you will first construct a CaptioningSolver instance, 21 | passing the model, dataset, and various options (learning rate, batch size, 22 | etc) to the constructor. You will then call the train() method to run the 23 | optimization procedure and train the model. 24 | 25 | After the train() method returns, model.params will contain the parameters 26 | that performed best on the validation set over the course of training. 27 | In addition, the instance variable solver.loss_history will contain a list 28 | of all losses encountered during training and the instance variables 29 | solver.train_acc_history and solver.val_acc_history will be lists containing 30 | the accuracies of the model on the training and validation set at each epoch. 31 | 32 | Example usage might look something like this: 33 | 34 | data = load_coco_data() 35 | model = MyAwesomeModel(hidden_dim=100) 36 | solver = CaptioningSolver(model, data, 37 | update_rule='sgd', 38 | optim_config={ 39 | 'learning_rate': 1e-3, 40 | }, 41 | lr_decay=0.95, 42 | num_epochs=10, batch_size=100, 43 | print_every=100) 44 | solver.train() 45 | 46 | 47 | A CaptioningSolver works on a model object that must conform to the following 48 | API: 49 | 50 | - model.params must be a dictionary mapping string parameter names to numpy 51 | arrays containing parameter values. 52 | 53 | - model.loss(features, captions) must be a function that computes 54 | training-time loss and gradients, with the following inputs and outputs: 55 | 56 | Inputs: 57 | - features: Array giving a minibatch of features for images, of shape (N, D) 58 | - captions: Array of captions for those images, of shape (N, T) where 59 | each element is in the range (0, V]. 60 | 61 | Returns: 62 | - loss: Scalar giving the loss 63 | - grads: Dictionary with the same keys as self.params mapping parameter 64 | names to gradients of the loss with respect to those parameters. 65 | """ 66 | 67 | def __init__(self, model, data, **kwargs): 68 | """ 69 | Construct a new CaptioningSolver instance. 70 | 71 | Required arguments: 72 | - model: A model object conforming to the API described above 73 | - data: A dictionary of training and validation data from load_coco_data 74 | 75 | Optional arguments: 76 | - update_rule: A string giving the name of an update rule in optim.py. 77 | Default is 'sgd'. 78 | - optim_config: A dictionary containing hyperparameters that will be 79 | passed to the chosen update rule. Each update rule requires different 80 | hyperparameters (see optim.py) but all update rules require a 81 | 'learning_rate' parameter so that should always be present. 82 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 83 | rate is multiplied by this value. 84 | - batch_size: Size of minibatches used to compute loss and gradient during 85 | training. 86 | - num_epochs: The number of epochs to run for during training. 87 | - print_every: Integer; training losses will be printed every print_every 88 | iterations.
89 | - verbose: Boolean; if set to false then no output will be printed during 90 | training. 91 | """ 92 | self.model = model 93 | self.data = data 94 | 95 | # Unpack keyword arguments 96 | self.update_rule = kwargs.pop("update_rule", "sgd") 97 | self.optim_config = kwargs.pop("optim_config", {}) 98 | self.lr_decay = kwargs.pop("lr_decay", 1.0) 99 | self.batch_size = kwargs.pop("batch_size", 100) 100 | self.num_epochs = kwargs.pop("num_epochs", 10) 101 | 102 | self.print_every = kwargs.pop("print_every", 10) 103 | self.verbose = kwargs.pop("verbose", True) 104 | 105 | # Throw an error if there are extra keyword arguments 106 | if len(kwargs) > 0: 107 | extra = ", ".join('"%s"' % k for k in list(kwargs.keys())) 108 | raise ValueError("Unrecognized arguments %s" % extra) 109 | 110 | # Make sure the update rule exists, then replace the string 111 | # name with the actual function 112 | if not hasattr(optim, self.update_rule): 113 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 114 | self.update_rule = getattr(optim, self.update_rule) 115 | 116 | self._reset() 117 | 118 | def _reset(self): 119 | """ 120 | Set up some book-keeping variables for optimization. Don't call this 121 | manually. 122 | """ 123 | # Set up some variables for book-keeping 124 | self.epoch = 0 125 | self.best_val_acc = 0 126 | self.best_params = {} 127 | self.loss_history = [] 128 | self.train_acc_history = [] 129 | self.val_acc_history = [] 130 | 131 | # Make a deep copy of the optim_config for each parameter 132 | self.optim_configs = {} 133 | for p in self.model.params: 134 | d = {k: v for k, v in self.optim_config.items()} 135 | self.optim_configs[p] = d 136 | 137 | def _step(self): 138 | """ 139 | Make a single gradient update. This is called by train() and should not 140 | be called manually. 141 | """ 142 | # Make a minibatch of training data 143 | minibatch = sample_coco_minibatch( 144 | self.data, batch_size=self.batch_size, split="train" 145 | ) 146 | captions, features, urls = minibatch 147 | 148 | # Compute loss and gradient 149 | loss, grads = self.model.loss(features, captions) 150 | self.loss_history.append(loss) 151 | 152 | # Perform a parameter update 153 | for p, w in self.model.params.items(): 154 | dw = grads[p] 155 | config = self.optim_configs[p] 156 | next_w, next_config = self.update_rule(w, dw, config) 157 | self.model.params[p] = next_w 158 | self.optim_configs[p] = next_config 159 | 160 | def check_accuracy(self, X, y, num_samples=None, batch_size=100): 161 | """ 162 | Check accuracy of the model on the provided data. 163 | 164 | Inputs: 165 | - X: Array of data, of shape (N, d_1, ..., d_k) 166 | - y: Array of labels, of shape (N,) 167 | - num_samples: If not None, subsample the data and only test the model 168 | on num_samples datapoints. 169 | - batch_size: Split X and y into batches of this size to avoid using too 170 | much memory. 171 | 172 | Returns: 173 | - acc: Scalar giving the fraction of instances that were correctly 174 | classified by the model. 
175 | """ 176 | return 0.0 177 | 178 | # Maybe subsample the data 179 | N = X.shape[0] 180 | if num_samples is not None and N > num_samples: 181 | mask = np.random.choice(N, num_samples) 182 | N = num_samples 183 | X = X[mask] 184 | y = y[mask] 185 | 186 | # Compute predictions in batches 187 | num_batches = N / batch_size 188 | if N % batch_size != 0: 189 | num_batches += 1 190 | y_pred = [] 191 | for i in range(num_batches): 192 | start = i * batch_size 193 | end = (i + 1) * batch_size 194 | scores = self.model.loss(X[start:end]) 195 | y_pred.append(np.argmax(scores, axis=1)) 196 | y_pred = np.hstack(y_pred) 197 | acc = np.mean(y_pred == y) 198 | 199 | return acc 200 | 201 | def train(self): 202 | """ 203 | Run optimization to train the model. 204 | """ 205 | num_train = self.data["train_captions"].shape[0] 206 | iterations_per_epoch = max(num_train // self.batch_size, 1) 207 | num_iterations = self.num_epochs * iterations_per_epoch 208 | 209 | for t in range(num_iterations): 210 | self._step() 211 | 212 | # Maybe print training loss 213 | if self.verbose and t % self.print_every == 0: 214 | print( 215 | "(Iteration %d / %d) loss: %f" 216 | % (t + 1, num_iterations, self.loss_history[-1]) 217 | ) 218 | 219 | # At the end of every epoch, increment the epoch counter and decay the 220 | # learning rate. 221 | epoch_end = (t + 1) % iterations_per_epoch == 0 222 | if epoch_end: 223 | self.epoch += 1 224 | for k in self.optim_configs: 225 | self.optim_configs[k]["learning_rate"] *= self.lr_decay 226 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/classifiers/squeezenet.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | NUM_CLASSES = 1000 4 | 5 | class Fire(tf.keras.Model): 6 | def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes,name=None): 7 | super(Fire, self).__init__(name='%s/fire'%name) 8 | self.inplanes = inplanes 9 | self.squeeze = tf.keras.layers.Conv2D(squeeze_planes, input_shape=(inplanes,), kernel_size=1, strides=(1,1), padding="VALID", activation='relu',name='squeeze') 10 | self.expand1x1 = tf.keras.layers.Conv2D(expand1x1_planes, kernel_size=1, padding="VALID", strides=(1,1), activation='relu',name='e11') 11 | self.expand3x3 = tf.keras.layers.Conv2D(expand3x3_planes, kernel_size=3, padding="SAME", strides=(1,1), activation='relu',name='e33') 12 | 13 | def call(self, x): 14 | x = self.squeeze(x) 15 | return tf.concat([ 16 | self.expand1x1(x), 17 | self.expand3x3(x) 18 | ], axis=3) 19 | 20 | 21 | class SqueezeNet(tf.keras.Model): 22 | def __init__(self, num_classes=NUM_CLASSES): 23 | super(SqueezeNet, self).__init__() 24 | self.num_classes = num_classes 25 | 26 | self.net = tf.keras.models.Sequential([ 27 | tf.keras.layers.Conv2D(64, kernel_size=(3, 3), strides=(2,2), padding="VALID", activation='relu', input_shape=(224, 224, 3), name='features/layer0'), 28 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer2'), 29 | Fire(64, 16, 64, 64, name='features/layer3'), 30 | Fire(128, 16, 64, 64, name='features/layer4'), 31 | 
tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer5'), 32 | Fire(128, 32, 128, 128, name='features/layer6'), 33 | Fire(256, 32, 128, 128, name='features/layer7'), 34 | tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name='features/layer8'), 35 | Fire(256, 48, 192, 192, name='features/layer9'), 36 | Fire(384, 48, 192, 192, name='features/layer10'), 37 | Fire(384, 64, 256, 256, name='features/layer11'), 38 | Fire(512, 64, 256, 256, name='features/layer12'), 39 | tf.keras.layers.Conv2D(self.num_classes, kernel_size=1, padding="VALID", activation='relu', name='classifier/layer1'), 40 | tf.keras.layers.AveragePooling2D(pool_size=13, strides=13, padding="VALID", name='classifier/layer3') 41 | ]) 42 | 43 | def call(self, x, save_path=None): 44 | x = self.net(x) 45 | scores = tf.reshape(x, (-1, self.num_classes)) 46 | return scores 47 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/coco_utils.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import os, json 3 | import numpy as np 4 | import h5py 5 | 6 | dir_path = os.path.dirname(os.path.realpath(__file__)) 7 | BASE_DIR = os.path.join(dir_path, "datasets/coco_captioning") 8 | 9 | def load_coco_data(base_dir=BASE_DIR, max_train=None, pca_features=True): 10 | print('base dir ', base_dir) 11 | data = {} 12 | caption_file = os.path.join(base_dir, "coco2014_captions.h5") 13 | with h5py.File(caption_file, "r") as f: 14 | for k, v in f.items(): 15 | data[k] = np.asarray(v) 16 | 17 | if pca_features: 18 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7_pca.h5") 19 | else: 20 | train_feat_file = os.path.join(base_dir, "train2014_vgg16_fc7.h5") 21 | with h5py.File(train_feat_file, "r") as f: 22 | data["train_features"] = np.asarray(f["features"]) 23 | 24 | if pca_features: 25 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7_pca.h5") 26 | else: 27 | val_feat_file = os.path.join(base_dir, "val2014_vgg16_fc7.h5") 28 | with h5py.File(val_feat_file, "r") as f: 29 | data["val_features"] = np.asarray(f["features"]) 30 | 31 | dict_file = os.path.join(base_dir, "coco2014_vocab.json") 32 | with open(dict_file, "r") as f: 33 | dict_data = json.load(f) 34 | for k, v in dict_data.items(): 35 | data[k] = v 36 | 37 | train_url_file = os.path.join(base_dir, "train2014_urls.txt") 38 | with open(train_url_file, "r") as f: 39 | train_urls = np.asarray([line.strip() for line in f]) 40 | data["train_urls"] = train_urls 41 | 42 | val_url_file = os.path.join(base_dir, "val2014_urls.txt") 43 | with open(val_url_file, "r") as f: 44 | val_urls = np.asarray([line.strip() for line in f]) 45 | data["val_urls"] = val_urls 46 | 47 | # Maybe subsample the training data 48 | if max_train is not None: 49 | num_train = data["train_captions"].shape[0] 50 | mask = np.random.randint(num_train, size=max_train) 51 | data["train_captions"] = data["train_captions"][mask] 52 | data["train_image_idxs"] = data["train_image_idxs"][mask] 53 | 54 | return data 55 | 56 | 57 | def decode_captions(captions, idx_to_word): 58 | singleton = False 59 | if captions.ndim == 1: 60 | singleton = True 61 | captions = captions[None] 62 | decoded = [] 63 | N, T = captions.shape 64 | for i in range(N): 65 | words = [] 66 | for t in range(T): 67 | word = idx_to_word[captions[i, t]] 68 | if word != "<NULL>": 69 | words.append(word) 70 | if word == "<END>": 71 | break 72 | decoded.append(" ".join(words)) 73 | if singleton: 74 | decoded = decoded[0] 75 |
return decoded 76 | 77 | 78 | def sample_coco_minibatch(data, batch_size=100, split="train"): 79 | split_size = data["%s_captions" % split].shape[0] 80 | mask = np.random.choice(split_size, batch_size) 81 | captions = data["%s_captions" % split][mask] 82 | image_idxs = data["%s_image_idxs" % split][mask] 83 | image_features = data["%s_features" % split][image_idxs] 84 | urls = data["%s_urls" % split][image_idxs] 85 | return captions, image_features, urls 86 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_assignment3_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -d "coco_captioning" ]; then 3 | sh get_coco_captioning.sh 4 | sh get_squeezenet_tf.sh 5 | sh get_imagenet_val.sh 6 | fi 7 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_coco_captioning.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/coco_captioning.zip" 2 | unzip coco_captioning.zip 3 | rm coco_captioning.zip 4 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -d "coco_captioning" ]; then 3 | sh get_coco_captioning.sh 4 | sh get_squeezenet_tf.sh 5 | sh get_imagenet_val.sh 6 | fi 7 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! 
-d "coco_captioning" ]; then 3 | sh get_coco_captioning.sh 4 | sh get_squeezenet_tf.sh 5 | sh get_imagenet_val.sh 6 | fi 7 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_imagenet_val.sh: -------------------------------------------------------------------------------- 1 | wget http://cs231n.stanford.edu/imagenet_val_25.npz 2 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/datasets/get_squeezenet_tf.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/squeezenet_tf2.zip" 2 | unzip squeezenet_tf2.zip 3 | rm squeezenet_tf2.zip 4 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | 9 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 10 | """ 11 | a naive implementation of numerical gradient of f at x 12 | - f should be a function that takes a single argument 13 | - x is the point (numpy array) to evaluate the gradient at 14 | """ 15 | 16 | fx = f(x) # evaluate function value at original point 17 | grad = np.zeros_like(x) 18 | # iterate over all indexes in x 19 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 20 | while not it.finished: 21 | 22 | # evaluate function at x+h 23 | ix = it.multi_index 24 | oldval = x[ix] 25 | x[ix] = oldval + h # increment by h 26 | fxph = f(x) # evalute f(x + h) 27 | x[ix] = oldval - h 28 | fxmh = f(x) # evaluate f(x - h) 29 | x[ix] = oldval # restore 30 | 31 | # compute the partial derivative with centered formula 32 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 33 | if verbose: 34 | print(ix, grad[ix]) 35 | it.iternext() # step to next dimension 36 | 37 | return grad 38 | 39 | 40 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 41 | """ 42 | Evaluate a numeric gradient for a function that accepts a numpy 43 | array and returns a numpy array. 44 | """ 45 | grad = np.zeros_like(x) 46 | it = np.nditer(x, flags=["multi_index"], op_flags=["readwrite"]) 47 | while not it.finished: 48 | ix = it.multi_index 49 | 50 | oldval = x[ix] 51 | x[ix] = oldval + h 52 | pos = f(x).copy() 53 | x[ix] = oldval - h 54 | neg = f(x).copy() 55 | x[ix] = oldval 56 | 57 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 58 | it.iternext() 59 | return grad 60 | 61 | 62 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 63 | """ 64 | Compute numeric gradients for a function that operates on input 65 | and output blobs. 66 | 67 | We assume that f accepts several input blobs as arguments, followed by a 68 | blob where outputs will be written. For example, f might be called like: 69 | 70 | f(x, w, out) 71 | 72 | where x and w are input Blobs, and the result of f will be written to out. 
73 | 74 | Inputs: 75 | - f: function 76 | - inputs: tuple of input blobs 77 | - output: output blob 78 | - h: step size 79 | """ 80 | numeric_diffs = [] 81 | for input_blob in inputs: 82 | diff = np.zeros_like(input_blob.diffs) 83 | it = np.nditer(input_blob.vals, flags=["multi_index"], op_flags=["readwrite"]) 84 | while not it.finished: 85 | idx = it.multi_index 86 | orig = input_blob.vals[idx] 87 | 88 | input_blob.vals[idx] = orig + h 89 | f(*(inputs + (output,))) 90 | pos = np.copy(output.vals) 91 | input_blob.vals[idx] = orig - h 92 | f(*(inputs + (output,))) 93 | neg = np.copy(output.vals) 94 | input_blob.vals[idx] = orig 95 | 96 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 97 | 98 | it.iternext() 99 | numeric_diffs.append(diff) 100 | return numeric_diffs 101 | 102 | 103 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 104 | return eval_numerical_gradient_blobs( 105 | lambda *args: net.forward(), inputs, output, h=h 106 | ) 107 | 108 | 109 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 110 | """ 111 | sample a few random elements and only return numerical 112 | in this dimensions. 113 | """ 114 | 115 | for i in range(num_checks): 116 | ix = tuple([randrange(m) for m in x.shape]) 117 | 118 | oldval = x[ix] 119 | x[ix] = oldval + h # increment by h 120 | fxph = f(x) # evaluate f(x + h) 121 | x[ix] = oldval - h # increment by h 122 | fxmh = f(x) # evaluate f(x - h) 123 | x[ix] = oldval # reset 124 | 125 | grad_numerical = (fxph - fxmh) / (2 * h) 126 | grad_analytic = analytic_grad[ix] 127 | rel_error = abs(grad_numerical - grad_analytic) / ( 128 | abs(grad_numerical) + abs(grad_analytic) 129 | ) 130 | print( 131 | "numerical: %f analytic: %f, relative error: %e" 132 | % (grad_numerical, grad_analytic, rel_error) 133 | ) 134 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_height) % stride == 0 10 | out_height = (H + 2 * padding - field_height) / stride + 1 11 | out_width = (W + 2 * padding - field_width) / stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant") 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, stride) 33 | 34 | cols = x_padded[:, k, i, j] 35 | C = x.shape[1] 36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 37 | return cols 38 | 39 | 40 | def col2im_indices(cols, x_shape, field_height=3, 
field_width=3, padding=1, stride=1): 41 | """ An implementation of col2im based on fancy indexing and np.add.at """ 42 | N, C, H, W = x_shape 43 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 44 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 45 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, stride) 46 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 47 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 48 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 49 | if padding == 0: 50 | return x_padded 51 | return x_padded[:, :, padding:-padding, padding:-padding] 52 | 53 | 54 | # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 55 | 56 | pass 57 | 58 | # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** 59 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/im2col_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # DTYPE = np.float64 6 | # ctypedef np.float64_t DTYPE_t 7 | 8 | ctypedef fused DTYPE_t: 9 | np.float32_t 10 | np.float64_t 11 | 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 13 | int field_width, int padding, int stride): 14 | cdef int N = x.shape[0] 15 | cdef int C = x.shape[1] 16 | cdef int H = x.shape[2] 17 | cdef int W = x.shape[3] 18 | 19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 21 | 22 | cdef int p = padding 23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 25 | 26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 27 | (C * field_height * field_width, N * HH * WW), 28 | dtype=x.dtype) 29 | 30 | # Moving the inner loop to a C function with no bounds checking works, but does 31 | # not seem to help performance in any measurable way. 32 | 33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 34 | field_height, field_width, padding, stride) 35 | return cols 36 | 37 | 38 | @cython.boundscheck(False) 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 40 | np.ndarray[DTYPE_t, ndim=4] x_padded, 41 | int N, int C, int H, int W, int HH, int WW, 42 | int field_height, int field_width, int padding, int stride) except? -1: 43 | cdef int c, ii, jj, row, yy, xx, i, col 44 | 45 | for c in range(C): 46 | for yy in range(HH): 47 | for xx in range(WW): 48 | for ii in range(field_height): 49 | for jj in range(field_width): 50 | row = c * field_width * field_height + ii * field_height + jj 51 | for i in range(N): 52 | col = yy * WW * N + xx * N + i 53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 54 | 55 | 56 | 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 58 | int field_height, int field_width, int padding, int stride): 59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 63 | dtype=cols.dtype) 64 | 65 | # Moving the inner loop to a C-function with no bounds checking improves 66 | # performance quite a bit for col2im. 
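# col2im_cython_inner scatters each column of `cols` back into the padded image,
# accumulating contributions from overlapping receptive fields with +=; the zero
# padding is then stripped off before the result is returned.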
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 119 | if pad > 0: 120 | return x_padded[:, :, pad:-pad, pad:-pad] 121 | return x_padded 122 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/image_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from future import standard_library 3 | 4 | standard_library.install_aliases() 5 | from builtins import range 6 | import urllib.request, urllib.error, urllib.parse, os, tempfile 7 | 8 | import numpy as np 9 | from imageio import imread 10 | from PIL import Image 11 | 12 | """ 13 | Utility functions used for viewing and processing images. 14 | """ 15 | 16 | 17 | def blur_image(X): 18 | """ 19 | A very gentle image blurring operation, to be used as a regularizer for 20 | image generation. 
21 | 22 | Inputs: 23 | - X: Image data of shape (N, 3, H, W) 24 | 25 | Returns: 26 | - X_blur: Blurred version of X, of shape (N, 3, H, W) 27 | """ 28 | from .fast_layers import conv_forward_fast 29 | 30 | w_blur = np.zeros((3, 3, 3, 3)) 31 | b_blur = np.zeros(3) 32 | blur_param = {"stride": 1, "pad": 1} 33 | for i in range(3): 34 | w_blur[i, i] = np.asarray([[1, 2, 1], [2, 188, 2], [1, 2, 1]], dtype=np.float32) 35 | w_blur /= 200.0 36 | return conv_forward_fast(X, w_blur, b_blur, blur_param)[0] 37 | 38 | 39 | SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) 40 | SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) 41 | 42 | 43 | def preprocess_image(img): 44 | """Preprocess an image for squeezenet. 45 | 46 | Subtracts the pixel mean and divides by the standard deviation. 47 | """ 48 | return (img.astype(np.float32) / 255.0 - SQUEEZENET_MEAN) / SQUEEZENET_STD 49 | 50 | 51 | def deprocess_image(img, rescale=False): 52 | """Undo preprocessing on an image and convert back to uint8.""" 53 | img = img * SQUEEZENET_STD + SQUEEZENET_MEAN 54 | if rescale: 55 | vmin, vmax = img.min(), img.max() 56 | img = (img - vmin) / (vmax - vmin) 57 | return np.clip(255 * img, 0.0, 255.0).astype(np.uint8) 58 | 59 | 60 | def image_from_url(url): 61 | """ 62 | Read an image from a URL. Returns a numpy array with the pixel data. 63 | We write the image to a temporary file then read it back. Kinda gross. 64 | """ 65 | try: 66 | f = urllib.request.urlopen(url) 67 | _, fname = tempfile.mkstemp() 68 | with open(fname, "wb") as ff: 69 | ff.write(f.read()) 70 | img = imread(fname) 71 | os.remove(fname) 72 | return img 73 | except urllib.error.URLError as e: 74 | print("URL Error: ", e.reason, url) 75 | except urllib.error.HTTPError as e: 76 | print("HTTP Error: ", e.code, url) 77 | 78 | 79 | def load_image(filename, size=None): 80 | """Load and resize an image from disk. 81 | 82 | Inputs: 83 | - filename: path to file 84 | - size: size of shortest dimension after rescaling 85 | """ 86 | img = imread(filename) 87 | if size is not None: 88 | orig_shape = np.array(img.shape[:2]) 89 | min_idx = np.argmin(orig_shape) 90 | scale_factor = float(size) / orig_shape[min_idx] 91 | new_shape = (orig_shape * scale_factor).astype(int) 92 | img = np.array(Image.fromarray(img).resize(new_shape)) 93 | return img 94 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/layer_utils.py: -------------------------------------------------------------------------------- 1 | from .layers import * 2 | from .fast_layers import * 3 | 4 | 5 | def affine_relu_forward(x, w, b): 6 | """ 7 | Convenience layer that perorms an affine transform followed by a ReLU 8 | 9 | Inputs: 10 | - x: Input to the affine layer 11 | - w, b: Weights for the affine layer 12 | 13 | Returns a tuple of: 14 | - out: Output from the ReLU 15 | - cache: Object to give to the backward pass 16 | """ 17 | a, fc_cache = affine_forward(x, w, b) 18 | out, relu_cache = relu_forward(a) 19 | cache = (fc_cache, relu_cache) 20 | return out, cache 21 | 22 | 23 | def affine_relu_backward(dout, cache): 24 | """ 25 | Backward pass for the affine-relu convenience layer 26 | """ 27 | fc_cache, relu_cache = cache 28 | da = relu_backward(dout, relu_cache) 29 | dx, dw, db = affine_backward(da, fc_cache) 30 | return dx, dw, db 31 | 32 | 33 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param): 34 | """ 35 | Convenience layer that performs an affine transform, batch normalization, 36 | and ReLU. 
37 | 38 | Inputs: 39 | - x: Array of shape (N, D1); input to the affine layer 40 | - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for 41 | the affine transform. 42 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift 43 | parameters for batch normalization. 44 | - bn_param: Dictionary of parameters for batch normalization. 45 | 46 | Returns: 47 | - out: Output from ReLU, of shape (N, D2) 48 | - cache: Object to give to the backward pass. 49 | """ 50 | a, fc_cache = affine_forward(x, w, b) 51 | a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param) 52 | out, relu_cache = relu_forward(a_bn) 53 | cache = (fc_cache, bn_cache, relu_cache) 54 | return out, cache 55 | 56 | 57 | def affine_bn_relu_backward(dout, cache): 58 | """ 59 | Backward pass for the affine-batchnorm-relu convenience layer. 60 | """ 61 | fc_cache, bn_cache, relu_cache = cache 62 | da_bn = relu_backward(dout, relu_cache) 63 | da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache) 64 | dx, dw, db = affine_backward(da, fc_cache) 65 | return dx, dw, db, dgamma, dbeta 66 | 67 | 68 | def affine_ln_relu_forward(x, w, b, gamma, beta, ln_param): 69 | """ 70 | Convenience layer that performs an affine transform, layer normalization, 71 | and ReLU. 72 | 73 | Inputs: 74 | - x: Array of shape (N, D1); input to the affine layer 75 | - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for 76 | the affine transform. 77 | - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift 78 | parameters for layer normalization. 79 | - ln_param: Dictionary of parameters for layer normalization. 80 | 81 | Returns: 82 | - out: Output from ReLU, of shape (N, D2) 83 | - cache: Object to give to the backward pass. 84 | """ 85 | a, fc_cache = affine_forward(x, w, b) 86 | a_ln, ln_cache = layernorm_forward(a, gamma, beta, ln_param) 87 | out, relu_cache = relu_forward(a_ln) 88 | cache = (fc_cache, ln_cache, relu_cache) 89 | return out, cache 90 | 91 | 92 | def affine_ln_relu_backward(dout, cache): 93 | """ 94 | Backward pass for the affine-layernorm-relu convenience layer. 95 | """ 96 | fc_cache, ln_cache, relu_cache = cache 97 | da_ln = relu_backward(dout, relu_cache) 98 | da, dgamma, dbeta = layernorm_backward(da_ln, ln_cache) 99 | dx, dw, db = affine_backward(da, fc_cache) 100 | return dx, dw, db, dgamma, dbeta 101 | 102 | 103 | def conv_relu_forward(x, w, b, conv_param): 104 | """ 105 | A convenience layer that performs a convolution followed by a ReLU. 106 | 107 | Inputs: 108 | - x: Input to the convolutional layer 109 | - w, b, conv_param: Weights and parameters for the convolutional layer 110 | 111 | Returns a tuple of: 112 | - out: Output from the ReLU 113 | - cache: Object to give to the backward pass 114 | """ 115 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 116 | out, relu_cache = relu_forward(a) 117 | cache = (conv_cache, relu_cache) 118 | return out, cache 119 | 120 | 121 | def conv_relu_backward(dout, cache): 122 | """ 123 | Backward pass for the conv-relu convenience layer.
124 | """ 125 | conv_cache, relu_cache = cache 126 | da = relu_backward(dout, relu_cache) 127 | dx, dw, db = conv_backward_fast(da, conv_cache) 128 | return dx, dw, db 129 | 130 | 131 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 132 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 133 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 134 | out, relu_cache = relu_forward(an) 135 | cache = (conv_cache, bn_cache, relu_cache) 136 | return out, cache 137 | 138 | 139 | def conv_bn_relu_backward(dout, cache): 140 | conv_cache, bn_cache, relu_cache = cache 141 | dan = relu_backward(dout, relu_cache) 142 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 143 | dx, dw, db = conv_backward_fast(da, conv_cache) 144 | return dx, dw, db, dgamma, dbeta 145 | 146 | 147 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 148 | """ 149 | Convenience layer that performs a convolution, a ReLU, and a pool. 150 | 151 | Inputs: 152 | - x: Input to the convolutional layer 153 | - w, b, conv_param: Weights and parameters for the convolutional layer 154 | - pool_param: Parameters for the pooling layer 155 | 156 | Returns a tuple of: 157 | - out: Output from the pooling layer 158 | - cache: Object to give to the backward pass 159 | """ 160 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 161 | s, relu_cache = relu_forward(a) 162 | out, pool_cache = max_pool_forward_fast(s, pool_param) 163 | cache = (conv_cache, relu_cache, pool_cache) 164 | return out, cache 165 | 166 | 167 | def conv_relu_pool_backward(dout, cache): 168 | """ 169 | Backward pass for the conv-relu-pool convenience layer 170 | """ 171 | conv_cache, relu_cache, pool_cache = cache 172 | ds = max_pool_backward_fast(dout, pool_cache) 173 | da = relu_backward(ds, relu_cache) 174 | dx, dw, db = conv_backward_fast(da, conv_cache) 175 | return dx, dw, db 176 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used for 5 | training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning rate, 16 | momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not perform 25 | well; however the default values of the other hyperparameters should work well 26 | for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 
36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: 41 | config = {} 42 | config.setdefault("learning_rate", 1e-2) 43 | 44 | w -= config["learning_rate"] * dw 45 | return w, config 46 | 47 | 48 | def adam(x, dx, config=None): 49 | """ 50 | Uses the Adam update rule, which incorporates moving averages of both the 51 | gradient and its square and a bias correction term. 52 | 53 | config format: 54 | - learning_rate: Scalar learning rate. 55 | - beta1: Decay rate for moving average of first moment of gradient. 56 | - beta2: Decay rate for moving average of second moment of gradient. 57 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 58 | - m: Moving average of gradient. 59 | - v: Moving average of squared gradient. 60 | - t: Iteration number. 61 | """ 62 | if config is None: 63 | config = {} 64 | config.setdefault("learning_rate", 1e-3) 65 | config.setdefault("beta1", 0.9) 66 | config.setdefault("beta2", 0.999) 67 | config.setdefault("epsilon", 1e-8) 68 | config.setdefault("m", np.zeros_like(x)) 69 | config.setdefault("v", np.zeros_like(x)) 70 | config.setdefault("t", 0) 71 | 72 | next_x = None 73 | beta1, beta2, eps = config["beta1"], config["beta2"], config["epsilon"] 74 | t, m, v = config["t"], config["m"], config["v"] 75 | m = beta1 * m + (1 - beta1) * dx 76 | v = beta2 * v + (1 - beta2) * (dx * dx) 77 | t += 1 78 | alpha = config["learning_rate"] * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t) 79 | x -= alpha * (m / (np.sqrt(v) + eps)) 80 | config["t"] = t 81 | config["m"] = m 82 | config["v"] = v 83 | next_x = x 84 | 85 | return next_x, config 86 | -------------------------------------------------------------------------------- /cs231n/assignment3/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension( 8 | "im2col_cython", ["im2col_cython.pyx"], include_dirs=[numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup(ext_modules=cythonize(extensions),) 13 | -------------------------------------------------------------------------------- /cs231n/assignment3/example_styletransfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/example_styletransfer.png -------------------------------------------------------------------------------- /cs231n/assignment3/gan_outputs_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/gan_outputs_pytorch.png -------------------------------------------------------------------------------- /cs231n/assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==19.1.0 2 | backcall==0.1.0 3 | bleach==3.1.0 4 | certifi==2019.3.9 5 | chardet==3.0.4 6 | colorama==0.4.1 7 | cycler==0.10.0 8 | Cython==0.29.16 9 | decorator==4.4.0 10 | defusedxml==0.5.0 11 | entrypoints==0.3 12 | future==0.17.1 13 | gitdb2==2.0.5 14 | GitPython==2.1.11 15 | idna==2.8 16 | ipykernel==5.1.0 17 | ipython==7.4.0 18 | ipython-genutils==0.2.0 19 | ipywidgets==7.4.2 20 | imageio==2.8.0 21 | jedi==0.13.3 22 | 
Jinja2==2.10 23 | jsonschema==3.0.1 24 | jupyter==1.0.0 25 | jupyter-client==5.2.4 26 | jupyter-console==6.0.0 27 | jupyter-core==4.4.0 28 | jupyterlab==0.35.4 29 | jupyterlab-server==0.2.0 30 | kiwisolver==1.0.1 31 | MarkupSafe==1.1.1 32 | matplotlib==3.0.3 33 | mistune==0.8.4 34 | nbconvert==5.4.1 35 | nbdime==1.0.5 36 | nbformat==4.4.0 37 | notebook==5.7.8 38 | numpy==1.18.4 39 | pandocfilters==1.4.2 40 | parso==0.3.4 41 | pexpect==4.6.0 42 | pickleshare==0.7.5 43 | Pillow==6.0.0 44 | prometheus-client==0.6.0 45 | prompt-toolkit==2.0.9 46 | ptyprocess==0.6.0 47 | Pygments==2.3.1 48 | pyparsing==2.3.1 49 | pyrsistent==0.14.11 50 | python-dateutil==2.8.0 51 | pyzmq==18.0.1 52 | qtconsole==4.4.3 53 | requests==2.21.0 54 | scipy==1.2.1 55 | Send2Trash==1.5.0 56 | six==1.12.0 57 | smmap2==2.0.5 58 | terminado==0.8.2 59 | testpath==0.4.2 60 | tornado==6.0.2 61 | traitlets==4.3.2 62 | urllib3==1.24.1 63 | wcwidth==0.1.7 64 | webencodings==0.5.1 65 | widgetsnbextension==3.4.2 66 | -------------------------------------------------------------------------------- /cs231n/assignment3/style-transfer-checks.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/style-transfer-checks.npz -------------------------------------------------------------------------------- /cs231n/assignment3/styles/bicentennial_print.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/bicentennial_print.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/composition_vii.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/composition_vii.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/horses_seashore.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/horses_seashore.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/muse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/muse.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/ritmo_plastico.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/ritmo_plastico.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/starry_night.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/starry_night.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/the_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/the_scream.jpg -------------------------------------------------------------------------------- /cs231n/assignment3/styles/tubingen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/cs231n/assignment3/styles/tubingen.jpg -------------------------------------------------------------------------------- /eecs498-007/A4/README.md: -------------------------------------------------------------------------------- 1 |
2 | EECS 498-007 / 598-005: Deep Learning for Computer Vision
3 | Assignment 4 (2020)
4 |
5 | 6 | # Goals 7 | 8 | From this assignment forward, you will use autograd in PyTorch to perform backpropgation for you. This will enable you to easily build complex models without worrying about writing code for the backward pass by hand. 9 | 10 | The goals of this assignment are: 11 | 12 | - Understand how autograd can help automate gradient computation. 13 | - See how to use PyTorch Modules to build up complex neural network architectures. 14 | - Understand and implement recurrent neural networks. 15 | - See how recurrent neural networks can be used for image captioning. 16 | - Understand how to augment recurrent neural networks with attention. 17 | - Use image gradients to synthesize saliency maps, adversarial examples, and perform class visualizations. 18 | - Combine content and style losses to perform artistic style transfer. 19 | 20 | # Questions 21 | 22 | ## Q1: PyTorch Autograd 23 | 24 | The notebook [``pytorch_autograd_and_nn.ipynb``](pytorch_autograd_and_nn.ipynb) will introduce you to the different levels of abstraction that PyTorch provides for building neural network models. You will use this knowledge to implement and train Residual Networks for image classification. 25 | 26 | ## Q2: Image Captioning with Recurrent Neural Networks 27 | 28 | The notebook [``rnn_lstm_attention_captioning.ipynb``](rnn_lstm_attention_captioning.ipynb) will walk you through the implementation of vanilla recurrent neural networks (RNN) and Long Short Term Memory (LSTM) RNNs. You will use these networks to train an image captioning model. You will then augment your implementation to perform spatial attention over image regions while generating captions. 29 | 30 | ## Q3: Network Visualization 31 | 32 | The notebook [``network_visualization.ipynb``](network_visualization.ipynb) will walk you through the use of image gradients for generating saliency maps, adversarial examples, and class visualizations. 33 | 34 | ## Q4: Style Transfer 35 | 36 | In the notebook [``style_transfer.ipynb``](style_transfer.ipynb), you will learn how to create images with the artistic style of one image and the content of another image. 37 | -------------------------------------------------------------------------------- /eecs498-007/A4/a4_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | from torch.utils.data import DataLoader 8 | from torch.utils.data import sampler 9 | from collections import OrderedDict 10 | import torchvision.datasets as dset 11 | import torchvision.transforms as T 12 | 13 | import random 14 | import numpy as np 15 | from scipy.ndimage.filters import gaussian_filter1d 16 | 17 | SQUEEZENET_MEAN = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float) 18 | SQUEEZENET_STD = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float) 19 | 20 | ### Helper Functions 21 | ''' 22 | Our pretrained model was trained on images that had been preprocessed by subtracting 23 | the per-color mean and dividing by the per-color standard deviation. We define a few helper 24 | functions for performing and undoing this preprocessing. 
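A minimal usage sketch (illustrative only: the file name below is a stand-in, and the
exact spatial size of the output depends on the input aspect ratio, since Resize only
scales the shorter side to 224):

    from PIL import Image
    img = Image.open('some_image.jpg').convert('RGB')
    x = preprocess(img)      # float tensor with a leading batch dim of 1, normalized
    pil_img = deprocess(x)   # approximately inverts the preprocessing, returns a PIL image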
25 | ''' 26 | def preprocess(img, size=224): 27 | transform = T.Compose([ 28 | T.Resize(size), 29 | T.ToTensor(), 30 | T.Normalize(mean=SQUEEZENET_MEAN.tolist(), 31 | std=SQUEEZENET_STD.tolist()), 32 | T.Lambda(lambda x: x[None]), 33 | ]) 34 | return transform(img) 35 | 36 | def deprocess(img, should_rescale=True): 37 | # should_rescale true for style transfer 38 | transform = T.Compose([ 39 | T.Lambda(lambda x: x[0]), 40 | T.Normalize(mean=[0, 0, 0], std=(1.0 / SQUEEZENET_STD).tolist()), 41 | T.Normalize(mean=(-SQUEEZENET_MEAN).tolist(), std=[1, 1, 1]), 42 | T.Lambda(rescale) if should_rescale else T.Lambda(lambda x: x), 43 | T.ToPILImage(), 44 | ]) 45 | return transform(img) 46 | 47 | # def deprocess(img): 48 | # transform = T.Compose([ 49 | # T.Lambda(lambda x: x[0]), 50 | # T.Normalize(mean=[0, 0, 0], std=[1.0 / s for s in SQUEEZENET_STD.tolist()]), 51 | # T.Normalize(mean=[-m for m in SQUEEZENET_MEAN.tolist()], std=[1, 1, 1]), 52 | # T.Lambda(rescale), 53 | # T.ToPILImage(), 54 | # ]) 55 | # return transform(img) 56 | 57 | def rescale(x): 58 | low, high = x.min(), x.max() 59 | x_rescaled = (x - low) / (high - low) 60 | return x_rescaled 61 | 62 | def blur_image(X, sigma=1): 63 | X_np = X.cpu().clone().numpy() 64 | X_np = gaussian_filter1d(X_np, sigma, axis=2) 65 | X_np = gaussian_filter1d(X_np, sigma, axis=3) 66 | X.copy_(torch.Tensor(X_np).type_as(X)) 67 | return X 68 | 69 | 70 | # Older versions of scipy.misc.imresize yield different results 71 | # from newer versions, so we check to make sure scipy is up to date. 72 | def check_scipy(): 73 | import scipy 74 | vnum = int(scipy.__version__.split('.')[1]) 75 | major_vnum = int(scipy.__version__.split('.')[0]) 76 | 77 | assert vnum >= 16 or major_vnum >= 1, "You must install SciPy >= 0.16.0 to complete this notebook." 78 | 79 | def jitter(X, ox, oy): 80 | """ 81 | Helper function to randomly jitter an image. 82 | 83 | Inputs 84 | - X: PyTorch Tensor of shape (N, C, H, W) 85 | - ox, oy: Integers giving number of pixels to jitter along W and H axes 86 | 87 | Returns: A new PyTorch Tensor of shape (N, C, H, W) 88 | """ 89 | if ox != 0: 90 | left = X[:, :, :, :-ox] 91 | right = X[:, :, :, -ox:] 92 | X = torch.cat([right, left], dim=3) 93 | if oy != 0: 94 | top = X[:, :, :-oy] 95 | bottom = X[:, :, -oy:] 96 | X = torch.cat([bottom, top], dim=2) 97 | return X 98 | 99 | 100 | def load_CIFAR(path='./datasets/'): 101 | NUM_TRAIN = 49000 102 | # The torchvision.transforms package provides tools for preprocessing data 103 | # and for performing data augmentation; here we set up a transform to 104 | # preprocess the data by subtracting the mean RGB value and dividing by the 105 | # standard deviation of each RGB value; we've hardcoded the mean and std. 106 | transform = T.Compose([ 107 | T.ToTensor(), 108 | T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) 109 | ]) 110 | 111 | # We set up a Dataset object for each split (train / val / test); Datasets load 112 | # training examples one at a time, so we wrap each Dataset in a DataLoader which 113 | # iterates through the Dataset and forms minibatches. We divide the CIFAR-10 114 | # training set into train and val sets by passing a Sampler object to the 115 | # DataLoader telling how it should sample from the underlying Dataset. 
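    # Concretely: with NUM_TRAIN = 49000, the train loader below draws only from
    # indices [0, 49000) of the CIFAR-10 training set, while the val loader draws
    # from the remaining 1000 indices [49000, 50000) of that same training set.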
116 | cifar10_train = dset.CIFAR10(path, train=True, download=True, 117 | transform=transform) 118 | loader_train = DataLoader(cifar10_train, batch_size=64, 119 | sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN))) 120 | 121 | cifar10_val = dset.CIFAR10(path, train=True, download=True, 122 | transform=transform) 123 | loader_val = DataLoader(cifar10_val, batch_size=64, 124 | sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000))) 125 | 126 | cifar10_test = dset.CIFAR10(path, train=False, download=True, 127 | transform=transform) 128 | loader_test = DataLoader(cifar10_test, batch_size=64) 129 | return loader_train, loader_val, loader_test 130 | 131 | 132 | def load_imagenet_val(num=None, path='./datasets/imagenet_val_25.npz'): 133 | """Load a handful of validation images from ImageNet. 134 | Inputs: 135 | - num: Number of images to load (max of 25) 136 | Returns: 137 | - X: numpy array with shape [num, 224, 224, 3] 138 | - y: numpy array of integer image labels, shape [num] 139 | - class_names: dict mapping integer label to class name 140 | """ 141 | imagenet_fn = os.path.join(path) 142 | if not os.path.isfile(imagenet_fn): 143 | print('file %s not found' % imagenet_fn) 144 | print('Run the above cell to download the data') 145 | assert False, 'Need to download imagenet_val_25.npz' 146 | f = np.load(imagenet_fn, allow_pickle=True) 147 | X = f['X'] 148 | y = f['y'] 149 | class_names = f['label_map'].item() 150 | if num is not None: 151 | X = X[:num] 152 | y = y[:num] 153 | return X, y, class_names 154 | 155 | 156 | def load_COCO(path = './datasets/coco.pt'): 157 | ''' 158 | Download and load serialized COCO data from coco.pt 159 | It contains a dictionary of 160 | "train_images" - resized training images (112x112) 161 | "val_images" - resized validation images (112x112) 162 | "train_captions" - tokenized and numericalized training captions 163 | "val_captions" - tokenized and numericalized validation captions 164 | "vocab" - caption vocabulary, including "idx_to_token" and "token_to_idx" 165 | 166 | Returns: a data dictionary 167 | ''' 168 | data_dict = torch.load(path) 169 | # print out all the keys and values from the data dictionary 170 | for k, v in data_dict.items(): 171 | if type(v) == torch.Tensor: 172 | print(k, type(v), v.shape, v.dtype) 173 | else: 174 | print(k, type(v), v.keys()) 175 | 176 | num_train = data_dict['train_images'].size(0) 177 | num_val = data_dict['val_images'].size(0) 178 | assert data_dict['train_images'].size(0) == data_dict['train_captions'].size(0) and \ 179 | data_dict['val_images'].size(0) == data_dict['val_captions'].size(0), \ 180 | 'shapes of data mismatch!' 
181 | 182 | print('\nTrain images shape: ', data_dict['train_images'].shape) 183 | print('Train caption tokens shape: ', data_dict['train_captions'].shape) 184 | print('Validation images shape: ', data_dict['val_images'].shape) 185 | print('Validation caption tokens shape: ', data_dict['val_captions'].shape) 186 | print('total number of caption tokens: ', len(data_dict['vocab']['idx_to_token'])) 187 | print('mappings (list) from index to caption token: ', data_dict['vocab']['idx_to_token']) 188 | print('mappings (dict) from caption token to index: ', data_dict['vocab']['token_to_idx']) 189 | 190 | 191 | return data_dict 192 | 193 | 194 | ## Dump files for submission 195 | def dump_results(submission, path): 196 | ''' 197 | Dumps a dictionary as a .pkl file for autograder 198 | results: a dictionary 199 | path: path for saving the dict object 200 | ''' 201 | # del submission['rnn_model'] 202 | # del submission['lstm_model'] 203 | # del submission['attn_model'] 204 | with open(path, "wb") as f: 205 | pickle.dump(submission, f) 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /eecs498-007/A4/adversarial_attacks_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/adversarial_attacks_results.jpg -------------------------------------------------------------------------------- /eecs498-007/A4/class_viz_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/class_viz_result.jpg -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data, grad, submit 2 | from .solver import Solver 3 | from .utils import reset_seed 4 | from .vis import tensor_to_image, visualize_dataset 5 | -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import matplotlib.pyplot as plt 5 | import torch 6 | import torchvision 7 | from torchvision.datasets import CIFAR10 8 | 9 | import eecs598 10 | 11 | 12 | def _extract_tensors(dset, num=None, x_dtype=torch.float32): 13 | """ 14 | Extract the data and labels from a CIFAR10 dataset object and convert them to 15 | tensors. 16 | 17 | Input: 18 | - dset: A torchvision.datasets.CIFAR10 object 19 | - num: Optional. If provided, the number of samples to keep. 20 | - x_dtype: Optional. 
data type of the input image 21 | 22 | Returns: 23 | - x: `x_dtype` tensor of shape (N, 3, 32, 32) 24 | - y: int64 tensor of shape (N,) 25 | """ 26 | x = torch.tensor(dset.data, dtype=x_dtype).permute(0, 3, 1, 2).div_(255) 27 | y = torch.tensor(dset.targets, dtype=torch.int64) 28 | if num is not None: 29 | if num <= 0 or num > x.shape[0]: 30 | raise ValueError( 31 | "Invalid value num=%d; must be in the range [0, %d]" % (num, x.shape[0]) 32 | ) 33 | x = x[:num].clone() 34 | y = y[:num].clone() 35 | return x, y 36 | 37 | 38 | def cifar10(num_train=None, num_test=None, x_dtype=torch.float32): 39 | """ 40 | Return the CIFAR10 dataset, automatically downloading it if necessary. 41 | This function can also subsample the dataset. 42 | 43 | Inputs: 44 | - num_train: [Optional] How many samples to keep from the training set. 45 | If not provided, then keep the entire training set. 46 | - num_test: [Optional] How many samples to keep from the test set. 47 | If not provided, then keep the entire test set. 48 | - x_dtype: [Optional] Data type of the input image 49 | 50 | Returns: 51 | - x_train: `x_dtype` tensor of shape (num_train, 3, 32, 32) 52 | - y_train: int64 tensor of shape (num_train, 3, 32, 32) 53 | - x_test: `x_dtype` tensor of shape (num_test, 3, 32, 32) 54 | - y_test: int64 tensor of shape (num_test, 3, 32, 32) 55 | """ 56 | download = not os.path.isdir("cifar-10-batches-py") 57 | dset_train = CIFAR10(root=".", download=download, train=True) 58 | dset_test = CIFAR10(root=".", train=False) 59 | x_train, y_train = _extract_tensors(dset_train, num_train, x_dtype) 60 | x_test, y_test = _extract_tensors(dset_test, num_test, x_dtype) 61 | 62 | return x_train, y_train, x_test, y_test 63 | 64 | 65 | def preprocess_cifar10( 66 | cuda=True, 67 | show_examples=True, 68 | bias_trick=False, 69 | flatten=True, 70 | validation_ratio=0.2, 71 | dtype=torch.float32, 72 | ): 73 | """ 74 | Returns a preprocessed version of the CIFAR10 dataset, automatically 75 | downloading if necessary. We perform the following steps: 76 | 77 | (0) [Optional] Visualize some images from the dataset 78 | (1) Normalize the data by subtracting the mean 79 | (2) Reshape each image of shape (3, 32, 32) into a vector of shape (3072,) 80 | (3) [Optional] Bias trick: add an extra dimension of ones to the data 81 | (4) Carve out a validation set from the training set 82 | 83 | Inputs: 84 | - cuda: If true, move the entire dataset to the GPU 85 | - validation_ratio: Float in the range (0, 1) giving the fraction of the train 86 | set to reserve for validation 87 | - bias_trick: Boolean telling whether or not to apply the bias trick 88 | - show_examples: Boolean telling whether or not to visualize data samples 89 | - dtype: Optional, data type of the input image X 90 | 91 | Returns a dictionary with the following keys: 92 | - 'X_train': `dtype` tensor of shape (N_train, D) giving training images 93 | - 'X_val': `dtype` tensor of shape (N_val, D) giving val images 94 | - 'X_test': `dtype` tensor of shape (N_test, D) giving test images 95 | - 'y_train': int64 tensor of shape (N_train,) giving training labels 96 | - 'y_val': int64 tensor of shape (N_val,) giving val labels 97 | - 'y_test': int64 tensor of shape (N_test,) giving test labels 98 | 99 | N_train, N_val, and N_test are the number of examples in the train, val, and 100 | test sets respectively. The precise values of N_train and N_val are determined 101 | by the input parameter validation_ratio. 
D is the dimension of the image data; 102 | if bias_trick is False, then D = 32 * 32 * 3 = 3072; 103 | if bias_trick is True then D = 1 + 32 * 32 * 3 = 3073. 104 | """ 105 | X_train, y_train, X_test, y_test = cifar10(x_dtype=dtype) 106 | 107 | # Move data to the GPU 108 | if cuda: 109 | X_train = X_train.cuda() 110 | y_train = y_train.cuda() 111 | X_test = X_test.cuda() 112 | y_test = y_test.cuda() 113 | 114 | # 0. Visualize some examples from the dataset. 115 | if show_examples: 116 | classes = [ 117 | "plane", 118 | "car", 119 | "bird", 120 | "cat", 121 | "deer", 122 | "dog", 123 | "frog", 124 | "horse", 125 | "ship", 126 | "truck", 127 | ] 128 | samples_per_class = 12 129 | samples = [] 130 | eecs598.reset_seed(0) 131 | for y, cls in enumerate(classes): 132 | plt.text(-4, 34 * y + 18, cls, ha="right") 133 | (idxs,) = (y_train == y).nonzero(as_tuple=True) 134 | for i in range(samples_per_class): 135 | idx = idxs[random.randrange(idxs.shape[0])].item() 136 | samples.append(X_train[idx]) 137 | img = torchvision.utils.make_grid(samples, nrow=samples_per_class) 138 | plt.imshow(eecs598.tensor_to_image(img)) 139 | plt.axis("off") 140 | plt.show() 141 | 142 | # 1. Normalize the data: subtract the mean RGB (zero mean) 143 | mean_image = X_train.mean(dim=(0, 2, 3), keepdim=True) 144 | X_train -= mean_image 145 | X_test -= mean_image 146 | 147 | # 2. Reshape the image data into rows 148 | if flatten: 149 | X_train = X_train.reshape(X_train.shape[0], -1) 150 | X_test = X_test.reshape(X_test.shape[0], -1) 151 | 152 | # 3. Add bias dimension and transform into columns 153 | if bias_trick: 154 | ones_train = torch.ones(X_train.shape[0], 1, device=X_train.device) 155 | X_train = torch.cat([X_train, ones_train], dim=1) 156 | ones_test = torch.ones(X_test.shape[0], 1, device=X_test.device) 157 | X_test = torch.cat([X_test, ones_test], dim=1) 158 | 159 | # 4. take the validation set from the training set 160 | # Note: It should not be taken from the test set 161 | # For random permumation, you can use torch.randperm or torch.randint 162 | # But, for this homework, we use slicing instead. 163 | num_training = int(X_train.shape[0] * (1.0 - validation_ratio)) 164 | num_validation = X_train.shape[0] - num_training 165 | 166 | # return the dataset 167 | data_dict = {} 168 | data_dict["X_val"] = X_train[num_training : num_training + num_validation] 169 | data_dict["y_val"] = y_train[num_training : num_training + num_validation] 170 | data_dict["X_train"] = X_train[0:num_training] 171 | data_dict["y_train"] = y_train[0:num_training] 172 | 173 | data_dict["X_test"] = X_test 174 | data_dict["y_test"] = y_test 175 | return data_dict 176 | -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/grad.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | 5 | import eecs598 6 | 7 | """ Utilities for computing and checking gradients. """ 8 | 9 | 10 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-7): 11 | """ 12 | Utility function to perform numeric gradient checking. We use the centered 13 | difference formula to compute a numeric derivative: 14 | 15 | f'(x) =~ (f(x + h) - f(x - h)) / (2h) 16 | 17 | Rather than computing a full numeric gradient, we sparsely sample a few 18 | dimensions along which to compute numeric derivatives. 
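    Example usage (a minimal sketch; the function and tensors below are made-up
    stand-ins, not values from any assignment):

        f = lambda t: (t ** 2).sum()                # returns a torch scalar
        x = torch.randn(4, 5, dtype=torch.float64)  # point at which to check
        grad_check_sparse(f, x, 2 * x)              # analytic gradient of sum(t^2) is 2t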
19 | 20 | Inputs: 21 | - f: A function that inputs a torch tensor and returns a torch scalar 22 | - x: A torch tensor of the point at which to evaluate the numeric gradient 23 | - analytic_grad: A torch tensor giving the analytic gradient of f at x 24 | - num_checks: The number of dimensions along which to check 25 | - h: Step size for computing numeric derivatives 26 | """ 27 | # fix random seed to 0 28 | eecs598.reset_seed(0) 29 | for i in range(num_checks): 30 | 31 | ix = tuple([random.randrange(m) for m in x.shape]) 32 | 33 | oldval = x[ix].item() 34 | x[ix] = oldval + h # increment by h 35 | fxph = f(x).item() # evaluate f(x + h) 36 | x[ix] = oldval - h # increment by h 37 | fxmh = f(x).item() # evaluate f(x - h) 38 | x[ix] = oldval # reset 39 | 40 | grad_numerical = (fxph - fxmh) / (2 * h) 41 | grad_analytic = analytic_grad[ix] 42 | rel_error_top = abs(grad_numerical - grad_analytic) 43 | rel_error_bot = abs(grad_numerical) + abs(grad_analytic) + 1e-12 44 | rel_error = rel_error_top / rel_error_bot 45 | msg = "numerical: %f analytic: %f, relative error: %e" 46 | print(msg % (grad_numerical, grad_analytic, rel_error)) 47 | 48 | 49 | def compute_numeric_gradient(f, x, dLdf=None, h=1e-7): 50 | """ 51 | Compute the numeric gradient of f at x using a finite differences 52 | approximation. We use the centered difference: 53 | 54 | df f(x + h) - f(x - h) 55 | -- ~= ------------------- 56 | dx 2 * h 57 | 58 | Function can also expand this easily to intermediate layers using the 59 | chain rule: 60 | 61 | dL df dL 62 | -- = -- * -- 63 | dx dx df 64 | 65 | Inputs: 66 | - f: A function that inputs a torch tensor and returns a torch scalar 67 | - x: A torch tensor giving the point at which to compute the gradient 68 | - dLdf: optional upstream gradient for intermediate layers 69 | - h: epsilon used in the finite difference calculation 70 | Returns: 71 | - grad: A tensor of the same shape as x giving the gradient of f at x 72 | """ 73 | flat_x = x.contiguous().flatten() 74 | grad = torch.zeros_like(x) 75 | flat_grad = grad.flatten() 76 | 77 | # Initialize upstream gradient to be ones if not provide 78 | if dLdf is None: 79 | y = f(x) 80 | dLdf = torch.ones_like(y) 81 | dLdf = dLdf.flatten() 82 | 83 | # iterate over all indexes in x 84 | for i in range(flat_x.shape[0]): 85 | oldval = flat_x[i].item() # Store the original value 86 | flat_x[i] = oldval + h # Increment by h 87 | fxph = f(x).flatten() # Evaluate f(x + h) 88 | flat_x[i] = oldval - h # Decrement by h 89 | fxmh = f(x).flatten() # Evaluate f(x - h) 90 | flat_x[i] = oldval # Restore original value 91 | 92 | # compute the partial derivative with centered formula 93 | dfdxi = (fxph - fxmh) / (2 * h) 94 | 95 | # use chain rule to compute dLdx 96 | flat_grad[i] = dLdf.dot(dfdxi).item() 97 | 98 | # Note that since flat_grad was only a reference to grad, 99 | # we can just return the object in the shape of x by returning grad 100 | return grad 101 | 102 | 103 | def rel_error(x, y, eps=1e-10): 104 | """ 105 | Compute the relative error between a pair of tensors x and y, 106 | which is defined as: 107 | 108 | max_i |x_i - y_i]| 109 | rel_error(x, y) = ------------------------------- 110 | max_i |x_i| + max_i |y_i| + eps 111 | 112 | Inputs: 113 | - x, y: Tensors of the same shape 114 | - eps: Small positive constant for numeric stability 115 | 116 | Returns: 117 | - rel_error: Scalar giving the relative error between x and y 118 | """ 119 | """ returns relative error between x and y """ 120 | top = (x - y).abs().max().item() 121 | bot = (x.abs() + 
y.abs()).clamp(min=eps).max().item() 122 | return top / bot 123 | -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/submit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | _A1_FILES = [ 5 | "pytorch101.py", 6 | "pytorch101.ipynb", 7 | "knn.py", 8 | "knn.ipynb", 9 | ] 10 | 11 | _A2_FILES = [ 12 | "linear_classifier.py", 13 | "linear_classifier.ipynb", 14 | "two_layer_net.py", 15 | "two_layer_net.ipynb", 16 | "svm_best_model.pt", 17 | "softmax_best_model.pt", 18 | "nn_best_model.pt", 19 | ] 20 | 21 | _A3_FILES = [ 22 | "fully_connected_networks.py", 23 | "fully_connected_networks.ipynb", 24 | "convolutional_networks.py", 25 | "convolutional_networks.ipynb", 26 | "best_overfit_five_layer_net.pth", 27 | "best_two_layer_net.pth", 28 | "one_minute_deepconvnet.pth", 29 | "overfit_deepconvnet.pth", 30 | ] 31 | 32 | _A4_FILES = [ 33 | 'network_visualization.py', 34 | 'network_visualization.ipynb', 35 | 'style_transfer.py', 36 | 'style_transfer.ipynb', 37 | 'pytorch_autograd_and_nn.py', 38 | 'pytorch_autograd_and_nn.ipynb', 39 | 'rnn_lstm_attention_captioning.py', 40 | 'rnn_lstm_attention_captioning.ipynb', 41 | # result files 42 | 'pytorch_autograd_and_nn.pkl', 43 | 'rnn_lstm_attention_submission.pkl', 44 | 'saliency_maps_results.jpg', 45 | 'adversarial_attacks_results.jpg', 46 | 'class_viz_result.jpg', 47 | 'style_transfer_result.jpg', 48 | 'feature_inversion_result.jpg' 49 | ] 50 | 51 | def make_a1_submission(assignment_path, uniquename=None, umid=None): 52 | _make_submission(assignment_path, _A1_FILES, "A1", uniquename, umid) 53 | 54 | 55 | def make_a2_submission(assignment_path, uniquename=None, umid=None): 56 | _make_submission(assignment_path, _A2_FILES, "A2", uniquename, umid) 57 | 58 | 59 | def make_a3_submission(assignment_path, uniquename=None, umid=None): 60 | _make_submission(assignment_path, _A3_FILES, "A3", uniquename, umid) 61 | 62 | def make_a4_submission(assignment_path, uniquename=None, umid=None): 63 | _make_submission(assignment_path, _A4_FILES, "A4", uniquename, umid) 64 | 65 | 66 | def _make_submission( 67 | assignment_path, file_list, assignment_no, uniquename=None, umid=None 68 | ): 69 | if uniquename is None or umid is None: 70 | uniquename, umid = _get_user_info() 71 | zip_path = "{}_{}_{}.zip".format(uniquename, umid, assignment_no) 72 | zip_path = os.path.join(assignment_path, zip_path) 73 | print("Writing zip file to: ", zip_path) 74 | with zipfile.ZipFile(zip_path, "w") as zf: 75 | for filename in file_list: 76 | in_path = os.path.join(assignment_path, filename) 77 | if not os.path.isfile(in_path): 78 | raise ValueError('Could not find file "%s"' % filename) 79 | zf.write(in_path, filename) 80 | 81 | 82 | def _get_user_info(): 83 | if uniquename is None: 84 | uniquename = input("Enter your uniquename (e.g. justincj): ") 85 | if umid is None: 86 | umid = input("Enter your umid (e.g. 
12345678): ") 87 | return uniquename, umid 88 | -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | from torchvision.utils import make_grid 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | import numpy as np 7 | 8 | """ 9 | General utilities to help with implementation 10 | """ 11 | 12 | def reset_seed(number): 13 | """ 14 | Reset random seed to the specific number 15 | 16 | Inputs: 17 | - number: A seed number to use 18 | """ 19 | random.seed(number) 20 | torch.manual_seed(number) 21 | return 22 | 23 | def tensor_to_image(tensor): 24 | """ 25 | Convert a torch tensor into a numpy ndarray for visualization. 26 | 27 | Inputs: 28 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 29 | 30 | Returns: 31 | - ndarr: A uint8 numpy array of shape (H, W, 3) 32 | """ 33 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 34 | ndarr = tensor.to('cpu', torch.uint8).numpy() 35 | return ndarr 36 | 37 | 38 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 39 | """ 40 | Make a grid-shape image to plot 41 | 42 | Inputs: 43 | - X_data: set of [batch, 3, width, height] data 44 | - y_data: paired label of X_data in [batch] shape 45 | - samples_per_class: number of samples want to present 46 | - class_list: list of class names 47 | e.g.) ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 48 | 49 | Outputs: 50 | - An grid-image that visualize samples_per_class number of samples per class 51 | """ 52 | img_half_width = X_data.shape[2] // 2 53 | samples = [] 54 | for y, cls in enumerate(class_list): 55 | plt.text(-4, (img_half_width * 2 + 2) * y + (img_half_width + 2), cls, ha='right') 56 | idxs = (y_data == y).nonzero().view(-1) 57 | for i in range(samples_per_class): 58 | idx = idxs[random.randrange(idxs.shape[0])].item() 59 | samples.append(X_data[idx]) 60 | 61 | img = make_grid(samples, nrow=samples_per_class) 62 | return tensor_to_image(img) 63 | 64 | 65 | def decode_captions(captions, idx_to_word): 66 | """ 67 | Decoding caption indexes into words. 68 | Inputs: 69 | - captions: Caption indexes in a tensor of shape (Nx)T. 70 | - idx_to_word: Mapping from the vocab index to word. 71 | 72 | Outputs: 73 | - decoded: A sentence (or a list of N sentences). 74 | """ 75 | singleton = False 76 | if captions.ndim == 1: 77 | singleton = True 78 | captions = captions[None] 79 | decoded = [] 80 | N, T = captions.shape 81 | for i in range(N): 82 | words = [] 83 | for t in range(T): 84 | word = idx_to_word[captions[i, t]] 85 | if word != '': 86 | words.append(word) 87 | if word == '': 88 | break 89 | decoded.append(' '.join(words)) 90 | if singleton: 91 | decoded = decoded[0] 92 | return decoded 93 | 94 | 95 | def attention_visualizer(img, attn_weights, token): 96 | """ 97 | Visuailze the attended regions on a single frame from a single query word. 98 | Inputs: 99 | - img: Image tensor input, of shape (3, H, W) 100 | - attn_weights: Attention weight tensor, on the final activation map 101 | - token: The token string you want to display above the image 102 | 103 | Outputs: 104 | - img_output: Image tensor output, of shape (3, H+25, W) 105 | 106 | """ 107 | C, H, W = img.shape 108 | assert C == 3, 'We only support image with three color channels!' 
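    # The steps below upsample the coarse attention map to the full (H, W) image
    # grid with nearest-neighbor interpolation, repeat it across the three color
    # channels, and alpha-blend it with the (BGR-converted) image for display.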
109 | 110 | # Reshape attention map 111 | attn_weights = cv2.resize(attn_weights.data.numpy().copy(), 112 | (H, W), interpolation=cv2.INTER_NEAREST) 113 | attn_weights = np.repeat(np.expand_dims(attn_weights, axis=2), 3, axis=2) 114 | 115 | # Combine image and attention map 116 | img_copy = img.float().div(255.).permute(1, 2, 0 117 | ).numpy()[:, :, ::-1].copy() # covert to BGR for cv2 118 | masked_img = cv2.addWeighted(attn_weights, 0.5, img_copy, 0.5, 0) 119 | img_copy = np.concatenate((np.zeros((25, W, 3)), 120 | masked_img), axis=0) 121 | 122 | # Add text 123 | cv2.putText(img_copy, '%s' % (token), (10, 15), 124 | cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), thickness=1) 125 | 126 | return img_copy -------------------------------------------------------------------------------- /eecs498-007/A4/eecs598/vis.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import matplotlib.pyplot as plt 4 | import torch 5 | from torchvision.utils import make_grid 6 | 7 | 8 | """ 9 | Utilities to help with visualizing images and other data 10 | """ 11 | 12 | 13 | def tensor_to_image(tensor): 14 | """ 15 | Convert a torch tensor into a numpy ndarray for visualization. 16 | 17 | Inputs: 18 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 19 | 20 | Returns: 21 | - ndarr: A uint8 numpy array of shape (H, W, 3) 22 | """ 23 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 24 | ndarr = tensor.to("cpu", torch.uint8).numpy() 25 | return ndarr 26 | 27 | 28 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 29 | """ 30 | Make a grid-shape image to plot 31 | 32 | Inputs: 33 | - X_data: set of [batch, 3, width, height] data 34 | - y_data: paired label of X_data in [batch] shape 35 | - samples_per_class: number of samples want to present 36 | - class_list: list of class names; eg, 37 | ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 38 | 39 | Outputs: 40 | - An grid-image that visualize samples_per_class number of samples per class 41 | """ 42 | img_half_width = X_data.shape[2] // 2 43 | samples = [] 44 | for y, cls in enumerate(class_list): 45 | tx = -4 46 | ty = (img_half_width * 2 + 2) * y + (img_half_width + 2) 47 | plt.text(tx, ty, cls, ha="right") 48 | idxs = (y_data == y).nonzero().view(-1) 49 | for i in range(samples_per_class): 50 | idx = idxs[random.randrange(idxs.shape[0])].item() 51 | samples.append(X_data[idx]) 52 | 53 | img = make_grid(samples, nrow=samples_per_class) 54 | return tensor_to_image(img) 55 | -------------------------------------------------------------------------------- /eecs498-007/A4/feature_inversion_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/feature_inversion_result.jpg -------------------------------------------------------------------------------- /eecs498-007/A4/pytorch_autograd_and_nn.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/pytorch_autograd_and_nn.pkl -------------------------------------------------------------------------------- /eecs498-007/A4/rnn_lstm_attention_submission.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/rnn_lstm_attention_submission.pkl -------------------------------------------------------------------------------- /eecs498-007/A4/saliency_maps_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/saliency_maps_results.jpg -------------------------------------------------------------------------------- /eecs498-007/A4/style_transfer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements a style transfer in PyTorch. 3 | WARNING: you SHOULD NOT use ".to()" or ".cuda()" in each implementation block. 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | from a4_helper import * 9 | 10 | def hello(): 11 | """ 12 | This is a sample function that we will try to import and run to ensure that 13 | our environment is correctly set up on Google Colab. 14 | """ 15 | print('Hello from style_transfer.py!') 16 | 17 | def content_loss(content_weight, content_current, content_original): 18 | """ 19 | Compute the content loss for style transfer. 20 | 21 | Inputs: 22 | - content_weight: Scalar giving the weighting for the content loss. 23 | - content_current: features of the current image; this is a PyTorch Tensor of shape 24 | (1, C_l, H_l, W_l). 25 | - content_target: features of the content image, Tensor with shape (1, C_l, H_l, W_l). 26 | 27 | Returns: 28 | - scalar content loss 29 | """ 30 | ############################################################################## 31 | # TODO: Compute the content loss for style transfer. # 32 | ############################################################################## 33 | # Replace "pass" statement with your code 34 | 35 | # Sum of the squared difference of current/original feature maps. 36 | sum_fm = torch.sum((content_current - content_original) ** 2) 37 | # Compute the content loss. 38 | loss = content_weight * sum_fm 39 | 40 | return loss 41 | 42 | ############################################################################## 43 | # END OF YOUR CODE # 44 | ############################################################################## 45 | 46 | 47 | def gram_matrix(features, normalize=True): 48 | """ 49 | Compute the Gram matrix from features. 50 | 51 | Inputs: 52 | - features: PyTorch Tensor of shape (N, C, H, W) giving features for 53 | a batch of N images. 54 | - normalize: optional, whether to normalize the Gram matrix 55 | If True, divide the Gram matrix by the number of neurons (H * W * C) 56 | 57 | Returns: 58 | - gram: PyTorch Tensor of shape (N, C, C) giving the 59 | (optionally normalized) Gram matrices for the N input images. 60 | """ 61 | gram = None 62 | ############################################################################## 63 | # TODO: Compute the Gram matrix from features. # 64 | # Don't forget to implement for both normalized and non-normalized version # 65 | ############################################################################## 66 | # Replace "pass" statement with your code 67 | 68 | N, C, H, W = features.shape 69 | 70 | # Reshape "features" to (N, C, M), where M=H*W. 71 | rfeatures = features.view(N, C, H*W) 72 | # Transpose the last two axes of the "reshaped features". 73 | # "trfeatures" shape is (N, M, C) 74 | # This is needed to apply the "matmul" operator below. 
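    # Shape walk-through with illustrative numbers: for N=1, C=2, H=W=3,
    # "rfeatures" is (1, 2, 9) and its transpose below is (1, 9, 2), so the
    # batched matmul yields a (1, 2, 2) Gram matrix whose (i, j) entry is the
    # dot product between the flattened activations of channels i and j.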
75 | trfeatures = torch.transpose(rfeatures, 1, 2) 76 | 77 | # Apply the "matmul" operator, which computes the "dot product" 78 | # of the last two axes (which must be linear). 79 | # Shapes: (N, C, M) @ (N, M, C) -> (N, C, C) 80 | gram = rfeatures @ trfeatures 81 | 82 | # Optionally, normalize the Gram matrix. 83 | if normalize: 84 | gram /= (H * W * C) 85 | 86 | ############################################################################## 87 | # END OF YOUR CODE # 88 | ############################################################################## 89 | return gram 90 | 91 | 92 | def style_loss(feats, style_layers, style_targets, style_weights): 93 | """ 94 | Computes the style loss at a set of layers. 95 | 96 | Inputs: 97 | - feats: list of the features at every layer of the current image, as produced by 98 | the extract_features function. 99 | - style_layers: List of layer indices into feats giving the layers to include in the 100 | style loss. 101 | - style_targets: List of the same length as style_layers, where style_targets[i] is 102 | a PyTorch Tensor giving the Gram matrix of the source style image computed at 103 | layer style_layers[i]. 104 | - style_weights: List of the same length as style_layers, where style_weights[i] 105 | is a scalar giving the weight for the style loss at layer style_layers[i]. 106 | 107 | Returns: 108 | - style_loss: A PyTorch Tensor holding a scalar giving the style loss. 109 | """ 110 | ############################################################################## 111 | # TODO: Computes the style loss at a set of layers. # 112 | # Hint: you can do this with one for loop over the style layers, and should # 113 | # not be very much code (~5 lines). # 114 | # You will need to use your gram_matrix function. # 115 | ############################################################################## 116 | # Replace "pass" statement with your code 117 | 118 | # Initialize the "loss". 119 | loss = 0.0 120 | 121 | # Loop over "style_layers", track the "layer index" (li) and its "index" (idx). 122 | for idx, li in enumerate(style_layers): 123 | # Compute the Gram matrix of "features" in the current layer index. 124 | current_gm = gram_matrix(feats[li]) 125 | # Compute the current layer "style loss". 126 | curr_loss = style_weights[idx] * \ 127 | torch.sum((current_gm - style_targets[idx]) ** 2) 128 | # Add the computed "style loss" to "loss". 129 | loss += curr_loss 130 | 131 | return loss 132 | 133 | ############################################################################## 134 | # END OF YOUR CODE # 135 | ############################################################################## 136 | 137 | 138 | def tv_loss(img, tv_weight): 139 | """ 140 | Compute total variation loss. 141 | 142 | Inputs: 143 | - img: PyTorch Variable of shape (1, 3, H, W) holding an input image. 144 | - tv_weight: Scalar giving the weight w_t to use for the TV loss. 145 | 146 | Returns: 147 | - loss: PyTorch Variable holding a scalar giving the total variation loss 148 | for img weighted by tv_weight. 149 | """ 150 | ############################################################################## 151 | # TODO: Compute total variation loss. # 152 | # Your implementation should be vectorized and not require any loops! # 153 | ############################################################################## 154 | # Replace "pass" statement with your code 155 | 156 | # Sum through the height. 157 | sumh = torch.sum((img[..., 1:, :] - img[..., :-1, :]) ** 2) 158 | # Sum through the width. 
159 | sumw = torch.sum((img[..., 1:] - img[..., :-1]) ** 2) 160 | # Compute the loss. 161 | loss = tv_weight * (sumh + sumw) 162 | 163 | return loss 164 | 165 | ############################################################################## 166 | # END OF YOUR CODE # 167 | ############################################################################## -------------------------------------------------------------------------------- /eecs498-007/A4/style_transfer_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A4/style_transfer_result.jpg -------------------------------------------------------------------------------- /eecs498-007/A5/README.md: -------------------------------------------------------------------------------- 1 |
2 | EECS 498-007 / 598-005: Deep Learning for Computer Vision
3 | Assignment 5 (2020)
4 |
5 | 6 | # Goals 7 | 8 | In this assignment you will implement two different object detection systems. 9 | 10 | The goals of this assignment are: 11 | 12 | - Learn about the object detection pipeline. 13 | - Understand how to build an anchor-based single-stage object detectors. 14 | - Understand how to build a two-stage object detector that combines a region proposal network with a recognition network. 15 | 16 | # Questions 17 | 18 | ## Q1: Single-Stage Detector 19 | 20 | The notebook [``single_stage_detector_yolo.ipynb``](single_stage_detector_yolo.ipynb) will walk you through the implementation of a fully-convolutional single-stage object detector similar to YOLO (Redmon et al, CVPR 2016). You will train and evaluate your detector on the [PASCAL VOC 2007](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html) object detection dataset. 21 | 22 | ## Q2: Two-Stage Detector 23 | 24 | The notebook [``two_stage_detector_faster_rcnn.ipynb``](two_stage_detector_faster_rcnn.ipynb) will walk you through the implementation of a two-stage object detector similar to Faster R-CNN (Ren et al, NeurIPS 2015). This will combine a fully-convolutional Region Proposal Network (RPN) and a second-stage recognition network. 25 | -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data, grad, submit 2 | from .solver import Solver 3 | from .utils import reset_seed 4 | from .vis import tensor_to_image, visualize_dataset 5 | -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import matplotlib.pyplot as plt 5 | import torch 6 | import torchvision 7 | from torchvision.datasets import CIFAR10 8 | 9 | import eecs598 10 | 11 | 12 | def _extract_tensors(dset, num=None, x_dtype=torch.float32): 13 | """ 14 | Extract the data and labels from a CIFAR10 dataset object and convert them to 15 | tensors. 16 | 17 | Input: 18 | - dset: A torchvision.datasets.CIFAR10 object 19 | - num: Optional. If provided, the number of samples to keep. 20 | - x_dtype: Optional. data type of the input image 21 | 22 | Returns: 23 | - x: `x_dtype` tensor of shape (N, 3, 32, 32) 24 | - y: int64 tensor of shape (N,) 25 | """ 26 | x = torch.tensor(dset.data, dtype=x_dtype).permute(0, 3, 1, 2).div_(255) 27 | y = torch.tensor(dset.targets, dtype=torch.int64) 28 | if num is not None: 29 | if num <= 0 or num > x.shape[0]: 30 | raise ValueError( 31 | "Invalid value num=%d; must be in the range [0, %d]" % (num, x.shape[0]) 32 | ) 33 | x = x[:num].clone() 34 | y = y[:num].clone() 35 | return x, y 36 | 37 | 38 | def cifar10(num_train=None, num_test=None, x_dtype=torch.float32): 39 | """ 40 | Return the CIFAR10 dataset, automatically downloading it if necessary. 41 | This function can also subsample the dataset. 42 | 43 | Inputs: 44 | - num_train: [Optional] How many samples to keep from the training set. 45 | If not provided, then keep the entire training set. 46 | - num_test: [Optional] How many samples to keep from the test set. 47 | If not provided, then keep the entire test set. 
48 | - x_dtype: [Optional] Data type of the input image 49 | 50 | Returns: 51 | - x_train: `x_dtype` tensor of shape (num_train, 3, 32, 32) 52 | - y_train: int64 tensor of shape (num_train, 3, 32, 32) 53 | - x_test: `x_dtype` tensor of shape (num_test, 3, 32, 32) 54 | - y_test: int64 tensor of shape (num_test, 3, 32, 32) 55 | """ 56 | download = not os.path.isdir("cifar-10-batches-py") 57 | dset_train = CIFAR10(root=".", download=download, train=True) 58 | dset_test = CIFAR10(root=".", train=False) 59 | x_train, y_train = _extract_tensors(dset_train, num_train, x_dtype) 60 | x_test, y_test = _extract_tensors(dset_test, num_test, x_dtype) 61 | 62 | return x_train, y_train, x_test, y_test 63 | 64 | 65 | def preprocess_cifar10( 66 | cuda=True, 67 | show_examples=True, 68 | bias_trick=False, 69 | flatten=True, 70 | validation_ratio=0.2, 71 | dtype=torch.float32, 72 | ): 73 | """ 74 | Returns a preprocessed version of the CIFAR10 dataset, automatically 75 | downloading if necessary. We perform the following steps: 76 | 77 | (0) [Optional] Visualize some images from the dataset 78 | (1) Normalize the data by subtracting the mean 79 | (2) Reshape each image of shape (3, 32, 32) into a vector of shape (3072,) 80 | (3) [Optional] Bias trick: add an extra dimension of ones to the data 81 | (4) Carve out a validation set from the training set 82 | 83 | Inputs: 84 | - cuda: If true, move the entire dataset to the GPU 85 | - validation_ratio: Float in the range (0, 1) giving the fraction of the train 86 | set to reserve for validation 87 | - bias_trick: Boolean telling whether or not to apply the bias trick 88 | - show_examples: Boolean telling whether or not to visualize data samples 89 | - dtype: Optional, data type of the input image X 90 | 91 | Returns a dictionary with the following keys: 92 | - 'X_train': `dtype` tensor of shape (N_train, D) giving training images 93 | - 'X_val': `dtype` tensor of shape (N_val, D) giving val images 94 | - 'X_test': `dtype` tensor of shape (N_test, D) giving test images 95 | - 'y_train': int64 tensor of shape (N_train,) giving training labels 96 | - 'y_val': int64 tensor of shape (N_val,) giving val labels 97 | - 'y_test': int64 tensor of shape (N_test,) giving test labels 98 | 99 | N_train, N_val, and N_test are the number of examples in the train, val, and 100 | test sets respectively. The precise values of N_train and N_val are determined 101 | by the input parameter validation_ratio. D is the dimension of the image data; 102 | if bias_trick is False, then D = 32 * 32 * 3 = 3072; 103 | if bias_trick is True then D = 1 + 32 * 32 * 3 = 3073. 104 | """ 105 | X_train, y_train, X_test, y_test = cifar10(x_dtype=dtype) 106 | 107 | # Move data to the GPU 108 | if cuda: 109 | X_train = X_train.cuda() 110 | y_train = y_train.cuda() 111 | X_test = X_test.cuda() 112 | y_test = y_test.cuda() 113 | 114 | # 0. Visualize some examples from the dataset. 
115 | if show_examples: 116 | classes = [ 117 | "plane", 118 | "car", 119 | "bird", 120 | "cat", 121 | "deer", 122 | "dog", 123 | "frog", 124 | "horse", 125 | "ship", 126 | "truck", 127 | ] 128 | samples_per_class = 12 129 | samples = [] 130 | eecs598.reset_seed(0) 131 | for y, cls in enumerate(classes): 132 | plt.text(-4, 34 * y + 18, cls, ha="right") 133 | (idxs,) = (y_train == y).nonzero(as_tuple=True) 134 | for i in range(samples_per_class): 135 | idx = idxs[random.randrange(idxs.shape[0])].item() 136 | samples.append(X_train[idx]) 137 | img = torchvision.utils.make_grid(samples, nrow=samples_per_class) 138 | plt.imshow(eecs598.tensor_to_image(img)) 139 | plt.axis("off") 140 | plt.show() 141 | 142 | # 1. Normalize the data: subtract the mean RGB (zero mean) 143 | mean_image = X_train.mean(dim=(0, 2, 3), keepdim=True) 144 | X_train -= mean_image 145 | X_test -= mean_image 146 | 147 | # 2. Reshape the image data into rows 148 | if flatten: 149 | X_train = X_train.reshape(X_train.shape[0], -1) 150 | X_test = X_test.reshape(X_test.shape[0], -1) 151 | 152 | # 3. Add bias dimension and transform into columns 153 | if bias_trick: 154 | ones_train = torch.ones(X_train.shape[0], 1, device=X_train.device) 155 | X_train = torch.cat([X_train, ones_train], dim=1) 156 | ones_test = torch.ones(X_test.shape[0], 1, device=X_test.device) 157 | X_test = torch.cat([X_test, ones_test], dim=1) 158 | 159 | # 4. take the validation set from the training set 160 | # Note: It should not be taken from the test set 161 | # For random permumation, you can use torch.randperm or torch.randint 162 | # But, for this homework, we use slicing instead. 163 | num_training = int(X_train.shape[0] * (1.0 - validation_ratio)) 164 | num_validation = X_train.shape[0] - num_training 165 | 166 | # return the dataset 167 | data_dict = {} 168 | data_dict["X_val"] = X_train[num_training : num_training + num_validation] 169 | data_dict["y_val"] = y_train[num_training : num_training + num_validation] 170 | data_dict["X_train"] = X_train[0:num_training] 171 | data_dict["y_train"] = y_train[0:num_training] 172 | 173 | data_dict["X_test"] = X_test 174 | data_dict["y_test"] = y_test 175 | return data_dict 176 | -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/grad.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | 5 | import eecs598 6 | 7 | """ Utilities for computing and checking gradients. """ 8 | 9 | 10 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-7): 11 | """ 12 | Utility function to perform numeric gradient checking. We use the centered 13 | difference formula to compute a numeric derivative: 14 | 15 | f'(x) =~ (f(x + h) - f(x - h)) / (2h) 16 | 17 | Rather than computing a full numeric gradient, we sparsely sample a few 18 | dimensions along which to compute numeric derivatives. 
19 | 20 | Inputs: 21 | - f: A function that inputs a torch tensor and returns a torch scalar 22 | - x: A torch tensor of the point at which to evaluate the numeric gradient 23 | - analytic_grad: A torch tensor giving the analytic gradient of f at x 24 | - num_checks: The number of dimensions along which to check 25 | - h: Step size for computing numeric derivatives 26 | """ 27 | # fix random seed to 0 28 | eecs598.reset_seed(0) 29 | for i in range(num_checks): 30 | 31 | ix = tuple([random.randrange(m) for m in x.shape]) 32 | 33 | oldval = x[ix].item() 34 | x[ix] = oldval + h # increment by h 35 | fxph = f(x).item() # evaluate f(x + h) 36 | x[ix] = oldval - h # increment by h 37 | fxmh = f(x).item() # evaluate f(x - h) 38 | x[ix] = oldval # reset 39 | 40 | grad_numerical = (fxph - fxmh) / (2 * h) 41 | grad_analytic = analytic_grad[ix] 42 | rel_error_top = abs(grad_numerical - grad_analytic) 43 | rel_error_bot = abs(grad_numerical) + abs(grad_analytic) + 1e-12 44 | rel_error = rel_error_top / rel_error_bot 45 | msg = "numerical: %f analytic: %f, relative error: %e" 46 | print(msg % (grad_numerical, grad_analytic, rel_error)) 47 | 48 | 49 | def compute_numeric_gradient(f, x, dLdf=None, h=1e-7): 50 | """ 51 | Compute the numeric gradient of f at x using a finite differences 52 | approximation. We use the centered difference: 53 | 54 | df f(x + h) - f(x - h) 55 | -- ~= ------------------- 56 | dx 2 * h 57 | 58 | Function can also expand this easily to intermediate layers using the 59 | chain rule: 60 | 61 | dL df dL 62 | -- = -- * -- 63 | dx dx df 64 | 65 | Inputs: 66 | - f: A function that inputs a torch tensor and returns a torch scalar 67 | - x: A torch tensor giving the point at which to compute the gradient 68 | - dLdf: optional upstream gradient for intermediate layers 69 | - h: epsilon used in the finite difference calculation 70 | Returns: 71 | - grad: A tensor of the same shape as x giving the gradient of f at x 72 | """ 73 | flat_x = x.contiguous().flatten() 74 | grad = torch.zeros_like(x) 75 | flat_grad = grad.flatten() 76 | 77 | # Initialize upstream gradient to be ones if not provide 78 | if dLdf is None: 79 | y = f(x) 80 | dLdf = torch.ones_like(y) 81 | dLdf = dLdf.flatten() 82 | 83 | # iterate over all indexes in x 84 | for i in range(flat_x.shape[0]): 85 | oldval = flat_x[i].item() # Store the original value 86 | flat_x[i] = oldval + h # Increment by h 87 | fxph = f(x).flatten() # Evaluate f(x + h) 88 | flat_x[i] = oldval - h # Decrement by h 89 | fxmh = f(x).flatten() # Evaluate f(x - h) 90 | flat_x[i] = oldval # Restore original value 91 | 92 | # compute the partial derivative with centered formula 93 | dfdxi = (fxph - fxmh) / (2 * h) 94 | 95 | # use chain rule to compute dLdx 96 | flat_grad[i] = dLdf.dot(dfdxi).item() 97 | 98 | # Note that since flat_grad was only a reference to grad, 99 | # we can just return the object in the shape of x by returning grad 100 | return grad 101 | 102 | 103 | def rel_error(x, y, eps=1e-10): 104 | """ 105 | Compute the relative error between a pair of tensors x and y, 106 | which is defined as: 107 | 108 | max_i |x_i - y_i]| 109 | rel_error(x, y) = ------------------------------- 110 | max_i |x_i| + max_i |y_i| + eps 111 | 112 | Inputs: 113 | - x, y: Tensors of the same shape 114 | - eps: Small positive constant for numeric stability 115 | 116 | Returns: 117 | - rel_error: Scalar giving the relative error between x and y 118 | """ 119 | """ returns relative error between x and y """ 120 | top = (x - y).abs().max().item() 121 | bot = (x.abs() + 
y.abs()).clamp(min=eps).max().item() 122 | return top / bot 123 | -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/submit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | _A1_FILES = [ 5 | "pytorch101.py", 6 | "pytorch101.ipynb", 7 | "knn.py", 8 | "knn.ipynb", 9 | ] 10 | 11 | _A2_FILES = [ 12 | "linear_classifier.py", 13 | "linear_classifier.ipynb", 14 | "two_layer_net.py", 15 | "two_layer_net.ipynb", 16 | "svm_best_model.pt", 17 | "softmax_best_model.pt", 18 | "nn_best_model.pt", 19 | ] 20 | 21 | _A3_FILES = [ 22 | "fully_connected_networks.py", 23 | "fully_connected_networks.ipynb", 24 | "convolutional_networks.py", 25 | "convolutional_networks.ipynb", 26 | "best_overfit_five_layer_net.pth", 27 | "best_two_layer_net.pth", 28 | "one_minute_deepconvnet.pth", 29 | "overfit_deepconvnet.pth", 30 | ] 31 | 32 | _A4_FILES = [ 33 | 'network_visualization.py', 34 | 'network_visualization.ipynb', 35 | 'style_transfer.py', 36 | 'style_transfer.ipynb', 37 | 'pytorch_autograd_and_nn.py', 38 | 'pytorch_autograd_and_nn.ipynb', 39 | 'rnn_lstm_attention_captioning.py', 40 | 'rnn_lstm_attention_captioning.ipynb', 41 | # result files 42 | 'pytorch_autograd_and_nn.pkl', 43 | 'rnn_lstm_attention_submission.pkl', 44 | 'saliency_maps_results.jpg', 45 | 'adversarial_attacks_results.jpg', 46 | 'class_viz_result.jpg', 47 | 'style_transfer_result.jpg', 48 | 'feature_inversion_result.jpg' 49 | ] 50 | 51 | _A5_FILES = [ 52 | 'single_stage_detector.py', 53 | 'two_stage_detector.py', 54 | 'single_stage_detector_yolo.ipynb', 55 | 'two_stage_detector_faster_rcnn.ipynb', 56 | 'yolo_detector.pt', 57 | 'frcnn_detector.pt', 58 | ] 59 | 60 | def make_a1_submission(assignment_path, uniquename=None, umid=None): 61 | _make_submission(assignment_path, _A1_FILES, "A1", uniquename, umid) 62 | 63 | 64 | def make_a2_submission(assignment_path, uniquename=None, umid=None): 65 | _make_submission(assignment_path, _A2_FILES, "A2", uniquename, umid) 66 | 67 | 68 | def make_a3_submission(assignment_path, uniquename=None, umid=None): 69 | _make_submission(assignment_path, _A3_FILES, "A3", uniquename, umid) 70 | 71 | 72 | def make_a4_submission(assignment_path, uniquename=None, umid=None): 73 | _make_submission(assignment_path, _A4_FILES, "A4", uniquename, umid) 74 | 75 | 76 | def make_a5_submission(assignment_path, uniquename=None, umid=None): 77 | _make_submission(assignment_path, _A5_FILES, "A5", uniquename, umid) 78 | 79 | 80 | def _make_submission( 81 | assignment_path, file_list, assignment_no, uniquename=None, umid=None 82 | ): 83 | if uniquename is None or umid is None: 84 | uniquename, umid = _get_user_info() 85 | zip_path = "{}_{}_{}.zip".format(uniquename, umid, assignment_no) 86 | zip_path = os.path.join(assignment_path, zip_path) 87 | print("Writing zip file to: ", zip_path) 88 | with zipfile.ZipFile(zip_path, "w") as zf: 89 | for filename in file_list: 90 | in_path = os.path.join(assignment_path, filename) 91 | if not os.path.isfile(in_path): 92 | raise ValueError('Could not find file "%s"' % filename) 93 | zf.write(in_path, filename) 94 | 95 | 96 | def _get_user_info(): 97 | if uniquename is None: 98 | uniquename = input("Enter your uniquename (e.g. justincj): ") 99 | if umid is None: 100 | umid = input("Enter your umid (e.g. 
12345678): ") 101 | return uniquename, umid 102 | -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | from torchvision.utils import make_grid 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | import numpy as np 7 | 8 | """ 9 | General utilities to help with implementation 10 | """ 11 | 12 | def reset_seed(number): 13 | """ 14 | Reset random seed to the specific number 15 | 16 | Inputs: 17 | - number: A seed number to use 18 | """ 19 | random.seed(number) 20 | torch.manual_seed(number) 21 | return 22 | 23 | def tensor_to_image(tensor): 24 | """ 25 | Convert a torch tensor into a numpy ndarray for visualization. 26 | 27 | Inputs: 28 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 29 | 30 | Returns: 31 | - ndarr: A uint8 numpy array of shape (H, W, 3) 32 | """ 33 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 34 | ndarr = tensor.to('cpu', torch.uint8).numpy() 35 | return ndarr 36 | 37 | 38 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 39 | """ 40 | Make a grid-shape image to plot 41 | 42 | Inputs: 43 | - X_data: set of [batch, 3, width, height] data 44 | - y_data: paired label of X_data in [batch] shape 45 | - samples_per_class: number of samples want to present 46 | - class_list: list of class names 47 | e.g.) ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 48 | 49 | Outputs: 50 | - An grid-image that visualize samples_per_class number of samples per class 51 | """ 52 | img_half_width = X_data.shape[2] // 2 53 | samples = [] 54 | for y, cls in enumerate(class_list): 55 | plt.text(-4, (img_half_width * 2 + 2) * y + (img_half_width + 2), cls, ha='right') 56 | idxs = (y_data == y).nonzero().view(-1) 57 | for i in range(samples_per_class): 58 | idx = idxs[random.randrange(idxs.shape[0])].item() 59 | samples.append(X_data[idx]) 60 | 61 | img = make_grid(samples, nrow=samples_per_class) 62 | return tensor_to_image(img) 63 | 64 | 65 | def decode_captions(captions, idx_to_word): 66 | """ 67 | Decoding caption indexes into words. 68 | Inputs: 69 | - captions: Caption indexes in a tensor of shape (Nx)T. 70 | - idx_to_word: Mapping from the vocab index to word. 71 | 72 | Outputs: 73 | - decoded: A sentence (or a list of N sentences). 74 | """ 75 | singleton = False 76 | if captions.ndim == 1: 77 | singleton = True 78 | captions = captions[None] 79 | decoded = [] 80 | N, T = captions.shape 81 | for i in range(N): 82 | words = [] 83 | for t in range(T): 84 | word = idx_to_word[captions[i, t]] 85 | if word != '': 86 | words.append(word) 87 | if word == '': 88 | break 89 | decoded.append(' '.join(words)) 90 | if singleton: 91 | decoded = decoded[0] 92 | return decoded 93 | 94 | 95 | def attention_visualizer(img, attn_weights, token): 96 | """ 97 | Visuailze the attended regions on a single frame from a single query word. 98 | Inputs: 99 | - img: Image tensor input, of shape (3, H, W) 100 | - attn_weights: Attention weight tensor, on the final activation map 101 | - token: The token string you want to display above the image 102 | 103 | Outputs: 104 | - img_output: Image tensor output, of shape (3, H+25, W) 105 | 106 | """ 107 | C, H, W = img.shape 108 | assert C == 3, 'We only support image with three color channels!' 
109 | 110 | # Reshape attention map 111 | attn_weights = cv2.resize(attn_weights.data.numpy().copy(), 112 | (H, W), interpolation=cv2.INTER_NEAREST) 113 | attn_weights = np.repeat(np.expand_dims(attn_weights, axis=2), 3, axis=2) 114 | 115 | # Combine image and attention map 116 | img_copy = img.float().div(255.).permute(1, 2, 0 117 | ).numpy()[:, :, ::-1].copy() # covert to BGR for cv2 118 | masked_img = cv2.addWeighted(attn_weights, 0.5, img_copy, 0.5, 0) 119 | img_copy = np.concatenate((np.zeros((25, W, 3)), 120 | masked_img), axis=0) 121 | 122 | # Add text 123 | cv2.putText(img_copy, '%s' % (token), (10, 15), 124 | cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), thickness=1) 125 | 126 | return img_copy -------------------------------------------------------------------------------- /eecs498-007/A5/eecs598/vis.py: -------------------------------------------------------------------------------- 1 | import random 2 | import cv2 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torchvision.utils import make_grid 8 | 9 | 10 | """ 11 | Utilities to help with visualizing images and other data 12 | """ 13 | 14 | 15 | def tensor_to_image(tensor): 16 | """ 17 | Convert a torch tensor into a numpy ndarray for visualization. 18 | 19 | Inputs: 20 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 21 | 22 | Returns: 23 | - ndarr: A uint8 numpy array of shape (H, W, 3) 24 | """ 25 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 26 | ndarr = tensor.to("cpu", torch.uint8).numpy() 27 | return ndarr 28 | 29 | 30 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 31 | """ 32 | Make a grid-shape image to plot 33 | 34 | Inputs: 35 | - X_data: set of [batch, 3, width, height] data 36 | - y_data: paired label of X_data in [batch] shape 37 | - samples_per_class: number of samples want to present 38 | - class_list: list of class names; eg, 39 | ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 40 | 41 | Outputs: 42 | - An grid-image that visualize samples_per_class number of samples per class 43 | """ 44 | img_half_width = X_data.shape[2] // 2 45 | samples = [] 46 | for y, cls in enumerate(class_list): 47 | tx = -4 48 | ty = (img_half_width * 2 + 2) * y + (img_half_width + 2) 49 | plt.text(tx, ty, cls, ha="right") 50 | idxs = (y_data == y).nonzero().view(-1) 51 | for i in range(samples_per_class): 52 | idx = idxs[random.randrange(idxs.shape[0])].item() 53 | samples.append(X_data[idx]) 54 | 55 | img = make_grid(samples, nrow=samples_per_class) 56 | return tensor_to_image(img) 57 | 58 | def detection_visualizer(img, idx_to_class, bbox=None, pred=None): 59 | """ 60 | Data visualizer on the original image. Support both GT box input and proposal input. 
61 | 62 | Input: 63 | - img: PIL Image input 64 | - idx_to_class: Mapping from the index (0-19) to the class name 65 | - bbox: GT bbox (in red, optional), a tensor of shape Nx5, where N is 66 | the number of GT boxes, 5 indicates (x_tl, y_tl, x_br, y_br, class) 67 | - pred: Predicted bbox (in green, optional), a tensor of shape N'x6, where 68 | N' is the number of predicted boxes, 6 indicates 69 | (x_tl, y_tl, x_br, y_br, class, object confidence score) 70 | """ 71 | 72 | img_copy = np.array(img).astype('uint8') 73 | 74 | if bbox is not None: 75 | for bbox_idx in range(bbox.shape[0]): 76 | one_bbox = bbox[bbox_idx][:4] 77 | cv2.rectangle(img_copy, (one_bbox[0], one_bbox[1]), (one_bbox[2], 78 | one_bbox[3]), (255, 0, 0), 2) 79 | if bbox.shape[1] > 4: # if class info provided 80 | obj_cls = idx_to_class[bbox[bbox_idx][4].item()] 81 | cv2.putText(img_copy, '%s' % (obj_cls), 82 | (one_bbox[0], one_bbox[1]+15), 83 | cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1) 84 | 85 | if pred is not None: 86 | for bbox_idx in range(pred.shape[0]): 87 | one_bbox = pred[bbox_idx][:4] 88 | cv2.rectangle(img_copy, (one_bbox[0], one_bbox[1]), (one_bbox[2], 89 | one_bbox[3]), (0, 255, 0), 2) 90 | 91 | if pred.shape[1] > 4: # if class and conf score info provided 92 | obj_cls = idx_to_class[pred[bbox_idx][4].item()] 93 | conf_score = pred[bbox_idx][5].item() 94 | cv2.putText(img_copy, '%s, %.2f' % (obj_cls, conf_score), 95 | (one_bbox[0], one_bbox[1]+15), 96 | cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1) 97 | 98 | plt.imshow(img_copy) 99 | plt.axis('off') 100 | plt.show() -------------------------------------------------------------------------------- /eecs498-007/A5/frcnn_detector.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A5/frcnn_detector.pt -------------------------------------------------------------------------------- /eecs498-007/A5/yolo_detector.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A5/yolo_detector.pt -------------------------------------------------------------------------------- /eecs498-007/A6/README.md: -------------------------------------------------------------------------------- 1 | 5 | 6 | # Goals 7 | 8 | In this assignment you will implement two different kinds of generative models: **Variational Autoencoders (VAEs)** and **Generative Adversarial Networks (GANs)**. 9 | 10 | # Questions 11 | 12 | ## Q1: Variational Autoencoder 13 | 14 | The notebook [``variational_autoencoders.ipynb``](variational_autoencoders.ipynb) will walk you through the implementation of a VAE on the MNIST dataset. This will allow you to generate new data, and to interpolate in the latent space. 15 | 16 | ## Q2: Generative Adversarial Networks 17 | 18 | The notebook [``generative_adversarial_networks.ipynb``](generative_adversarial_networks.ipynb) will walk you through the implementation of fully-connected and convolutional generative adversarial networks on the MNIST dataset. 
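
As a point of reference, the sketch below shows the objectives these two questions build toward: the VAE's reconstruction-plus-KL loss and the vanilla GAN's binary cross-entropy losses. It is an illustrative, self-contained example only, not the required implementation; the function names and signatures are hypothetical, and the graded code belongs in `vae.py` and `gan.py`.

```python
import torch
import torch.nn.functional as F

def vae_loss_sketch(x_hat, x, mu, logvar):
    # Reconstruction term: binary cross-entropy is a reasonable choice for
    # MNIST pixels in [0, 1].
    recon = F.binary_cross_entropy(x_hat, x, reduction="sum")
    # Closed-form KL divergence between q(z|x) = N(mu, diag(exp(logvar)))
    # and the prior N(0, I).
    kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon + kl

def gan_losses_sketch(logits_real, logits_fake):
    # Discriminator: push real logits toward 1 and fake logits toward 0.
    ones = torch.ones_like(logits_real)
    zeros = torch.zeros_like(logits_fake)
    d_loss = (F.binary_cross_entropy_with_logits(logits_real, ones) +
              F.binary_cross_entropy_with_logits(logits_fake, zeros))
    # Generator: push the discriminator's scores on fakes toward 1.
    g_loss = F.binary_cross_entropy_with_logits(logits_fake,
                                                torch.ones_like(logits_fake))
    return d_loss, g_loss
```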
19 | -------------------------------------------------------------------------------- /eecs498-007/A6/a6_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import math 4 | import os 5 | import shutil 6 | import torch.optim as optim 7 | from torchvision import models, datasets, transforms 8 | from torch.utils.data import DataLoader 9 | import matplotlib.pyplot as plt 10 | import matplotlib.gridspec as gridspec 11 | from vae import loss_function 12 | from torch import nn 13 | 14 | 15 | def hello_helper(): 16 | print("Hello from a6_helper.py!") 17 | 18 | def show_images(images): 19 | images = torch.reshape(images, [images.shape[0], -1]) # images reshape to (batch_size, D) 20 | sqrtn = int(math.ceil(math.sqrt(images.shape[0]))) 21 | sqrtimg = int(math.ceil(math.sqrt(images.shape[1]))) 22 | 23 | fig = plt.figure(figsize=(sqrtn, sqrtn)) 24 | gs = gridspec.GridSpec(sqrtn, sqrtn) 25 | gs.update(wspace=0.05, hspace=0.05) 26 | 27 | for i, img in enumerate(images): 28 | ax = plt.subplot(gs[i]) 29 | plt.axis('off') 30 | ax.set_xticklabels([]) 31 | ax.set_yticklabels([]) 32 | ax.set_aspect('equal') 33 | plt.imshow(img.reshape([sqrtimg,sqrtimg])) 34 | return 35 | 36 | def count_params(model): 37 | """Count the number of parameters in the model""" 38 | param_count = sum([p.numel() for p in model.parameters()]) 39 | return param_count 40 | 41 | def initialize_weights(m): 42 | """ Initializes the weights of a torch.nn model using xavier initialization""" 43 | if isinstance(m, nn.Linear) or isinstance(m, nn.ConvTranspose2d): 44 | nn.init.xavier_uniform_(m.weight.data) 45 | 46 | 47 | def one_hot(labels, class_size): 48 | """ 49 | Create one hot label matrix of size (N, C) 50 | 51 | Inputs: 52 | - labels: Labels Tensor of shape (N,) representing a ground-truth label 53 | for each MNIST image 54 | - class_size: Scalar representing of target classes our dataset 55 | Outputs: 56 | - targets: One-hot label matrix of (N, C), where targets[i, j] = 1 when 57 | the ground truth label for image i is j, and targets[i, :j] & 58 | targets[i, j + 1:] are equal to 0 59 | """ 60 | targets = torch.zeros(labels.size(0), class_size) 61 | for i, label in enumerate(labels): 62 | targets[i, label] = 1 63 | return targets 64 | 65 | def train_vae(epoch, model, train_loader, cond=False): 66 | """ 67 | Train a VAE or CVAE! 
68 | 69 | Inputs: 70 | - epoch: Current epoch number 71 | - model: VAE model object 72 | - train_loader: PyTorch Dataloader object that contains our training data 73 | - cond: Boolean value representing whether we're training a VAE or 74 | Conditional VAE 75 | """ 76 | model.train() 77 | train_loss = 0 78 | num_classes = 10 79 | loss = None 80 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 81 | for batch_idx, (data, labels) in enumerate(train_loader): 82 | data = data.to(device='cuda:0') 83 | if cond: 84 | one_hot_vec = one_hot(labels, num_classes).to(device='cuda') 85 | recon_batch, mu, logvar = model(data, one_hot_vec) 86 | else: 87 | recon_batch, mu, logvar = model(data) 88 | optimizer.zero_grad() 89 | loss = loss_function(recon_batch, data, mu, logvar) 90 | loss.backward() 91 | train_loss += loss.data 92 | optimizer.step() 93 | print('Train Epoch: {} \tLoss: {:.6f}'.format( 94 | epoch, loss.data)) 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /eecs498-007/A6/conditional_vae_generation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A6/conditional_vae_generation.jpg -------------------------------------------------------------------------------- /eecs498-007/A6/dc_gan_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A6/dc_gan_results.jpg -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/__init__.py: -------------------------------------------------------------------------------- 1 | from . import data, grad, submit 2 | from .solver import Solver 3 | from .utils import reset_seed 4 | from .vis import tensor_to_image, visualize_dataset 5 | -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import matplotlib.pyplot as plt 5 | import torch 6 | import torchvision 7 | from torchvision.datasets import CIFAR10 8 | 9 | import eecs598 10 | 11 | 12 | def _extract_tensors(dset, num=None, x_dtype=torch.float32): 13 | """ 14 | Extract the data and labels from a CIFAR10 dataset object and convert them to 15 | tensors. 16 | 17 | Input: 18 | - dset: A torchvision.datasets.CIFAR10 object 19 | - num: Optional. If provided, the number of samples to keep. 20 | - x_dtype: Optional. data type of the input image 21 | 22 | Returns: 23 | - x: `x_dtype` tensor of shape (N, 3, 32, 32) 24 | - y: int64 tensor of shape (N,) 25 | """ 26 | x = torch.tensor(dset.data, dtype=x_dtype).permute(0, 3, 1, 2).div_(255) 27 | y = torch.tensor(dset.targets, dtype=torch.int64) 28 | if num is not None: 29 | if num <= 0 or num > x.shape[0]: 30 | raise ValueError( 31 | "Invalid value num=%d; must be in the range [0, %d]" % (num, x.shape[0]) 32 | ) 33 | x = x[:num].clone() 34 | y = y[:num].clone() 35 | return x, y 36 | 37 | 38 | def cifar10(num_train=None, num_test=None, x_dtype=torch.float32): 39 | """ 40 | Return the CIFAR10 dataset, automatically downloading it if necessary. 41 | This function can also subsample the dataset. 
42 | 43 | Inputs: 44 | - num_train: [Optional] How many samples to keep from the training set. 45 | If not provided, then keep the entire training set. 46 | - num_test: [Optional] How many samples to keep from the test set. 47 | If not provided, then keep the entire test set. 48 | - x_dtype: [Optional] Data type of the input image 49 | 50 | Returns: 51 | - x_train: `x_dtype` tensor of shape (num_train, 3, 32, 32) 52 | - y_train: int64 tensor of shape (num_train, 3, 32, 32) 53 | - x_test: `x_dtype` tensor of shape (num_test, 3, 32, 32) 54 | - y_test: int64 tensor of shape (num_test, 3, 32, 32) 55 | """ 56 | download = not os.path.isdir("cifar-10-batches-py") 57 | dset_train = CIFAR10(root=".", download=download, train=True) 58 | dset_test = CIFAR10(root=".", train=False) 59 | x_train, y_train = _extract_tensors(dset_train, num_train, x_dtype) 60 | x_test, y_test = _extract_tensors(dset_test, num_test, x_dtype) 61 | 62 | return x_train, y_train, x_test, y_test 63 | 64 | 65 | def preprocess_cifar10( 66 | cuda=True, 67 | show_examples=True, 68 | bias_trick=False, 69 | flatten=True, 70 | validation_ratio=0.2, 71 | dtype=torch.float32, 72 | ): 73 | """ 74 | Returns a preprocessed version of the CIFAR10 dataset, automatically 75 | downloading if necessary. We perform the following steps: 76 | 77 | (0) [Optional] Visualize some images from the dataset 78 | (1) Normalize the data by subtracting the mean 79 | (2) Reshape each image of shape (3, 32, 32) into a vector of shape (3072,) 80 | (3) [Optional] Bias trick: add an extra dimension of ones to the data 81 | (4) Carve out a validation set from the training set 82 | 83 | Inputs: 84 | - cuda: If true, move the entire dataset to the GPU 85 | - validation_ratio: Float in the range (0, 1) giving the fraction of the train 86 | set to reserve for validation 87 | - bias_trick: Boolean telling whether or not to apply the bias trick 88 | - show_examples: Boolean telling whether or not to visualize data samples 89 | - dtype: Optional, data type of the input image X 90 | 91 | Returns a dictionary with the following keys: 92 | - 'X_train': `dtype` tensor of shape (N_train, D) giving training images 93 | - 'X_val': `dtype` tensor of shape (N_val, D) giving val images 94 | - 'X_test': `dtype` tensor of shape (N_test, D) giving test images 95 | - 'y_train': int64 tensor of shape (N_train,) giving training labels 96 | - 'y_val': int64 tensor of shape (N_val,) giving val labels 97 | - 'y_test': int64 tensor of shape (N_test,) giving test labels 98 | 99 | N_train, N_val, and N_test are the number of examples in the train, val, and 100 | test sets respectively. The precise values of N_train and N_val are determined 101 | by the input parameter validation_ratio. D is the dimension of the image data; 102 | if bias_trick is False, then D = 32 * 32 * 3 = 3072; 103 | if bias_trick is True then D = 1 + 32 * 32 * 3 = 3073. 104 | """ 105 | X_train, y_train, X_test, y_test = cifar10(x_dtype=dtype) 106 | 107 | # Move data to the GPU 108 | if cuda: 109 | X_train = X_train.cuda() 110 | y_train = y_train.cuda() 111 | X_test = X_test.cuda() 112 | y_test = y_test.cuda() 113 | 114 | # 0. Visualize some examples from the dataset. 
115 | if show_examples: 116 | classes = [ 117 | "plane", 118 | "car", 119 | "bird", 120 | "cat", 121 | "deer", 122 | "dog", 123 | "frog", 124 | "horse", 125 | "ship", 126 | "truck", 127 | ] 128 | samples_per_class = 12 129 | samples = [] 130 | eecs598.reset_seed(0) 131 | for y, cls in enumerate(classes): 132 | plt.text(-4, 34 * y + 18, cls, ha="right") 133 | (idxs,) = (y_train == y).nonzero(as_tuple=True) 134 | for i in range(samples_per_class): 135 | idx = idxs[random.randrange(idxs.shape[0])].item() 136 | samples.append(X_train[idx]) 137 | img = torchvision.utils.make_grid(samples, nrow=samples_per_class) 138 | plt.imshow(eecs598.tensor_to_image(img)) 139 | plt.axis("off") 140 | plt.show() 141 | 142 | # 1. Normalize the data: subtract the mean RGB (zero mean) 143 | mean_image = X_train.mean(dim=(0, 2, 3), keepdim=True) 144 | X_train -= mean_image 145 | X_test -= mean_image 146 | 147 | # 2. Reshape the image data into rows 148 | if flatten: 149 | X_train = X_train.reshape(X_train.shape[0], -1) 150 | X_test = X_test.reshape(X_test.shape[0], -1) 151 | 152 | # 3. Add bias dimension and transform into columns 153 | if bias_trick: 154 | ones_train = torch.ones(X_train.shape[0], 1, device=X_train.device) 155 | X_train = torch.cat([X_train, ones_train], dim=1) 156 | ones_test = torch.ones(X_test.shape[0], 1, device=X_test.device) 157 | X_test = torch.cat([X_test, ones_test], dim=1) 158 | 159 | # 4. take the validation set from the training set 160 | # Note: It should not be taken from the test set 161 | # For random permumation, you can use torch.randperm or torch.randint 162 | # But, for this homework, we use slicing instead. 163 | num_training = int(X_train.shape[0] * (1.0 - validation_ratio)) 164 | num_validation = X_train.shape[0] - num_training 165 | 166 | # return the dataset 167 | data_dict = {} 168 | data_dict["X_val"] = X_train[num_training : num_training + num_validation] 169 | data_dict["y_val"] = y_train[num_training : num_training + num_validation] 170 | data_dict["X_train"] = X_train[0:num_training] 171 | data_dict["y_train"] = y_train[0:num_training] 172 | 173 | data_dict["X_test"] = X_test 174 | data_dict["y_test"] = y_test 175 | return data_dict 176 | -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/grad.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | 5 | import eecs598 6 | 7 | """ Utilities for computing and checking gradients. """ 8 | 9 | 10 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-7): 11 | """ 12 | Utility function to perform numeric gradient checking. We use the centered 13 | difference formula to compute a numeric derivative: 14 | 15 | f'(x) =~ (f(x + h) - f(x - h)) / (2h) 16 | 17 | Rather than computing a full numeric gradient, we sparsely sample a few 18 | dimensions along which to compute numeric derivatives. 
19 | 20 | Inputs: 21 | - f: A function that inputs a torch tensor and returns a torch scalar 22 | - x: A torch tensor of the point at which to evaluate the numeric gradient 23 | - analytic_grad: A torch tensor giving the analytic gradient of f at x 24 | - num_checks: The number of dimensions along which to check 25 | - h: Step size for computing numeric derivatives 26 | """ 27 | # fix random seed to 0 28 | eecs598.reset_seed(0) 29 | for i in range(num_checks): 30 | 31 | ix = tuple([random.randrange(m) for m in x.shape]) 32 | 33 | oldval = x[ix].item() 34 | x[ix] = oldval + h # increment by h 35 | fxph = f(x).item() # evaluate f(x + h) 36 | x[ix] = oldval - h # increment by h 37 | fxmh = f(x).item() # evaluate f(x - h) 38 | x[ix] = oldval # reset 39 | 40 | grad_numerical = (fxph - fxmh) / (2 * h) 41 | grad_analytic = analytic_grad[ix] 42 | rel_error_top = abs(grad_numerical - grad_analytic) 43 | rel_error_bot = abs(grad_numerical) + abs(grad_analytic) + 1e-12 44 | rel_error = rel_error_top / rel_error_bot 45 | msg = "numerical: %f analytic: %f, relative error: %e" 46 | print(msg % (grad_numerical, grad_analytic, rel_error)) 47 | 48 | 49 | def compute_numeric_gradient(f, x, dLdf=None, h=1e-7): 50 | """ 51 | Compute the numeric gradient of f at x using a finite differences 52 | approximation. We use the centered difference: 53 | 54 | df f(x + h) - f(x - h) 55 | -- ~= ------------------- 56 | dx 2 * h 57 | 58 | Function can also expand this easily to intermediate layers using the 59 | chain rule: 60 | 61 | dL df dL 62 | -- = -- * -- 63 | dx dx df 64 | 65 | Inputs: 66 | - f: A function that inputs a torch tensor and returns a torch scalar 67 | - x: A torch tensor giving the point at which to compute the gradient 68 | - dLdf: optional upstream gradient for intermediate layers 69 | - h: epsilon used in the finite difference calculation 70 | Returns: 71 | - grad: A tensor of the same shape as x giving the gradient of f at x 72 | """ 73 | flat_x = x.contiguous().flatten() 74 | grad = torch.zeros_like(x) 75 | flat_grad = grad.flatten() 76 | 77 | # Initialize upstream gradient to be ones if not provide 78 | if dLdf is None: 79 | y = f(x) 80 | dLdf = torch.ones_like(y) 81 | dLdf = dLdf.flatten() 82 | 83 | # iterate over all indexes in x 84 | for i in range(flat_x.shape[0]): 85 | oldval = flat_x[i].item() # Store the original value 86 | flat_x[i] = oldval + h # Increment by h 87 | fxph = f(x).flatten() # Evaluate f(x + h) 88 | flat_x[i] = oldval - h # Decrement by h 89 | fxmh = f(x).flatten() # Evaluate f(x - h) 90 | flat_x[i] = oldval # Restore original value 91 | 92 | # compute the partial derivative with centered formula 93 | dfdxi = (fxph - fxmh) / (2 * h) 94 | 95 | # use chain rule to compute dLdx 96 | flat_grad[i] = dLdf.dot(dfdxi).item() 97 | 98 | # Note that since flat_grad was only a reference to grad, 99 | # we can just return the object in the shape of x by returning grad 100 | return grad 101 | 102 | 103 | def rel_error(x, y, eps=1e-10): 104 | """ 105 | Compute the relative error between a pair of tensors x and y, 106 | which is defined as: 107 | 108 | max_i |x_i - y_i]| 109 | rel_error(x, y) = ------------------------------- 110 | max_i |x_i| + max_i |y_i| + eps 111 | 112 | Inputs: 113 | - x, y: Tensors of the same shape 114 | - eps: Small positive constant for numeric stability 115 | 116 | Returns: 117 | - rel_error: Scalar giving the relative error between x and y 118 | """ 119 | """ returns relative error between x and y """ 120 | top = (x - y).abs().max().item() 121 | bot = (x.abs() + 
y.abs()).clamp(min=eps).max().item() 122 | return top / bot 123 | -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/submit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | _A1_FILES = [ 5 | "pytorch101.py", 6 | "pytorch101.ipynb", 7 | "knn.py", 8 | "knn.ipynb", 9 | ] 10 | 11 | _A2_FILES = [ 12 | "linear_classifier.py", 13 | "linear_classifier.ipynb", 14 | "two_layer_net.py", 15 | "two_layer_net.ipynb", 16 | "svm_best_model.pt", 17 | "softmax_best_model.pt", 18 | "nn_best_model.pt", 19 | ] 20 | 21 | _A3_FILES = [ 22 | "fully_connected_networks.py", 23 | "fully_connected_networks.ipynb", 24 | "convolutional_networks.py", 25 | "convolutional_networks.ipynb", 26 | "best_overfit_five_layer_net.pth", 27 | "best_two_layer_net.pth", 28 | "one_minute_deepconvnet.pth", 29 | "overfit_deepconvnet.pth", 30 | ] 31 | 32 | _A4_FILES = [ 33 | 'network_visualization.py', 34 | 'network_visualization.ipynb', 35 | 'style_transfer.py', 36 | 'style_transfer.ipynb', 37 | 'pytorch_autograd_and_nn.py', 38 | 'pytorch_autograd_and_nn.ipynb', 39 | 'rnn_lstm_attention_captioning.py', 40 | 'rnn_lstm_attention_captioning.ipynb', 41 | # result files 42 | 'pytorch_autograd_and_nn.pkl', 43 | 'rnn_lstm_attention_submission.pkl', 44 | 'saliency_maps_results.jpg', 45 | 'adversarial_attacks_results.jpg', 46 | 'class_viz_result.jpg', 47 | 'style_transfer_result.jpg', 48 | 'feature_inversion_result.jpg' 49 | ] 50 | 51 | _A5_FILES = [ 52 | 'single_stage_detector.py', 53 | 'two_stage_detector.py', 54 | 'single_stage_detector_yolo.ipynb', 55 | 'two_stage_detector_faster_rcnn.ipynb', 56 | 'yolo_detector.pt', 57 | 'frcnn_detector.pt', 58 | ] 59 | 60 | 61 | _A6_FILES = [ 62 | 'vae.py', 63 | 'gan.py', 64 | 'variational_autoencoders.ipynb', 65 | 'generative_adversarial_networks.ipynb', 66 | 'vae_generation.jpg', 67 | 'conditional_vae_generation.jpg', 68 | 'fc_gan_results.jpg', 69 | 'ls_gan_results.jpg', 70 | 'dc_gan_results.jpg' 71 | ] 72 | 73 | 74 | def make_a1_submission(assignment_path, uniquename=None, umid=None): 75 | _make_submission(assignment_path, _A1_FILES, "A1", uniquename, umid) 76 | 77 | 78 | def make_a2_submission(assignment_path, uniquename=None, umid=None): 79 | _make_submission(assignment_path, _A2_FILES, "A2", uniquename, umid) 80 | 81 | 82 | def make_a3_submission(assignment_path, uniquename=None, umid=None): 83 | _make_submission(assignment_path, _A3_FILES, "A3", uniquename, umid) 84 | 85 | 86 | def make_a4_submission(assignment_path, uniquename=None, umid=None): 87 | _make_submission(assignment_path, _A4_FILES, "A4", uniquename, umid) 88 | 89 | 90 | def make_a5_submission(assignment_path, uniquename=None, umid=None): 91 | _make_submission(assignment_path, _A5_FILES, "A5", uniquename, umid) 92 | 93 | def make_a6_submission(assignment_path, uniquename=None, umid=None): 94 | _make_submission(assignment_path, _A6_FILES, "A6", uniquename, umid) 95 | 96 | 97 | def _make_submission( 98 | assignment_path, file_list, assignment_no, uniquename=None, umid=None 99 | ): 100 | if uniquename is None or umid is None: 101 | uniquename, umid = _get_user_info() 102 | zip_path = "{}_{}_{}.zip".format(uniquename, umid, assignment_no) 103 | zip_path = os.path.join(assignment_path, zip_path) 104 | print("Writing zip file to: ", zip_path) 105 | with zipfile.ZipFile(zip_path, "w") as zf: 106 | for filename in file_list: 107 | in_path = os.path.join(assignment_path, filename) 108 | if not os.path.isfile(in_path): 
109 | raise ValueError('Could not find file "%s"' % filename) 110 | zf.write(in_path, filename) 111 | 112 | 113 | def _get_user_info(): 114 | if uniquename is None: 115 | uniquename = input("Enter your uniquename (e.g. justincj): ") 116 | if umid is None: 117 | umid = input("Enter your umid (e.g. 12345678): ") 118 | return uniquename, umid 119 | -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | from torchvision.utils import make_grid 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | import numpy as np 7 | 8 | """ 9 | General utilities to help with implementation 10 | """ 11 | 12 | def reset_seed(number): 13 | """ 14 | Reset random seed to the specific number 15 | 16 | Inputs: 17 | - number: A seed number to use 18 | """ 19 | random.seed(number) 20 | torch.manual_seed(number) 21 | return 22 | 23 | def tensor_to_image(tensor): 24 | """ 25 | Convert a torch tensor into a numpy ndarray for visualization. 26 | 27 | Inputs: 28 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 29 | 30 | Returns: 31 | - ndarr: A uint8 numpy array of shape (H, W, 3) 32 | """ 33 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 34 | ndarr = tensor.to('cpu', torch.uint8).numpy() 35 | return ndarr 36 | 37 | 38 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 39 | """ 40 | Make a grid-shape image to plot 41 | 42 | Inputs: 43 | - X_data: set of [batch, 3, width, height] data 44 | - y_data: paired label of X_data in [batch] shape 45 | - samples_per_class: number of samples want to present 46 | - class_list: list of class names 47 | e.g.) ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 48 | 49 | Outputs: 50 | - An grid-image that visualize samples_per_class number of samples per class 51 | """ 52 | img_half_width = X_data.shape[2] // 2 53 | samples = [] 54 | for y, cls in enumerate(class_list): 55 | plt.text(-4, (img_half_width * 2 + 2) * y + (img_half_width + 2), cls, ha='right') 56 | idxs = (y_data == y).nonzero().view(-1) 57 | for i in range(samples_per_class): 58 | idx = idxs[random.randrange(idxs.shape[0])].item() 59 | samples.append(X_data[idx]) 60 | 61 | img = make_grid(samples, nrow=samples_per_class) 62 | return tensor_to_image(img) 63 | 64 | 65 | def decode_captions(captions, idx_to_word): 66 | """ 67 | Decoding caption indexes into words. 68 | Inputs: 69 | - captions: Caption indexes in a tensor of shape (Nx)T. 70 | - idx_to_word: Mapping from the vocab index to word. 71 | 72 | Outputs: 73 | - decoded: A sentence (or a list of N sentences). 74 | """ 75 | singleton = False 76 | if captions.ndim == 1: 77 | singleton = True 78 | captions = captions[None] 79 | decoded = [] 80 | N, T = captions.shape 81 | for i in range(N): 82 | words = [] 83 | for t in range(T): 84 | word = idx_to_word[captions[i, t]] 85 | if word != '': 86 | words.append(word) 87 | if word == '': 88 | break 89 | decoded.append(' '.join(words)) 90 | if singleton: 91 | decoded = decoded[0] 92 | return decoded 93 | 94 | 95 | def attention_visualizer(img, attn_weights, token): 96 | """ 97 | Visuailze the attended regions on a single frame from a single query word. 
98 | Inputs: 99 | - img: Image tensor input, of shape (3, H, W) 100 | - attn_weights: Attention weight tensor, on the final activation map 101 | - token: The token string you want to display above the image 102 | 103 | Outputs: 104 | - img_output: Image tensor output, of shape (3, H+25, W) 105 | 106 | """ 107 | C, H, W = img.shape 108 | assert C == 3, 'We only support image with three color channels!' 109 | 110 | # Reshape attention map 111 | attn_weights = cv2.resize(attn_weights.data.numpy().copy(), 112 | (H, W), interpolation=cv2.INTER_NEAREST) 113 | attn_weights = np.repeat(np.expand_dims(attn_weights, axis=2), 3, axis=2) 114 | 115 | # Combine image and attention map 116 | img_copy = img.float().div(255.).permute(1, 2, 0 117 | ).numpy()[:, :, ::-1].copy() # covert to BGR for cv2 118 | masked_img = cv2.addWeighted(attn_weights, 0.5, img_copy, 0.5, 0) 119 | img_copy = np.concatenate((np.zeros((25, W, 3)), 120 | masked_img), axis=0) 121 | 122 | # Add text 123 | cv2.putText(img_copy, '%s' % (token), (10, 15), 124 | cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), thickness=1) 125 | 126 | return img_copy -------------------------------------------------------------------------------- /eecs498-007/A6/eecs598/vis.py: -------------------------------------------------------------------------------- 1 | import random 2 | import cv2 3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torchvision.utils import make_grid 8 | 9 | 10 | """ 11 | Utilities to help with visualizing images and other data 12 | """ 13 | 14 | 15 | def tensor_to_image(tensor): 16 | """ 17 | Convert a torch tensor into a numpy ndarray for visualization. 18 | 19 | Inputs: 20 | - tensor: A torch tensor of shape (3, H, W) with elements in the range [0, 1] 21 | 22 | Returns: 23 | - ndarr: A uint8 numpy array of shape (H, W, 3) 24 | """ 25 | tensor = tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0) 26 | ndarr = tensor.to("cpu", torch.uint8).numpy() 27 | return ndarr 28 | 29 | 30 | def visualize_dataset(X_data, y_data, samples_per_class, class_list): 31 | """ 32 | Make a grid-shape image to plot 33 | 34 | Inputs: 35 | - X_data: set of [batch, 3, width, height] data 36 | - y_data: paired label of X_data in [batch] shape 37 | - samples_per_class: number of samples want to present 38 | - class_list: list of class names; eg, 39 | ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 40 | 41 | Outputs: 42 | - An grid-image that visualize samples_per_class number of samples per class 43 | """ 44 | img_half_width = X_data.shape[2] // 2 45 | samples = [] 46 | for y, cls in enumerate(class_list): 47 | tx = -4 48 | ty = (img_half_width * 2 + 2) * y + (img_half_width + 2) 49 | plt.text(tx, ty, cls, ha="right") 50 | idxs = (y_data == y).nonzero().view(-1) 51 | for i in range(samples_per_class): 52 | idx = idxs[random.randrange(idxs.shape[0])].item() 53 | samples.append(X_data[idx]) 54 | 55 | img = make_grid(samples, nrow=samples_per_class) 56 | return tensor_to_image(img) 57 | 58 | def detection_visualizer(img, idx_to_class, bbox=None, pred=None): 59 | """ 60 | Data visualizer on the original image. Support both GT box input and proposal input. 
61 | 62 | Input: 63 | - img: PIL Image input 64 | - idx_to_class: Mapping from the index (0-19) to the class name 65 | - bbox: GT bbox (in red, optional), a tensor of shape Nx5, where N is 66 | the number of GT boxes, 5 indicates (x_tl, y_tl, x_br, y_br, class) 67 | - pred: Predicted bbox (in green, optional), a tensor of shape N'x6, where 68 | N' is the number of predicted boxes, 6 indicates 69 | (x_tl, y_tl, x_br, y_br, class, object confidence score) 70 | """ 71 | 72 | img_copy = np.array(img).astype('uint8') 73 | 74 | if bbox is not None: 75 | for bbox_idx in range(bbox.shape[0]): 76 | one_bbox = bbox[bbox_idx][:4] 77 | cv2.rectangle(img_copy, (one_bbox[0], one_bbox[1]), (one_bbox[2], 78 | one_bbox[3]), (255, 0, 0), 2) 79 | if bbox.shape[1] > 4: # if class info provided 80 | obj_cls = idx_to_class[bbox[bbox_idx][4].item()] 81 | cv2.putText(img_copy, '%s' % (obj_cls), 82 | (one_bbox[0], one_bbox[1]+15), 83 | cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1) 84 | 85 | if pred is not None: 86 | for bbox_idx in range(pred.shape[0]): 87 | one_bbox = pred[bbox_idx][:4] 88 | cv2.rectangle(img_copy, (one_bbox[0], one_bbox[1]), (one_bbox[2], 89 | one_bbox[3]), (0, 255, 0), 2) 90 | 91 | if pred.shape[1] > 4: # if class and conf score info provided 92 | obj_cls = idx_to_class[pred[bbox_idx][4].item()] 93 | conf_score = pred[bbox_idx][5].item() 94 | cv2.putText(img_copy, '%s, %.2f' % (obj_cls, conf_score), 95 | (one_bbox[0], one_bbox[1]+15), 96 | cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1) 97 | 98 | plt.imshow(img_copy) 99 | plt.axis('off') 100 | plt.show() -------------------------------------------------------------------------------- /eecs498-007/A6/fc_gan_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A6/fc_gan_results.jpg -------------------------------------------------------------------------------- /eecs498-007/A6/ls_gan_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A6/ls_gan_results.jpg -------------------------------------------------------------------------------- /eecs498-007/A6/vae_generation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/eecs498-007/A6/vae_generation.jpg -------------------------------------------------------------------------------- /examples/analog_clock.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/analog_clock.gif -------------------------------------------------------------------------------- /examples/dcgan.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/dcgan.gif -------------------------------------------------------------------------------- /examples/dining_table.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/dining_table.gif -------------------------------------------------------------------------------- /examples/kit_fox.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/kit_fox.gif -------------------------------------------------------------------------------- /examples/style_transfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/style_transfer.png -------------------------------------------------------------------------------- /examples/tarantula.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/tarantula.gif -------------------------------------------------------------------------------- /examples/vanilla_gan.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seloufian/Deep-Learning-Computer-Vision/3f3a790b14dac7c573d0b68e25359109c2dd30a4/examples/vanilla_gan.gif --------------------------------------------------------------------------------