├── .gitignore ├── CNAME ├── LICENSE ├── Readme.md ├── _config.yml ├── _includes ├── footer.html ├── head.html └── header.html ├── _layouts ├── default.html ├── page.html └── post.html ├── acknowledgement.md ├── assets ├── ami-selection.png ├── aws-homepage.png ├── aws-signin.png ├── aws-signup.png ├── cat.jpg ├── cat_tinted.jpg ├── cat_tinted_imshow.png ├── challenges.jpeg ├── classify.png ├── classifydemo.jpeg ├── cnn │ ├── cnn.jpeg │ ├── convnet.jpeg │ ├── depthcol.jpeg │ ├── maxpool.jpeg │ ├── pool.jpeg │ ├── stride.jpeg │ └── weights.jpeg ├── cnnvis │ ├── act1.jpeg │ ├── act2.jpeg │ ├── filt1.jpeg │ ├── filt2.jpeg │ ├── occlude.jpeg │ ├── pool5max.jpeg │ └── tsne.jpeg ├── community-AMIs.png ├── conv-demo │ ├── external │ │ └── d3.min.js │ ├── index.html │ └── utils.js ├── crossval.jpeg ├── cveg.jpeg ├── cvplot.png ├── dataflow.jpeg ├── ec2-dashboard.png ├── eg │ ├── spiral_linear.png │ ├── spiral_net.png │ └── spiral_raw.png ├── imagemap.jpg ├── instance-selection.png ├── instances-page.png ├── ipython-tutorial │ ├── file-browser.png │ ├── notebook-1.png │ ├── notebook-2.png │ ├── notebook-3.png │ ├── notebook-error.png │ └── save-notebook.png ├── key-pair-create.png ├── key-pair.png ├── knn.jpeg ├── launch-screen.png ├── launching-screen.png ├── margin.jpg ├── nn.jpg ├── nn1 │ ├── alexplot.jpeg │ ├── layer_sizes.jpeg │ ├── neural_net.jpeg │ ├── neural_net2.jpeg │ ├── neuron.png │ ├── neuron_model.jpeg │ ├── reg_strengths.jpeg │ ├── relu.jpeg │ ├── sigmoid.jpeg │ └── tanh.jpeg ├── nn2 │ ├── cifar10pca.jpeg │ ├── dropout.jpeg │ ├── prepro1.jpeg │ └── prepro2.jpeg ├── nn3 │ ├── accuracies.jpeg │ ├── cnnweights.jpg │ ├── gridsearchbad.jpeg │ ├── learningrates.jpeg │ ├── loss.jpeg │ ├── nesterov.jpeg │ ├── opt1.gif │ ├── opt2.gif │ ├── updates.jpeg │ ├── values.jpeg │ └── weights.jpeg ├── nneg.jpeg ├── pixels_embed_cifar10.jpg ├── pixels_embed_cifar10_big.jpg ├── pixelspace.jpeg ├── samenorm.png ├── sine.png ├── sine_cosine.png ├── sine_cosine_subplot.png ├── stepsize.jpg ├── svm1d.png ├── svm_all.jpg ├── svm_one.jpg ├── svmbowl.png ├── svmvssoftmax.png ├── templates.jpg ├── terminal-coursework.jpg ├── terminal-development.jpg ├── terminal-my.jpg ├── terminal-shared.jpg ├── trainset.jpg └── wb.jpeg ├── assignment1.md ├── assignment2.md ├── assignment3.md ├── assignments2016 ├── assignment1.md ├── assignment1 │ ├── .gitignore │ ├── README.md │ ├── collectSubmission.sh │ ├── cs231n │ │ ├── __init__.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── k_nearest_neighbor.py │ │ │ ├── linear_classifier.py │ │ │ ├── linear_svm.py │ │ │ ├── neural_net.py │ │ │ └── softmax.py │ │ ├── data_utils.py │ │ ├── datasets │ │ │ ├── .gitignore │ │ │ └── get_datasets.sh │ │ ├── features.py │ │ ├── gradient_check.py │ │ └── vis_utils.py │ ├── features.ipynb │ ├── frameworkpython │ ├── knn.ipynb │ ├── requirements.txt │ ├── softmax.ipynb │ ├── start_ipython_osx.sh │ ├── svm.ipynb │ └── two_layer_net.ipynb ├── assignment2.md ├── assignment2 │ ├── .gitignore │ ├── BatchNormalization.ipynb │ ├── ConvolutionalNetworks.ipynb │ ├── Dropout.ipynb │ ├── FullyConnectedNets.ipynb │ ├── README.md │ ├── collectSubmission.sh │ ├── cs231n │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── classifiers │ │ │ ├── __init__.py │ │ │ ├── cnn.py │ │ │ └── fc_net.py │ │ ├── data_utils.py │ │ ├── datasets │ │ │ ├── .gitignore │ │ │ └── get_datasets.sh │ │ ├── fast_layers.py │ │ ├── gradient_check.py │ │ ├── im2col.py │ │ ├── im2col_cython.pyx │ │ ├── layer_utils.py │ │ ├── layers.py │ │ ├── optim.py │ │ ├── setup.py │ │ ├── 
solver.py │ │ └── vis_utils.py │ ├── frameworkpython │ ├── kitten.jpg │ ├── puppy.jpg │ ├── requirements.txt │ └── start_ipython_osx.sh ├── assignment3.md └── assignment3 │ ├── .gitignore │ ├── ImageGeneration.ipynb │ ├── ImageGradients.ipynb │ ├── LSTM_Captioning.ipynb │ ├── RNN_Captioning.ipynb │ ├── collectSubmission.sh │ ├── cs231n │ ├── .gitignore │ ├── __init__.py │ ├── captioning_solver.py │ ├── classifiers │ │ ├── __init__.py │ │ ├── pretrained_cnn.py │ │ └── rnn.py │ ├── coco_utils.py │ ├── data_utils.py │ ├── datasets │ │ ├── get_coco_captioning.sh │ │ ├── get_pretrained_model.sh │ │ └── get_tiny_imagenet_a.sh │ ├── fast_layers.py │ ├── gradient_check.py │ ├── im2col.py │ ├── im2col_cython.pyx │ ├── image_utils.py │ ├── layer_utils.py │ ├── layers.py │ ├── optim.py │ ├── rnn_layers.py │ └── setup.py │ ├── frameworkpython │ ├── kitten.jpg │ ├── requirements.txt │ ├── sky.jpg │ └── start_ipython_osx.sh ├── aws-tutorial.md ├── captions ├── En │ ├── Lecture10_en.srt │ ├── Lecture11_en.srt │ ├── Lecture12_en.srt │ ├── Lecture13_en.srt │ ├── Lecture14_en.srt │ ├── Lecture15_en.srt │ ├── Lecture1_en.srt │ ├── Lecture2_en.srt │ ├── Lecture3_en.srt │ ├── Lecture4_en.srt │ ├── Lecture5_en.srt │ ├── Lecture6_en.srt │ └── Lecture8_en.srt └── Ko │ ├── Lecture10_ko.srt │ ├── Lecture11_ko.srt │ ├── Lecture12_ko.srt │ ├── Lecture13_ko.srt │ ├── Lecture14_ko.srt │ ├── Lecture15_ko.srt │ ├── Lecture1_ko.srt │ ├── Lecture2_ko.srt │ ├── Lecture3_ko.srt │ ├── Lecture4_ko.srt │ ├── Lecture5_ko.srt │ ├── Lecture6_ko.srt │ └── Lecture8_ko.srt ├── classification.md ├── convnet-tips.md ├── convolutional-networks.md ├── css └── main.css ├── glossary.md ├── index.html ├── ipython-tutorial.md ├── linear-classify.md ├── neural-networks-1.md ├── neural-networks-2.kr.md ├── neural-networks-2.md ├── neural-networks-3.md ├── neural-networks-case-study.md ├── optimization-1.md ├── optimization-2.md ├── overview.md ├── python-numpy-tutorial.md ├── terminal-tutorial.md ├── transfer-learning.md ├── understanding-cnn.md └── video-lectures.md /.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 | .DS_Store 3 | *.swp 4 | .ipynb_checkpoints 5 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | aikorea.org 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrej Karpathy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | 
2 | An English-to-Korean translation project for the notes and assignments of the Stanford CS class [CS231n: Convolutional Neural Networks for Visual Recognition](http://vision.stanford.edu/teaching/cs231n/).
3 | 
4 | ## How to Participate
5 | 
6 | 1. Fork this repository
7 | 2. Translate the assigned file (markdown, ipython-notebook, etc.) into Korean - please refer to the [glossary](http://aikorea.org/cs231n/glossary)
8 | 3. Send a pull request (PR)
9 | 
10 | ## Local Development Instructions
11 | 
12 | To view the rendered site in your browser:
13 | 
14 | 1. Install Jekyll - follow the instructions [here](https://jekyllrb.com/docs/installation/)
15 | 2. Assuming that you have already forked this repo, `git clone https://github.com/yourUserName/cs231n.git`
16 | 3. `cd cs231n`
17 | 4. `jekyll serve`
18 | 5. View the website at http://127.0.0.1:4000/cs231n/
19 | 
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | # Site settings
2 | title: CS231n Convolutional Neural Networks for Visual Recognition
3 | email: team.aikorea@gmail.com
4 | description: "Translation site for the Stanford CS231n: Convolutional Neural Networks for Visual Recognition course materials"
5 | baseurl: "/cs231n"
6 | url: "http://aikorea.org"
7 | twitter_username: kjw6612
8 | github_username: aikorea
9 | 
10 | # Build settings
11 | markdown: kramdown
12 | permalink: pretty
--------------------------------------------------------------------------------
/_includes/footer.html:
--------------------------------------------------------------------------------
1 | 
61 | 
--------------------------------------------------------------------------------
/_includes/head.html:
--------------------------------------------------------------------------------
1 | <head>
2 | 
3 | 
4 | <title>{% if page.title %}{{ page.title }}{% else %}{{ site.title }}{% endif %}</title>
5 | 
6 | 
7 | 
8 | 
9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
26 | 
27 | 
28 | </head>
--------------------------------------------------------------------------------
/_includes/header.html:
--------------------------------------------------------------------------------
1 | 
8 | 
--------------------------------------------------------------------------------
/_layouts/default.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html>
3 | 
4 |   {% include head.html %}
5 | 
6 |   <body>
7 | 
8 |     {% include header.html %}
9 | 
10 |     <div class="page-content">
11 |       <div class="wrapper">
12 |         {{ content }}
13 |       </div>
14 |     </div>
15 | 
16 |     {% include footer.html %}
17 | 
18 |   </body>
19 | 
20 | </html>
21 | 
--------------------------------------------------------------------------------
/_layouts/page.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 | <div class="post">
5 | 
6 |   <header class="post-header">
7 |     <h1 class="post-title">{{ page.title }}</h1>
8 |   </header>
9 | 
10 | 
11 |   <article class="post-content">
12 |     {{ content }}
13 |   </article>
14 | 
15 | </div>
--------------------------------------------------------------------------------
/_layouts/post.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 | <div class="post">
5 | 
6 |   <header class="post-header">
7 |     <h1 class="post-title">{{ page.title }}</h1>
8 |     <p class="post-meta">{{ page.date | date: "%b %-d, %Y" }}{% if page.author %} • {{ page.author }}{% endif %}{% if page.meta %} • {{ page.meta }}{% endif %}</p>
9 |   </header>
10 | 
11 |   <article class="post-content">
12 |     {{ content }}
13 |   </article>
14 | 
-------------------------------------------------------------------------------- /acknowledgement.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | mathjax: true 4 | permalink: /acknowledgement/ 5 | --- 6 | 7 | *(프로젝트 완료 시까지 임시 파일입니다)* 8 | 9 | 다들 바쁘신 와중에 틈틈이 시간내어 번역 프로젝트에 참여해 주신 myungsub, sandrokim, ygchoi, alexseong, ckyun777, dolai, donghun, gnujoow, j-min, jaywhang, jazzsaxmafia, jihoonl, jslee, junghojin, juyong, kjw0612, maybe, okmin, rollis0825, salopge, sanghun, sora, stats2ml, sungjunhong 님께 이 자리를 빌려 감사 말씀을 드립니다. 10 | -------------------------------------------------------------------------------- /assets/ami-selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/ami-selection.png -------------------------------------------------------------------------------- /assets/aws-homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/aws-homepage.png -------------------------------------------------------------------------------- /assets/aws-signin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/aws-signin.png -------------------------------------------------------------------------------- /assets/aws-signup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/aws-signup.png -------------------------------------------------------------------------------- /assets/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cat.jpg -------------------------------------------------------------------------------- /assets/cat_tinted.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cat_tinted.jpg -------------------------------------------------------------------------------- /assets/cat_tinted_imshow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cat_tinted_imshow.png -------------------------------------------------------------------------------- /assets/challenges.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/challenges.jpeg -------------------------------------------------------------------------------- /assets/classify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/classify.png -------------------------------------------------------------------------------- /assets/classifydemo.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/classifydemo.jpeg -------------------------------------------------------------------------------- /assets/cnn/cnn.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/cnn.jpeg -------------------------------------------------------------------------------- /assets/cnn/convnet.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/convnet.jpeg -------------------------------------------------------------------------------- /assets/cnn/depthcol.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/depthcol.jpeg -------------------------------------------------------------------------------- /assets/cnn/maxpool.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/maxpool.jpeg -------------------------------------------------------------------------------- /assets/cnn/pool.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/pool.jpeg -------------------------------------------------------------------------------- /assets/cnn/stride.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/stride.jpeg -------------------------------------------------------------------------------- /assets/cnn/weights.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnn/weights.jpeg -------------------------------------------------------------------------------- /assets/cnnvis/act1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/act1.jpeg -------------------------------------------------------------------------------- /assets/cnnvis/act2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/act2.jpeg -------------------------------------------------------------------------------- /assets/cnnvis/filt1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/filt1.jpeg -------------------------------------------------------------------------------- /assets/cnnvis/filt2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/filt2.jpeg -------------------------------------------------------------------------------- /assets/cnnvis/occlude.jpeg: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/occlude.jpeg
--------------------------------------------------------------------------------
/assets/cnnvis/pool5max.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/pool5max.jpeg
--------------------------------------------------------------------------------
/assets/cnnvis/tsne.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/cnnvis/tsne.jpeg
--------------------------------------------------------------------------------
/assets/community-AMIs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assets/community-AMIs.png
--------------------------------------------------------------------------------
/assets/conv-demo/utils.js:
--------------------------------------------------------------------------------
1 | var U = {};
2 | 
3 | (function(global) {
4 |   "use strict";
5 | 
6 |   // Random number utilities
7 |   var return_v = false;
8 |   var v_val = 0.0;
9 |   var gaussRandom = function() {
10 |     if(return_v) {
11 |       return_v = false;
12 |       return v_val;
13 |     }
14 |     var u = 2*Math.random()-1;
15 |     var v = 2*Math.random()-1;
16 |     var r = u*u + v*v;
17 |     if(r == 0 || r > 1) return gaussRandom();
18 |     var c = Math.sqrt(-2*Math.log(r)/r);
19 |     v_val = v*c; // cache this
20 |     return_v = true;
21 |     return u*c;
22 |   }
23 |   var randf = function(a, b) { return Math.random()*(b-a)+a; }
24 |   var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); }
25 |   var randn = function(mu, std){ return mu+gaussRandom()*std; }
26 | 
27 |   // Array utilities
28 |   var zeros = function(n) {
29 |     if(typeof(n)==='undefined' || isNaN(n)) { return []; }
30 |     if(typeof ArrayBuffer === 'undefined') {
31 |       // lacking browser support
32 |       var arr = new Array(n);
33 |       for(var i=0;i<n;i++) { arr[i] = 0; }
66 |       if(w[i] > maxv) { maxv = w[i]; maxi = i; }
67 |       if(w[i] < minv) { minv = w[i]; mini = i; }
68 |     }
69 |     return {maxi: maxi, maxv: maxv, mini: mini, minv: minv, dv:maxv-minv};
70 |   }
71 | 
72 |   // create random permutation of numbers, in range [0...n-1]
73 |   var randperm = function(n) {
74 |     var i = n,
75 |         j = 0,
76 |         temp;
77 |     var array = [];
78 |     for(var q=0;q<n;q++)array[q]=q;
85 | Translated by: 배지운 (MaybeS)

87 | -------------------------------------------------------------------------------- /assignments2016/assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignments2016/assignment1/README.md: -------------------------------------------------------------------------------- 1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Winter 2016. 2 | -------------------------------------------------------------------------------- /assignments2016/assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip -r assignment1.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" 3 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment1/cs231n/__init__.py -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from cs231n.classifiers.k_nearest_neighbor import * 2 | from cs231n.classifiers.linear_classifier import * 3 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/classifiers/k_nearest_neighbor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class KNearestNeighbor(object): 4 | """ a kNN classifier with L2 distance """ 5 | 6 | def __init__(self): 7 | pass 8 | 9 | def train(self, X, y): 10 | """ 11 | Train the classifier. For k-nearest neighbors this is just 12 | memorizing the training data. 13 | 14 | Inputs: 15 | - X: A numpy array of shape (num_train, D) containing the training data 16 | consisting of num_train samples each of dimension D. 17 | - y: A numpy array of shape (N,) containing the training labels, where 18 | y[i] is the label for X[i]. 19 | """ 20 | self.X_train = X 21 | self.y_train = y 22 | 23 | def predict(self, X, k=1, num_loops=0): 24 | """ 25 | Predict labels for test data using this classifier. 26 | 27 | Inputs: 28 | - X: A numpy array of shape (num_test, D) containing test data consisting 29 | of num_test samples each of dimension D. 30 | - k: The number of nearest neighbors that vote for the predicted labels. 31 | - num_loops: Determines which implementation to use to compute distances 32 | between training points and testing points. 33 | 34 | Returns: 35 | - y: A numpy array of shape (num_test,) containing predicted labels for the 36 | test data, where y[i] is the predicted label for the test point X[i]. 
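
    Illustrative usage (a sketch; X_train, y_train, and X_test stand for
    placeholder arrays of shapes (num_train, D), (num_train,), and
    (num_test, D) that are not defined in this file):

      classifier = KNearestNeighbor()
      classifier.train(X_train, y_train)
      y_test_pred = classifier.predict(X_test, k=5, num_loops=0)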
37 | """ 38 | if num_loops == 0: 39 | dists = self.compute_distances_no_loops(X) 40 | elif num_loops == 1: 41 | dists = self.compute_distances_one_loop(X) 42 | elif num_loops == 2: 43 | dists = self.compute_distances_two_loops(X) 44 | else: 45 | raise ValueError('Invalid value %d for num_loops' % num_loops) 46 | 47 | return self.predict_labels(dists, k=k) 48 | 49 | def compute_distances_two_loops(self, X): 50 | """ 51 | Compute the distance between each test point in X and each training point 52 | in self.X_train using a nested loop over both the training data and the 53 | test data. 54 | 55 | Inputs: 56 | - X: A numpy array of shape (num_test, D) containing test data. 57 | 58 | Returns: 59 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j] 60 | is the Euclidean distance between the ith test point and the jth training 61 | point. 62 | """ 63 | num_test = X.shape[0] 64 | num_train = self.X_train.shape[0] 65 | dists = np.zeros((num_test, num_train)) 66 | for i in xrange(num_test): 67 | for j in xrange(num_train): 68 | ##################################################################### 69 | # TODO: # 70 | # Compute the l2 distance between the ith test point and the jth # 71 | # training point, and store the result in dists[i, j]. You should # 72 | # not use a loop over dimension. # 73 | ##################################################################### 74 | pass 75 | ##################################################################### 76 | # END OF YOUR CODE # 77 | ##################################################################### 78 | return dists 79 | 80 | def compute_distances_one_loop(self, X): 81 | """ 82 | Compute the distance between each test point in X and each training point 83 | in self.X_train using a single loop over the test data. 84 | 85 | Input / Output: Same as compute_distances_two_loops 86 | """ 87 | num_test = X.shape[0] 88 | num_train = self.X_train.shape[0] 89 | dists = np.zeros((num_test, num_train)) 90 | for i in xrange(num_test): 91 | ####################################################################### 92 | # TODO: # 93 | # Compute the l2 distance between the ith test point and all training # 94 | # points, and store the result in dists[i, :]. # 95 | ####################################################################### 96 | pass 97 | ####################################################################### 98 | # END OF YOUR CODE # 99 | ####################################################################### 100 | return dists 101 | 102 | def compute_distances_no_loops(self, X): 103 | """ 104 | Compute the distance between each test point in X and each training point 105 | in self.X_train using no explicit loops. 106 | 107 | Input / Output: Same as compute_distances_two_loops 108 | """ 109 | num_test = X.shape[0] 110 | num_train = self.X_train.shape[0] 111 | dists = np.zeros((num_test, num_train)) 112 | ######################################################################### 113 | # TODO: # 114 | # Compute the l2 distance between all test points and all training # 115 | # points without using any explicit loops, and store the result in # 116 | # dists. # 117 | # # 118 | # You should implement this function using only basic array operations; # 119 | # in particular you should not use functions from scipy. # 120 | # # 121 | # HINT: Try to formulate the l2 distance using matrix multiplication # 122 | # and two broadcast sums. 
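    #                                                                     #
    # As a minimal sketch (one possible formulation, not necessarily the #
    # intended one), using ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b:      #
    #   test_sq = np.sum(X ** 2, axis=1, keepdims=True)   # (num_test, 1)
    #   train_sq = np.sum(self.X_train ** 2, axis=1)      # (num_train,)
    #   dists = np.sqrt(test_sq - 2 * X.dot(self.X_train.T) + train_sq)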
# 123 | ######################################################################### 124 | pass 125 | ######################################################################### 126 | # END OF YOUR CODE # 127 | ######################################################################### 128 | return dists 129 | 130 | def predict_labels(self, dists, k=1): 131 | """ 132 | Given a matrix of distances between test points and training points, 133 | predict a label for each test point. 134 | 135 | Inputs: 136 | - dists: A numpy array of shape (num_test, num_train) where dists[i, j] 137 | gives the distance betwen the ith test point and the jth training point. 138 | 139 | Returns: 140 | - y: A numpy array of shape (num_test,) containing predicted labels for the 141 | test data, where y[i] is the predicted label for the test point X[i]. 142 | """ 143 | num_test = dists.shape[0] 144 | y_pred = np.zeros(num_test) 145 | for i in xrange(num_test): 146 | # A list of length k storing the labels of the k nearest neighbors to 147 | # the ith test point. 148 | closest_y = [] 149 | ######################################################################### 150 | # TODO: # 151 | # Use the distance matrix to find the k nearest neighbors of the ith # 152 | # testing point, and use self.y_train to find the labels of these # 153 | # neighbors. Store these labels in closest_y. # 154 | # Hint: Look up the function numpy.argsort. # 155 | ######################################################################### 156 | pass 157 | ######################################################################### 158 | # TODO: # 159 | # Now that you have found the labels of the k nearest neighbors, you # 160 | # need to find the most common label in the list closest_y of labels. # 161 | # Store this label in y_pred[i]. Break ties by choosing the smaller # 162 | # label. # 163 | ######################################################################### 164 | pass 165 | ######################################################################### 166 | # END OF YOUR CODE # 167 | ######################################################################### 168 | 169 | return y_pred 170 | 171 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/classifiers/linear_classifier.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cs231n.classifiers.linear_svm import * 3 | from cs231n.classifiers.softmax import * 4 | 5 | class LinearClassifier(object): 6 | 7 | def __init__(self): 8 | self.W = None 9 | 10 | def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, 11 | batch_size=200, verbose=False): 12 | """ 13 | Train this linear classifier using stochastic gradient descent. 14 | 15 | Inputs: 16 | - X: A numpy array of shape (N, D) containing training data; there are N 17 | training samples each of dimension D. 18 | - y: A numpy array of shape (N,) containing training labels; y[i] = c 19 | means that X[i] has label 0 <= c < C for C classes. 20 | - learning_rate: (float) learning rate for optimization. 21 | - reg: (float) regularization strength. 22 | - num_iters: (integer) number of steps to take when optimizing 23 | - batch_size: (integer) number of training examples to use at each step. 24 | - verbose: (boolean) If true, print progress during optimization. 25 | 26 | Outputs: 27 | A list containing the value of the loss function at each training iteration. 
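
    Illustrative usage (a sketch; X_train and y_train stand for training
    arrays of shapes (N, D) and (N,) not defined here, and the
    hyperparameter values are arbitrary placeholders):

      svm = LinearSVM()
      loss_hist = svm.train(X_train, y_train, learning_rate=1e-7,
                            reg=5e4, num_iters=1500, verbose=True)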
28 | """ 29 | num_train, dim = X.shape 30 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 31 | if self.W is None: 32 | # lazily initialize W 33 | self.W = 0.001 * np.random.randn(dim, num_classes) 34 | 35 | # Run stochastic gradient descent to optimize W 36 | loss_history = [] 37 | for it in xrange(num_iters): 38 | X_batch = None 39 | y_batch = None 40 | 41 | ######################################################################### 42 | # TODO: # 43 | # Sample batch_size elements from the training data and their # 44 | # corresponding labels to use in this round of gradient descent. # 45 | # Store the data in X_batch and their corresponding labels in # 46 | # y_batch; after sampling X_batch should have shape (dim, batch_size) # 47 | # and y_batch should have shape (batch_size,) # 48 | # # 49 | # Hint: Use np.random.choice to generate indices. Sampling with # 50 | # replacement is faster than sampling without replacement. # 51 | ######################################################################### 52 | pass 53 | ######################################################################### 54 | # END OF YOUR CODE # 55 | ######################################################################### 56 | 57 | # evaluate loss and gradient 58 | loss, grad = self.loss(X_batch, y_batch, reg) 59 | loss_history.append(loss) 60 | 61 | # perform parameter update 62 | ######################################################################### 63 | # TODO: # 64 | # Update the weights using the gradient and the learning rate. # 65 | ######################################################################### 66 | pass 67 | ######################################################################### 68 | # END OF YOUR CODE # 69 | ######################################################################### 70 | 71 | if verbose and it % 100 == 0: 72 | print 'iteration %d / %d: loss %f' % (it, num_iters, loss) 73 | 74 | return loss_history 75 | 76 | def predict(self, X): 77 | """ 78 | Use the trained weights of this linear classifier to predict labels for 79 | data points. 80 | 81 | Inputs: 82 | - X: D x N array of training data. Each column is a D-dimensional point. 83 | 84 | Returns: 85 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional 86 | array of length N, and each element is an integer giving the predicted 87 | class. 88 | """ 89 | y_pred = np.zeros(X.shape[1]) 90 | ########################################################################### 91 | # TODO: # 92 | # Implement this method. Store the predicted labels in y_pred. # 93 | ########################################################################### 94 | pass 95 | ########################################################################### 96 | # END OF YOUR CODE # 97 | ########################################################################### 98 | return y_pred 99 | 100 | def loss(self, X_batch, y_batch, reg): 101 | """ 102 | Compute the loss function and its derivative. 103 | Subclasses will override this. 104 | 105 | Inputs: 106 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N 107 | data points; each point has dimension D. 108 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch. 109 | - reg: (float) regularization strength. 
110 | 111 | Returns: A tuple containing: 112 | - loss as a single float 113 | - gradient with respect to self.W; an array of the same shape as W 114 | """ 115 | pass 116 | 117 | 118 | class LinearSVM(LinearClassifier): 119 | """ A subclass that uses the Multiclass SVM loss function """ 120 | 121 | def loss(self, X_batch, y_batch, reg): 122 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg) 123 | 124 | 125 | class Softmax(LinearClassifier): 126 | """ A subclass that uses the Softmax + Cross-entropy loss function """ 127 | 128 | def loss(self, X_batch, y_batch, reg): 129 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) 130 | 131 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/classifiers/linear_svm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | 4 | def svm_loss_naive(W, X, y, reg): 5 | """ 6 | Structured SVM loss function, naive implementation (with loops). 7 | 8 | Inputs have dimension D, there are C classes, and we operate on minibatches 9 | of N examples. 10 | 11 | Inputs: 12 | - W: A numpy array of shape (D, C) containing weights. 13 | - X: A numpy array of shape (N, D) containing a minibatch of data. 14 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 15 | that X[i] has label c, where 0 <= c < C. 16 | - reg: (float) regularization strength 17 | 18 | Returns a tuple of: 19 | - loss as single float 20 | - gradient with respect to weights W; an array of same shape as W 21 | """ 22 | dW = np.zeros(W.shape) # initialize the gradient as zero 23 | 24 | # compute the loss and the gradient 25 | num_classes = W.shape[1] 26 | num_train = X.shape[0] 27 | loss = 0.0 28 | for i in xrange(num_train): 29 | scores = X[i].dot(W) 30 | correct_class_score = scores[y[i]] 31 | for j in xrange(num_classes): 32 | if j == y[i]: 33 | continue 34 | margin = scores[j] - correct_class_score + 1 # note delta = 1 35 | if margin > 0: 36 | loss += margin 37 | 38 | # Right now the loss is a sum over all training examples, but we want it 39 | # to be an average instead so we divide by num_train. 40 | loss /= num_train 41 | 42 | # Add regularization to the loss. 43 | loss += 0.5 * reg * np.sum(W * W) 44 | 45 | ############################################################################# 46 | # TODO: # 47 | # Compute the gradient of the loss function and store it dW. # 48 | # Rather that first computing the loss and then computing the derivative, # 49 | # it may be simpler to compute the derivative at the same time that the # 50 | # loss is being computed. As a result you may need to modify some of the # 51 | # code above to compute the gradient. # 52 | ############################################################################# 53 | 54 | 55 | return loss, dW 56 | 57 | 58 | def svm_loss_vectorized(W, X, y, reg): 59 | """ 60 | Structured SVM loss function, vectorized implementation. 61 | 62 | Inputs and outputs are the same as svm_loss_naive. 63 | """ 64 | loss = 0.0 65 | dW = np.zeros(W.shape) # initialize the gradient as zero 66 | 67 | ############################################################################# 68 | # TODO: # 69 | # Implement a vectorized version of the structured SVM loss, storing the # 70 | # result in loss. 
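  #                                                                         #
  # A minimal sketch of one possible vectorization (not necessarily the     #
  # intended solution); scores has shape (N, C):                            #
  #   num_train = X.shape[0]                                                #
  #   scores = X.dot(W)                                                     #
  #   correct = scores[np.arange(num_train), y].reshape(-1, 1)              #
  #   margins = np.maximum(0, scores - correct + 1)  # note delta = 1       #
  #   margins[np.arange(num_train), y] = 0                                  #
  #   loss = np.sum(margins) / num_train + 0.5 * reg * np.sum(W * W)        #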
# 71 | ############################################################################# 72 | pass 73 | ############################################################################# 74 | # END OF YOUR CODE # 75 | ############################################################################# 76 | 77 | 78 | ############################################################################# 79 | # TODO: # 80 | # Implement a vectorized version of the gradient for the structured SVM # 81 | # loss, storing the result in dW. # 82 | # # 83 | # Hint: Instead of computing the gradient from scratch, it may be easier # 84 | # to reuse some of the intermediate values that you used to compute the # 85 | # loss. # 86 | ############################################################################# 87 | pass 88 | ############################################################################# 89 | # END OF YOUR CODE # 90 | ############################################################################# 91 | 92 | return loss, dW 93 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/classifiers/softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | 4 | def softmax_loss_naive(W, X, y, reg): 5 | """ 6 | Softmax loss function, naive implementation (with loops) 7 | 8 | Inputs have dimension D, there are C classes, and we operate on minibatches 9 | of N examples. 10 | 11 | Inputs: 12 | - W: A numpy array of shape (D, C) containing weights. 13 | - X: A numpy array of shape (N, D) containing a minibatch of data. 14 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 15 | that X[i] has label c, where 0 <= c < C. 16 | - reg: (float) regularization strength 17 | 18 | Returns a tuple of: 19 | - loss as single float 20 | - gradient with respect to weights W; an array of same shape as W 21 | """ 22 | # Initialize the loss and gradient to zero. 23 | loss = 0.0 24 | dW = np.zeros_like(W) 25 | 26 | ############################################################################# 27 | # TODO: Compute the softmax loss and its gradient using explicit loops. # 28 | # Store the loss in loss and the gradient in dW. If you are not careful # 29 | # here, it is easy to run into numeric instability. Don't forget the # 30 | # regularization! # 31 | ############################################################################# 32 | pass 33 | ############################################################################# 34 | # END OF YOUR CODE # 35 | ############################################################################# 36 | 37 | return loss, dW 38 | 39 | 40 | def softmax_loss_vectorized(W, X, y, reg): 41 | """ 42 | Softmax loss function, vectorized version. 43 | 44 | Inputs and outputs are the same as softmax_loss_naive. 45 | """ 46 | # Initialize the loss and gradient to zero. 47 | loss = 0.0 48 | dW = np.zeros_like(W) 49 | 50 | ############################################################################# 51 | # TODO: Compute the softmax loss and its gradient using no explicit loops. # 52 | # Store the loss in loss and the gradient in dW. If you are not careful # 53 | # here, it is easy to run into numeric instability. Don't forget the # 54 | # regularization! 
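  #                                                                         #
  # A minimal sketch of the standard stability trick (shift each row of     #
  # scores so its maximum is 0 before exponentiating); the loss and         #
  # gradient themselves are left to the TODO above:                         #
  #   scores = X.dot(W)                                                     #
  #   scores -= np.max(scores, axis=1, keepdims=True)                       #
  #   probs = np.exp(scores) / np.sum(np.exp(scores), axis=1, keepdims=True)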
# 55 | ############################################################################# 56 | pass 57 | ############################################################################# 58 | # END OF YOUR CODE # 59 | ############################################################################# 60 | 61 | return loss, dW 62 | 63 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | import os 4 | from scipy.misc import imread 5 | 6 | def load_CIFAR_batch(filename): 7 | """ load single batch of cifar """ 8 | with open(filename, 'rb') as f: 9 | datadict = pickle.load(f) 10 | X = datadict['data'] 11 | Y = datadict['labels'] 12 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float") 13 | Y = np.array(Y) 14 | return X, Y 15 | 16 | def load_CIFAR10(ROOT): 17 | """ load all of cifar """ 18 | xs = [] 19 | ys = [] 20 | for b in range(1,6): 21 | f = os.path.join(ROOT, 'data_batch_%d' % (b, )) 22 | X, Y = load_CIFAR_batch(f) 23 | xs.append(X) 24 | ys.append(Y) 25 | Xtr = np.concatenate(xs) 26 | Ytr = np.concatenate(ys) 27 | del X, Y 28 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch')) 29 | return Xtr, Ytr, Xte, Yte 30 | 31 | def load_tiny_imagenet(path, dtype=np.float32): 32 | """ 33 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 34 | TinyImageNet-200 have the same directory structure, so this can be used 35 | to load any of them. 36 | 37 | Inputs: 38 | - path: String giving path to the directory to load. 39 | - dtype: numpy datatype used to load the data. 40 | 41 | Returns: A tuple of 42 | - class_names: A list where class_names[i] is a list of strings giving the 43 | WordNet names for class i in the loaded dataset. 44 | - X_train: (N_tr, 3, 64, 64) array of training images 45 | - y_train: (N_tr,) array of training labels 46 | - X_val: (N_val, 3, 64, 64) array of validation images 47 | - y_val: (N_val,) array of validation labels 48 | - X_test: (N_test, 3, 64, 64) array of testing images. 49 | - y_test: (N_test,) array of test labels; if test labels are not available 50 | (such as in student code) then y_test will be None. 51 | """ 52 | # First load wnids 53 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 54 | wnids = [x.strip() for x in f] 55 | 56 | # Map wnids to integer labels 57 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 58 | 59 | # Use words.txt to get names for each class 60 | with open(os.path.join(path, 'words.txt'), 'r') as f: 61 | wnid_to_words = dict(line.split('\t') for line in f) 62 | for wnid, words in wnid_to_words.iteritems(): 63 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 64 | class_names = [wnid_to_words[wnid] for wnid in wnids] 65 | 66 | # Next load training data. 
67 | X_train = [] 68 | y_train = [] 69 | for i, wnid in enumerate(wnids): 70 | if (i + 1) % 20 == 0: 71 | print 'loading training data for synset %d / %d' % (i + 1, len(wnids)) 72 | # To figure out the filenames we need to open the boxes file 73 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 74 | with open(boxes_file, 'r') as f: 75 | filenames = [x.split('\t')[0] for x in f] 76 | num_images = len(filenames) 77 | 78 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 79 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 80 | for j, img_file in enumerate(filenames): 81 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 82 | img = imread(img_file) 83 | if img.ndim == 2: 84 | ## grayscale file 85 | img.shape = (64, 64, 1) 86 | X_train_block[j] = img.transpose(2, 0, 1) 87 | X_train.append(X_train_block) 88 | y_train.append(y_train_block) 89 | 90 | # We need to concatenate all training data 91 | X_train = np.concatenate(X_train, axis=0) 92 | y_train = np.concatenate(y_train, axis=0) 93 | 94 | # Next load validation data 95 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 96 | img_files = [] 97 | val_wnids = [] 98 | for line in f: 99 | img_file, wnid = line.split('\t')[:2] 100 | img_files.append(img_file) 101 | val_wnids.append(wnid) 102 | num_val = len(img_files) 103 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 104 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 105 | for i, img_file in enumerate(img_files): 106 | img_file = os.path.join(path, 'val', 'images', img_file) 107 | img = imread(img_file) 108 | if img.ndim == 2: 109 | img.shape = (64, 64, 1) 110 | X_val[i] = img.transpose(2, 0, 1) 111 | 112 | # Next load test images 113 | # Students won't have test labels, so we need to iterate over files in the 114 | # images directory. 115 | img_files = os.listdir(os.path.join(path, 'test', 'images')) 116 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 117 | for i, img_file in enumerate(img_files): 118 | img_file = os.path.join(path, 'test', 'images', img_file) 119 | img = imread(img_file) 120 | if img.ndim == 2: 121 | img.shape = (64, 64, 1) 122 | X_test[i] = img.transpose(2, 0, 1) 123 | 124 | y_test = None 125 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt') 126 | if os.path.isfile(y_test_file): 127 | with open(y_test_file, 'r') as f: 128 | img_file_to_wnid = {} 129 | for line in f: 130 | line = line.split('\t') 131 | img_file_to_wnid[line[0]] = line[1] 132 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files] 133 | y_test = np.array(y_test) 134 | 135 | return class_names, X_train, y_train, X_val, y_val, X_test, y_test 136 | 137 | 138 | def load_models(models_dir): 139 | """ 140 | Load saved models from disk. This will attempt to unpickle all files in a 141 | directory; any files that give errors on unpickling (such as README.txt) will 142 | be skipped. 143 | 144 | Inputs: 145 | - models_dir: String giving the path to a directory containing model files. 146 | Each model file is a pickled dictionary with a 'model' field. 147 | 148 | Returns: 149 | A dictionary mapping model file names to models. 
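
  Illustrative call (a sketch; 'saved_models/' is a placeholder path):

    models = load_models('saved_models/')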
150 | """ 151 | models = {} 152 | for model_file in os.listdir(models_dir): 153 | with open(os.path.join(models_dir, model_file), 'rb') as f: 154 | try: 155 | models[model_file] = pickle.load(f)['model'] 156 | except pickle.UnpicklingError: 157 | continue 158 | return models 159 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | cifar-10-batches-py/* 2 | tiny-imagenet-100-A* 3 | tiny-imagenet-100-B* 4 | tiny-100-A-pretrained/* 5 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/features.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import numpy as np 3 | from scipy.ndimage import uniform_filter 4 | 5 | 6 | def extract_features(imgs, feature_fns, verbose=False): 7 | """ 8 | Given pixel data for images and several feature functions that can operate on 9 | single images, apply all feature functions to all images, concatenating the 10 | feature vectors for each image and storing the features for all images in 11 | a single matrix. 12 | 13 | Inputs: 14 | - imgs: N x H X W X C array of pixel data for N images. 15 | - feature_fns: List of k feature functions. The ith feature function should 16 | take as input an H x W x D array and return a (one-dimensional) array of 17 | length F_i. 18 | - verbose: Boolean; if true, print progress. 19 | 20 | Returns: 21 | An array of shape (N, F_1 + ... + F_k) where each column is the concatenation 22 | of all features for a single image. 23 | """ 24 | num_images = imgs.shape[0] 25 | if num_images == 0: 26 | return np.array([]) 27 | 28 | # Use the first image to determine feature dimensions 29 | feature_dims = [] 30 | first_image_features = [] 31 | for feature_fn in feature_fns: 32 | feats = feature_fn(imgs[0].squeeze()) 33 | assert len(feats.shape) == 1, 'Feature functions must be one-dimensional' 34 | feature_dims.append(feats.size) 35 | first_image_features.append(feats) 36 | 37 | # Now that we know the dimensions of the features, we can allocate a single 38 | # big array to store all features as columns. 39 | total_feature_dim = sum(feature_dims) 40 | imgs_features = np.zeros((num_images, total_feature_dim)) 41 | imgs_features[0] = np.hstack(first_image_features).T 42 | 43 | # Extract features for the rest of the images. 
44 |   for i in xrange(1, num_images):
45 |     idx = 0
46 |     for feature_fn, feature_dim in zip(feature_fns, feature_dims):
47 |       next_idx = idx + feature_dim
48 |       imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
49 |       idx = next_idx
50 |     if verbose and i % 1000 == 0:
51 |       print 'Done extracting features for %d / %d images' % (i, num_images)
52 | 
53 |   return imgs_features
54 | 
55 | 
56 | def rgb2gray(rgb):
57 |   """Convert RGB image to grayscale
58 | 
59 |     Parameters:
60 |       rgb : RGB image
61 | 
62 |     Returns:
63 |       gray : grayscale image
64 | 
65 |   """
66 |   return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])
67 | 
68 | 
69 | def hog_feature(im):
70 |   """Compute a Histogram of Oriented Gradients (HOG) feature for an image
71 | 
72 |        Modified from skimage.feature.hog
73 |        http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog
74 | 
75 |      Reference:
76 |        Histograms of Oriented Gradients for Human Detection
77 |        Navneet Dalal and Bill Triggs, CVPR 2005
78 | 
79 |     Parameters:
80 |       im : an input grayscale or rgb image
81 | 
82 |     Returns:
83 |       feat: Histogram of Oriented Gradients (HOG) feature
84 | 
85 |   """
86 | 
87 |   # convert rgb to grayscale if needed
88 |   if im.ndim == 3:
89 |     image = rgb2gray(im)
90 |   else:
91 |     image = np.atleast_2d(im)
92 | 
93 |   sx, sy = image.shape # image size
94 |   orientations = 9 # number of gradient bins
95 |   cx, cy = (8, 8) # pixels per cell
96 | 
97 |   gx = np.zeros(image.shape)
98 |   gy = np.zeros(image.shape)
99 |   gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction
100 |   gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction
101 |   grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude
102 |   grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation
103 | 
104 |   n_cellsx = int(np.floor(sx / cx))  # number of cells in x
105 |   n_cellsy = int(np.floor(sy / cy))  # number of cells in y
106 |   # compute orientations integral images
107 |   orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
108 |   for i in range(orientations):
109 |     # create new integral image for this orientation
110 |     # isolate orientations in this range
111 |     temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
112 |                         grad_ori, 0)
113 |     temp_ori = np.where(grad_ori >= 180 / orientations * i,
114 |                         temp_ori, 0)
115 |     # select magnitudes for those orientations
116 |     cond2 = temp_ori > 0
117 |     temp_mag = np.where(cond2, grad_mag, 0)
118 |     orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[cx/2::cx, cy/2::cy].T
119 | 
120 |   return orientation_histogram.ravel()
121 | 
122 | 
123 | def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
124 |   """
125 |   Compute color histogram for an image using hue.
126 | 
127 |   Inputs:
128 |   - im: H x W x C array of pixel data for an RGB image.
129 |   - nbin: Number of histogram bins. (default: 10)
130 |   - xmin: Minimum pixel value (default: 0)
131 |   - xmax: Maximum pixel value (default: 255)
132 |   - normalized: Whether to normalize the histogram (default: True)
133 | 
134 |   Returns:
135 |     1D vector of length nbin giving the color histogram over the hue of the
136 |     input image.
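
  Illustrative call (a sketch; im stands for any placeholder H x W x 3
  RGB array):

    hist = color_histogram_hsv(im, nbin=10)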
137 | """ 138 | ndim = im.ndim 139 | bins = np.linspace(xmin, xmax, nbin+1) 140 | hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax 141 | imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized) 142 | imhist = imhist * np.diff(bin_edges) 143 | 144 | # return histogram 145 | return imhist 146 | 147 | 148 | pass 149 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randrange 3 | 4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 5 | """ 6 | a naive implementation of numerical gradient of f at x 7 | - f should be a function that takes a single argument 8 | - x is the point (numpy array) to evaluate the gradient at 9 | """ 10 | 11 | fx = f(x) # evaluate function value at original point 12 | grad = np.zeros_like(x) 13 | # iterate over all indexes in x 14 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 15 | while not it.finished: 16 | 17 | # evaluate function at x+h 18 | ix = it.multi_index 19 | oldval = x[ix] 20 | x[ix] = oldval + h # increment by h 21 | fxph = f(x) # evalute f(x + h) 22 | x[ix] = oldval - h 23 | fxmh = f(x) # evaluate f(x - h) 24 | x[ix] = oldval # restore 25 | 26 | # compute the partial derivative with centered formula 27 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 28 | if verbose: 29 | print ix, grad[ix] 30 | it.iternext() # step to next dimension 31 | 32 | return grad 33 | 34 | 35 | def eval_numerical_gradient_array(f, x, df, h=1e-5): 36 | """ 37 | Evaluate a numeric gradient for a function that accepts a numpy 38 | array and returns a numpy array. 39 | """ 40 | grad = np.zeros_like(x) 41 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 42 | while not it.finished: 43 | ix = it.multi_index 44 | 45 | oldval = x[ix] 46 | x[ix] = oldval + h 47 | pos = f(x).copy() 48 | x[ix] = oldval - h 49 | neg = f(x).copy() 50 | x[ix] = oldval 51 | 52 | grad[ix] = np.sum((pos - neg) * df) / (2 * h) 53 | it.iternext() 54 | return grad 55 | 56 | 57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5): 58 | """ 59 | Compute numeric gradients for a function that operates on input 60 | and output blobs. 61 | 62 | We assume that f accepts several input blobs as arguments, followed by a blob 63 | into which outputs will be written. For example, f might be called like this: 64 | 65 | f(x, w, out) 66 | 67 | where x and w are input Blobs, and the result of f will be written to out. 
68 | 69 | Inputs: 70 | - f: function 71 | - inputs: tuple of input blobs 72 | - output: output blob 73 | - h: step size 74 | """ 75 | numeric_diffs = [] 76 | for input_blob in inputs: 77 | diff = np.zeros_like(input_blob.diffs) 78 | it = np.nditer(input_blob.vals, flags=['multi_index'], 79 | op_flags=['readwrite']) 80 | while not it.finished: 81 | idx = it.multi_index 82 | orig = input_blob.vals[idx] 83 | 84 | input_blob.vals[idx] = orig + h 85 | f(*(inputs + (output,))) 86 | pos = np.copy(output.vals) 87 | input_blob.vals[idx] = orig - h 88 | f(*(inputs + (output,))) 89 | neg = np.copy(output.vals) 90 | input_blob.vals[idx] = orig 91 | 92 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h) 93 | 94 | it.iternext() 95 | numeric_diffs.append(diff) 96 | return numeric_diffs 97 | 98 | 99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5): 100 | return eval_numerical_gradient_blobs(lambda *args: net.forward(), 101 | inputs, output, h=h) 102 | 103 | 104 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5): 105 | """ 106 | sample a few random elements and only return numerical 107 | in this dimensions. 108 | """ 109 | 110 | for i in xrange(num_checks): 111 | ix = tuple([randrange(m) for m in x.shape]) 112 | 113 | oldval = x[ix] 114 | x[ix] = oldval + h # increment by h 115 | fxph = f(x) # evaluate f(x + h) 116 | x[ix] = oldval - h # increment by h 117 | fxmh = f(x) # evaluate f(x - h) 118 | x[ix] = oldval # reset 119 | 120 | grad_numerical = (fxph - fxmh) / (2 * h) 121 | grad_analytic = analytic_grad[ix] 122 | rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic)) 123 | print 'numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error) 124 | 125 | -------------------------------------------------------------------------------- /assignments2016/assignment1/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from math import sqrt, ceil 2 | import numpy as np 3 | 4 | def visualize_grid(Xs, ubound=255.0, padding=1): 5 | """ 6 | Reshape a 4D tensor of image data to a grid for easy visualization. 
7 | 8 | Inputs: 9 | - Xs: Data of shape (N, H, W, C) 10 | - ubound: Output grid will have values scaled to the range [0, ubound] 11 | - padding: The number of blank pixels between elements of the grid 12 | """ 13 | (N, H, W, C) = Xs.shape 14 | grid_size = int(ceil(sqrt(N))) 15 | grid_height = H * grid_size + padding * (grid_size - 1) 16 | grid_width = W * grid_size + padding * (grid_size - 1) 17 | grid = np.zeros((grid_height, grid_width, C)) 18 | next_idx = 0 19 | y0, y1 = 0, H 20 | for y in xrange(grid_size): 21 | x0, x1 = 0, W 22 | for x in xrange(grid_size): 23 | if next_idx < N: 24 | img = Xs[next_idx] 25 | low, high = np.min(img), np.max(img) 26 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 27 | # grid[y0:y1, x0:x1] = Xs[next_idx] 28 | next_idx += 1 29 | x0 += W + padding 30 | x1 += W + padding 31 | y0 += H + padding 32 | y1 += H + padding 33 | # grid_max = np.max(grid) 34 | # grid_min = np.min(grid) 35 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 36 | return grid 37 | 38 | def vis_grid(Xs): 39 | """ visualize a grid of images """ 40 | (N, H, W, C) = Xs.shape 41 | A = int(ceil(sqrt(N))) 42 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 43 | G *= np.min(Xs) 44 | n = 0 45 | for y in range(A): 46 | for x in range(A): 47 | if n < N: 48 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 49 | n += 1 50 | # normalize to [0,1] 51 | maxg = G.max() 52 | ming = G.min() 53 | G = (G - ming)/(maxg-ming) 54 | return G 55 | 56 | def vis_nn(rows): 57 | """ visualize array of arrays of images """ 58 | N = len(rows) 59 | D = len(rows[0]) 60 | H,W,C = rows[0][0].shape 61 | Xs = rows[0][0] 62 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 63 | for y in range(N): 64 | for x in range(D): 65 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 66 | # normalize to [0,1] 67 | maxg = G.max() 68 | ming = G.min() 69 | G = (G - ming)/(maxg-ming) 70 | return G 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /assignments2016/assignment1/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | PYVER=2.7 5 | PATHTOPYTHON=/usr/local/bin/ 6 | PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"` 10 | 11 | # now run Python with the virtualenv set as Python's HOME 12 | export PYTHONHOME=$ENV 13 | exec $PYTHON "$@" 14 | -------------------------------------------------------------------------------- /assignments2016/assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Jinja2==2.8 2 | MarkupSafe==0.23 3 | Pillow==3.0.0 4 | Pygments==2.0.2 5 | appnope==0.1.0 6 | backports-abc==0.4 7 | backports.ssl-match-hostname==3.5.0.1 8 | certifi==2015.11.20.1 9 | cycler==0.9.0 10 | decorator==4.0.6 11 | functools32==3.2.3-2 12 | gnureadline==6.3.3 13 | ipykernel==4.2.2 14 | ipython==4.0.1 15 | ipython-genutils==0.1.0 16 | ipywidgets==4.1.1 17 | jsonschema==2.5.1 18 | jupyter==1.0.0 19 | jupyter-client==4.1.1 20 | jupyter-console==4.0.3 21 | jupyter-core==4.0.6 22 | matplotlib==1.5.0 23 | mistune==0.7.1 24 | nbconvert==4.1.0 25 | nbformat==4.0.1 26 | notebook==4.0.6 27 | numpy==1.10.4 28 | path.py==8.1.2 29 | pexpect==4.0.1 30 | pickleshare==0.5 31 | ptyprocess==0.5 32 | pyparsing==2.0.7 33 | python-dateutil==2.4.2 
34 | pytz==2015.7
35 | pyzmq==15.1.0
36 | qtconsole==4.1.1
37 | scipy==0.16.1
38 | simplegeneric==0.8.1
39 | singledispatch==3.4.0.3
40 | six==1.10.0
41 | terminado==0.5
42 | tornado==4.3
43 | traitlets==4.0.0
44 | wsgiref==0.1.2
45 |
-------------------------------------------------------------------------------- /assignments2016/assignment1/start_ipython_osx.sh: --------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 |
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 |
-------------------------------------------------------------------------------- /assignments2016/assignment2.md: --------------------------------------------------------------------------------
1 | ---
2 | layout: page
3 | mathjax: true
4 | permalink: assignments2016/assignment2/
5 | ---
6 |
7 | In this assignment you will practice writing backpropagation code and training basic neural networks and convolutional neural networks. The goals of this assignment are as follows.
8 |
9 | - understand **neural networks** and how they are arranged in layered architectures
10 | - understand **backpropagation** and implement it in (vectorized) code
11 | - implement the various **update rules** used to train neural networks
12 | - implement **batch normalization**, needed for training deep neural networks
13 | - implement **dropout**, needed for regularizing networks
14 | - find good values for the various hyperparameters of a neural network architecture through effective **cross-validation**
15 | - understand the architecture of **convolutional neural networks** and gain experience training these models on real data
16 |
17 | ## Setup
18 | You can work on the assignment in one of two ways: in a virtual environment through Terminal.com, or locally on your own machine.
19 |
20 | ### Working in a virtual environment on Terminal
21 | Terminal has created a subdomain to serve our class. Register an account at [www.stanfordterminalcloud.com](https://www.stanfordterminalcloud.com). The snapshot for this assignment can be found [here](https://www.stanfordterminalcloud.com/snapshot/6c95ca2c9866a962964ede3ea5813d4c2410ba48d92cf8d11a93fbb13e08b76a). If you are registered in the class, you can ask the TAs (see Piazza for more information) for Terminal credits to use for this class. Once you boot up the snapshot, everything you need for the class will already be installed, so you can start the assignment right away. We have written a short tutorial on Terminal [here](/terminal-tutorial).
22 |
23 | ### Working locally
24 | Download the zip file [here](http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip) and follow the steps below.
25 |
26 | **[Option 1] Use Anaconda:**
27 | The most common approach is to install [Anaconda](https://www.continuum.io/downloads), a Python distribution that includes most of the major packages for science, math, engineering and data analysis. Once it is installed you can skip all the dependency steps and start working on the assignment right away.
28 |
29 | **[Option 2] Manual install, virtual environment:**
30 | If you would rather take a more general but trickier route than Anaconda, you can create a [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for this assignment. If you do not use a virtual environment, all of the code's dependencies will be installed globally on your machine. See below for setting up the virtual environment.
31 |
32 | ~~~bash
33 | cd assignment2
34 | sudo pip install virtualenv # this may already be installed
35 | virtualenv .env # create a virtual environment
36 | source .env/bin/activate # activate the virtual environment
37 | pip install -r requirements.txt # install dependencies
38 | # Work on the assignment for a while ...
39 | deactivate # exit the virtual environment
40 | ~~~
41 |
42 | **Download the dataset:**
43 | Before starting the assignment you need to download the CIFAR-10 dataset. Run the following from the `assignment2` folder:
44 |
45 | ~~~bash
46 | cd cs231n/datasets
47 | ./get_datasets.sh
48 | ~~~
49 |
50 | **Compile the Cython extension:** Convolutional neural networks need a very efficient implementation. We have implemented some of the functionality using [Cython](http://cython.org/), so you will need to compile the Cython extension before running the code.
Run the following command from the `cs231n` directory:
51 |
52 | ~~~bash
53 | python setup.py build_ext --inplace
54 | ~~~
55 |
56 | **Start IPython:**
57 | Once you have the CIFAR-10 data, you can start the IPython notebook server from the `assignment2` folder. If you are unfamiliar with IPython, we recommend reading the [IPython tutorial](/ipython-tutorial) we have written.
58 |
59 | **NOTE:** If you run a virtual environment on OSX, you may get a matplotlib error ([issue about this problem](http://matplotlib.org/faq/virtualenv_faq.html)). You can work around it by starting the IPython server with `start_ipython_osx.sh` in the `assignment2` folder; the script assumes that your virtual environment is named `.env`.
60 |
61 | ### Submitting your work:
62 | Whether you work locally or on Terminal, once you have finished the assignment run the `collectSubmission.sh` script. It produces a file called `assignment2.zip`. Upload this file to [the coursework](https://coursework.stanford.edu/portal/site/W16-CS-231N-01/) page.
63 |
64 | ### Q1: Fully-connected neural network (30 points)
65 | The IPython notebook `FullyConnectedNets.ipynb` introduces a modular layer design, which you will then use to implement fully-connected networks of arbitrary depth. To optimize these models you will implement several commonly used update rules.
66 |
67 | ### Q2: Batch normalization (30 points)
68 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch normalization and use it to train deep fully-connected networks.
69 |
70 | ### Q3: Dropout (10 points)
71 | In the IPython notebook `Dropout.ipynb` you will implement dropout and explore its effect on model generalization.
72 |
73 | ### Q4: Convolutional networks on CIFAR-10 (30 points)
74 | In the IPython notebook `ConvolutionalNetworks.ipynb` you will implement several new layers that are commonly used in convolutional networks. You will first train a (shallow, small-scale) convolutional network on the CIFAR-10 dataset, and then do your best to squeeze out the highest performance you can.
75 |
76 | ### Q5: Extra credit: Do something extra! (up to +10 points)
77 | While training your networks, feel free to implement anything extra you need for better performance. You can change the solver, implement additional layers, use a different kind of regularization, try model ensembles, or anything else that comes to mind. If you implement new ideas not covered in this assignment, you may receive extra points.
78 |
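For reference while working through Q2-Q4 above: the "inverted dropout" variant this assignment builds can be summarized in a few lines. This is a hedged sketch of the general technique only, not the notebook's solution; the keep probability `p` and the function names are illustrative:

~~~python
import numpy as np

def dropout_forward_sketch(x, p=0.5, train=True):
    """Inverted dropout: rescale at train time so test time is a no-op.

    Here p is the probability of keeping a unit (an illustrative convention;
    the notebook defines its own parameter names).
    """
    if not train:
        return x, None
    mask = (np.random.rand(*x.shape) < p) / p  # drop and rescale in one step
    return x * mask, mask

def dropout_backward_sketch(dout, mask):
    # Gradient flows only through the kept units, with the same 1/p scaling.
    return dout * mask
~~~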
-------------------------------------------------------------------------------- /assignments2016/assignment2/.gitignore: --------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | .env/*
4 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/README.md: --------------------------------------------------------------------------------
1 | In this assignment you will practice writing backpropagation code, and training
2 | Neural Networks and Convolutional Neural Networks. The goals of this assignment
3 | are as follows:
4 |
5 | - understand **Neural Networks** and how they are arranged in layered
6 | architectures
7 | - understand and be able to implement (vectorized) **backpropagation**
8 | - implement various **update rules** used to optimize Neural Networks
9 | - implement **batch normalization** for training deep networks
10 | - implement **dropout** to regularize networks
11 | - effectively **cross-validate** and find the best hyperparameters for Neural
12 | Network architecture
13 | - understand the architecture of **Convolutional Neural Networks** and
14 | gain experience with training these models on data
15 |
16 | ## Setup
17 | You can work on the assignment in one of two ways: locally on your own machine,
18 | or on a virtual machine through Terminal.com.
19 |
20 | ### Working in the cloud on Terminal
21 |
22 | Terminal has created a separate subdomain to serve our class,
23 | [www.stanfordterminalcloud.com](https://www.stanfordterminalcloud.com). Register
24 | your account there. The Assignment 2 snapshot can then be found HERE. If you are
25 | registered in the class you can contact the TA (see Piazza for more information)
26 | to request Terminal credits for use on the assignment. Once you boot up the
27 | snapshot everything will be installed for you, and you will be ready to start on
28 | your assignment right away. We have written a small tutorial on Terminal
29 | [here](http://cs231n.github.io/terminal-tutorial/).
30 |
31 | ### Working locally
32 | Get the code as a zip file
33 | [here](http://vision.stanford.edu/teaching/cs231n/winter1516_assignment2.zip).
34 | As for the dependencies:
35 |
36 | **[Option 1] Use Anaconda:**
37 | The preferred approach for installing all the assignment dependencies is to use
38 | [Anaconda](https://www.continuum.io/downloads), which is a Python distribution
39 | that includes many of the most popular Python packages for science, math,
40 | engineering and data analysis. Once you install it you can skip all mentions of
41 | requirements and you are ready to go directly to working on the assignment.
42 |
43 | **[Option 2] Manual install, virtual environment:**
44 | If you do not want to use Anaconda and want to go with a more manual and risky
45 | installation route you will likely want to create a
46 | [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/)
47 | for the project. If you choose not to use a virtual environment, it is up to you
48 | to make sure that all dependencies for the code are installed globally on your
49 | machine. To set up a virtual environment, run the following:
50 |
51 | ```bash
52 | cd assignment2
53 | sudo pip install virtualenv # This may already be installed
54 | virtualenv .env # Create a virtual environment
55 | source .env/bin/activate # Activate the virtual environment
56 | pip install -r requirements.txt # Install dependencies
57 | # Work on the assignment for a while ...
58 | deactivate # Exit the virtual environment
59 | ```
60 |
61 | **Download data:**
62 | Once you have the starter code, you will need to download the CIFAR-10 dataset.
63 | Run the following from the `assignment2` directory:
64 |
65 | ```bash
66 | cd cs231n/datasets
67 | ./get_datasets.sh
68 | ```
69 |
70 | **Compile the Cython extension:** Convolutional Neural Networks require a very
71 | efficient implementation. We have implemented some of the functionality using
72 | [Cython](http://cython.org/); you will need to compile the Cython extension
73 | before you can run the code. From the `cs231n` directory, run the following
74 | command:
75 |
76 | ```bash
77 | python setup.py build_ext --inplace
78 | ```
79 |
80 | **Start IPython:**
81 | After you have the CIFAR-10 data, you should start the IPython notebook server
82 | from the `assignment2` directory. If you are unfamiliar with IPython, you should
83 | read our [IPython tutorial](http://cs231n.github.io/ipython-tutorial/).
84 |
85 | **NOTE:** If you are working in a virtual environment on OSX, you may encounter
86 | errors with matplotlib due to the
87 | [issues described here](http://matplotlib.org/faq/virtualenv_faq.html).
88 | You can work around this issue by starting the IPython server using the
89 | `start_ipython_osx.sh` script from the `assignment2` directory; the script
90 | assumes that your virtual environment is named `.env`.
91 |
92 |
93 | ### Submitting your work:
94 | Whether you work on the assignment locally or using Terminal, once you are done
95 | working run the `collectSubmission.sh` script; this will produce a file called
96 | `assignment2.zip`.
Upload this file to your dropbox on 97 | [the coursework](https://coursework.stanford.edu/portal/site/W15-CS-231N-01/) 98 | page for the course. 99 | 100 | 101 | ### Q1: Fully-connected Neural Network (30 points) 102 | The IPython notebook `FullyConnectedNets.ipynb` will introduce you to our 103 | modular layer design, and then use those layers to implement fully-connected 104 | networks of arbitrary depth. To optimize these models you will implement several 105 | popular update rules. 106 | 107 | ### Q2: Batch Normalization (30 points) 108 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch 109 | normalization, and use it to train deep fully-connected networks. 110 | 111 | ### Q3: Dropout (10 points) 112 | The IPython notebook `Dropout.ipynb` will help you implement Dropout and explore 113 | its effects on model generalization. 114 | 115 | ### Q4: ConvNet on CIFAR-10 (30 points) 116 | In the IPython Notebook `ConvolutionalNetworks.ipynb` you will implement several 117 | new layers that are commonly used in convolutional networks. You will train a 118 | (shallow) convolutional network on CIFAR-10, and it will then be up to you to 119 | train the best network that you can. 120 | 121 | ### Q5: Do something extra! (up to +10 points) 122 | In the process of training your network, you should feel free to implement 123 | anything that you want to get better performance. You can modify the solver, 124 | implement additional layers, use different types of regularization, use an 125 | ensemble of models, or anything else that comes to mind. If you implement these 126 | or other ideas not covered in the assignment then you will be awarded some bonus 127 | points. 128 | 129 | -------------------------------------------------------------------------------- /assignments2016/assignment2/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment2.zip 2 | zip -r assignment2.zip . 
-x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 3 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment2/cs231n/__init__.py -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment2/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/classifiers/cnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cs231n.layers import * 4 | from cs231n.fast_layers import * 5 | from cs231n.layer_utils import * 6 | 7 | 8 | class ThreeLayerConvNet(object): 9 | """ 10 | A three-layer convolutional network with the following architecture: 11 | 12 | conv - relu - 2x2 max pool - affine - relu - affine - softmax 13 | 14 | The network operates on minibatches of data that have shape (N, C, H, W) 15 | consisting of N images, each with height H and width W and with C input 16 | channels. 17 | """ 18 | 19 | def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7, 20 | hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0, 21 | dtype=np.float32): 22 | """ 23 | Initialize a new network. 24 | 25 | Inputs: 26 | - input_dim: Tuple (C, H, W) giving size of input data 27 | - num_filters: Number of filters to use in the convolutional layer 28 | - filter_size: Size of filters to use in the convolutional layer 29 | - hidden_dim: Number of units to use in the fully-connected hidden layer 30 | - num_classes: Number of scores to produce from the final affine layer. 31 | - weight_scale: Scalar giving standard deviation for random initialization 32 | of weights. 33 | - reg: Scalar giving L2 regularization strength 34 | - dtype: numpy datatype to use for computation. 35 | """ 36 | self.params = {} 37 | self.reg = reg 38 | self.dtype = dtype 39 | 40 | ############################################################################ 41 | # TODO: Initialize weights and biases for the three-layer convolutional # 42 | # network. Weights should be initialized from a Gaussian with standard # 43 | # deviation equal to weight_scale; biases should be initialized to zero. # 44 | # All weights and biases should be stored in the dictionary self.params. # 45 | # Store weights and biases for the convolutional layer using the keys 'W1' # 46 | # and 'b1'; use keys 'W2' and 'b2' for the weights and biases of the # 47 | # hidden affine layer, and keys 'W3' and 'b3' for the weights and biases # 48 | # of the output affine layer. 
# 49 | ############################################################################ 50 | pass 51 | ############################################################################ 52 | # END OF YOUR CODE # 53 | ############################################################################ 54 | 55 | for k, v in self.params.iteritems(): 56 | self.params[k] = v.astype(dtype) 57 | 58 | 59 | def loss(self, X, y=None): 60 | """ 61 | Evaluate loss and gradient for the three-layer convolutional network. 62 | 63 | Input / output: Same API as TwoLayerNet in fc_net.py. 64 | """ 65 | W1, b1 = self.params['W1'], self.params['b1'] 66 | W2, b2 = self.params['W2'], self.params['b2'] 67 | W3, b3 = self.params['W3'], self.params['b3'] 68 | 69 | # pass conv_param to the forward pass for the convolutional layer 70 | filter_size = W1.shape[2] 71 | conv_param = {'stride': 1, 'pad': (filter_size - 1) / 2} 72 | 73 | # pass pool_param to the forward pass for the max-pooling layer 74 | pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} 75 | 76 | scores = None 77 | ############################################################################ 78 | # TODO: Implement the forward pass for the three-layer convolutional net, # 79 | # computing the class scores for X and storing them in the scores # 80 | # variable. # 81 | ############################################################################ 82 | pass 83 | ############################################################################ 84 | # END OF YOUR CODE # 85 | ############################################################################ 86 | 87 | if y is None: 88 | return scores 89 | 90 | loss, grads = 0, {} 91 | ############################################################################ 92 | # TODO: Implement the backward pass for the three-layer convolutional net, # 93 | # storing the loss and gradients in the loss and grads variables. Compute # 94 | # data loss using softmax, and make sure that grads[k] holds the gradients # 95 | # for self.params[k]. Don't forget to add L2 regularization! 
# 96 | ############################################################################ 97 | pass 98 | ############################################################################ 99 | # END OF YOUR CODE # 100 | ############################################################################ 101 | 102 | return loss, grads 103 | 104 | 105 | pass 106 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | import cPickle as pickle 2 | import numpy as np 3 | import os 4 | from scipy.misc import imread 5 | 6 | def load_CIFAR_batch(filename): 7 | """ load single batch of cifar """ 8 | with open(filename, 'rb') as f: 9 | datadict = pickle.load(f) 10 | X = datadict['data'] 11 | Y = datadict['labels'] 12 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float") 13 | Y = np.array(Y) 14 | return X, Y 15 | 16 | def load_CIFAR10(ROOT): 17 | """ load all of cifar """ 18 | xs = [] 19 | ys = [] 20 | for b in range(1,6): 21 | f = os.path.join(ROOT, 'data_batch_%d' % (b, )) 22 | X, Y = load_CIFAR_batch(f) 23 | xs.append(X) 24 | ys.append(Y) 25 | Xtr = np.concatenate(xs) 26 | Ytr = np.concatenate(ys) 27 | del X, Y 28 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch')) 29 | return Xtr, Ytr, Xte, Yte 30 | 31 | 32 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): 33 | """ 34 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 35 | it for classifiers. These are the same steps as we used for the SVM, but 36 | condensed to a single function. 37 | """ 38 | # Load the raw CIFAR-10 data 39 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 40 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 41 | 42 | # Subsample the data 43 | mask = range(num_training, num_training + num_validation) 44 | X_val = X_train[mask] 45 | y_val = y_train[mask] 46 | mask = range(num_training) 47 | X_train = X_train[mask] 48 | y_train = y_train[mask] 49 | mask = range(num_test) 50 | X_test = X_test[mask] 51 | y_test = y_test[mask] 52 | 53 | # Normalize the data: subtract the mean image 54 | mean_image = np.mean(X_train, axis=0) 55 | X_train -= mean_image 56 | X_val -= mean_image 57 | X_test -= mean_image 58 | 59 | # Transpose so that channels come first 60 | X_train = X_train.transpose(0, 3, 1, 2).copy() 61 | X_val = X_val.transpose(0, 3, 1, 2).copy() 62 | X_test = X_test.transpose(0, 3, 1, 2).copy() 63 | 64 | # Package data into a dictionary 65 | return { 66 | 'X_train': X_train, 'y_train': y_train, 67 | 'X_val': X_val, 'y_val': y_val, 68 | 'X_test': X_test, 'y_test': y_test, 69 | } 70 | 71 | 72 | def load_tiny_imagenet(path, dtype=np.float32): 73 | """ 74 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 75 | TinyImageNet-200 have the same directory structure, so this can be used 76 | to load any of them. 77 | 78 | Inputs: 79 | - path: String giving path to the directory to load. 80 | - dtype: numpy datatype used to load the data. 81 | 82 | Returns: A tuple of 83 | - class_names: A list where class_names[i] is a list of strings giving the 84 | WordNet names for class i in the loaded dataset. 85 | - X_train: (N_tr, 3, 64, 64) array of training images 86 | - y_train: (N_tr,) array of training labels 87 | - X_val: (N_val, 3, 64, 64) array of validation images 88 | - y_val: (N_val,) array of validation labels 89 | - X_test: (N_test, 3, 64, 64) array of testing images. 
90 | - y_test: (N_test,) array of test labels; if test labels are not available 91 | (such as in student code) then y_test will be None. 92 | """ 93 | # First load wnids 94 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 95 | wnids = [x.strip() for x in f] 96 | 97 | # Map wnids to integer labels 98 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 99 | 100 | # Use words.txt to get names for each class 101 | with open(os.path.join(path, 'words.txt'), 'r') as f: 102 | wnid_to_words = dict(line.split('\t') for line in f) 103 | for wnid, words in wnid_to_words.iteritems(): 104 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 105 | class_names = [wnid_to_words[wnid] for wnid in wnids] 106 | 107 | # Next load training data. 108 | X_train = [] 109 | y_train = [] 110 | for i, wnid in enumerate(wnids): 111 | if (i + 1) % 20 == 0: 112 | print 'loading training data for synset %d / %d' % (i + 1, len(wnids)) 113 | # To figure out the filenames we need to open the boxes file 114 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 115 | with open(boxes_file, 'r') as f: 116 | filenames = [x.split('\t')[0] for x in f] 117 | num_images = len(filenames) 118 | 119 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 120 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 121 | for j, img_file in enumerate(filenames): 122 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 123 | img = imread(img_file) 124 | if img.ndim == 2: 125 | ## grayscale file 126 | img.shape = (64, 64, 1) 127 | X_train_block[j] = img.transpose(2, 0, 1) 128 | X_train.append(X_train_block) 129 | y_train.append(y_train_block) 130 | 131 | # We need to concatenate all training data 132 | X_train = np.concatenate(X_train, axis=0) 133 | y_train = np.concatenate(y_train, axis=0) 134 | 135 | # Next load validation data 136 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 137 | img_files = [] 138 | val_wnids = [] 139 | for line in f: 140 | img_file, wnid = line.split('\t')[:2] 141 | img_files.append(img_file) 142 | val_wnids.append(wnid) 143 | num_val = len(img_files) 144 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 145 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 146 | for i, img_file in enumerate(img_files): 147 | img_file = os.path.join(path, 'val', 'images', img_file) 148 | img = imread(img_file) 149 | if img.ndim == 2: 150 | img.shape = (64, 64, 1) 151 | X_val[i] = img.transpose(2, 0, 1) 152 | 153 | # Next load test images 154 | # Students won't have test labels, so we need to iterate over files in the 155 | # images directory. 
156 | img_files = os.listdir(os.path.join(path, 'test', 'images'))
157 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
158 | for i, img_file in enumerate(img_files):
159 | img_file = os.path.join(path, 'test', 'images', img_file)
160 | img = imread(img_file)
161 | if img.ndim == 2:
162 | img.shape = (64, 64, 1)
163 | X_test[i] = img.transpose(2, 0, 1)
164 |
165 | y_test = None
166 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
167 | if os.path.isfile(y_test_file):
168 | with open(y_test_file, 'r') as f:
169 | img_file_to_wnid = {}
170 | for line in f:
171 | line = line.split('\t')
172 | img_file_to_wnid[line[0]] = line[1]
173 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
174 | y_test = np.array(y_test)
175 |
176 | return class_names, X_train, y_train, X_val, y_val, X_test, y_test
177 |
178 |
179 | def load_models(models_dir):
180 | """
181 | Load saved models from disk. This will attempt to unpickle all files in a
182 | directory; any files that give errors on unpickling (such as README.txt) will
183 | be skipped.
184 |
185 | Inputs:
186 | - models_dir: String giving the path to a directory containing model files.
187 | Each model file is a pickled dictionary with a 'model' field.
188 |
189 | Returns:
190 | A dictionary mapping model file names to models.
191 | """
192 | models = {}
193 | for model_file in os.listdir(models_dir):
194 | with open(os.path.join(models_dir, model_file), 'rb') as f:
195 | try:
196 | models[model_file] = pickle.load(f)['model']
197 | except pickle.UnpicklingError:
198 | continue
199 | return models
200 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/datasets/.gitignore: --------------------------------------------------------------------------------
1 | cifar-10-batches-py/*
2 | tiny-imagenet-100-A*
3 | tiny-imagenet-100-B*
4 | tiny-100-A-pretrained/*
5 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/datasets/get_datasets.sh: --------------------------------------------------------------------------------
1 | # Get CIFAR10
2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
3 | tar -xzvf cifar-10-python.tar.gz
4 | rm cifar-10-python.tar.gz
5 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/gradient_check.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from random import randrange
3 |
4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
5 | """
6 | a naive implementation of numerical gradient of f at x
7 | - f should be a function that takes a single argument
8 | - x is the point (numpy array) to evaluate the gradient at
9 | """
10 |
11 | fx = f(x) # evaluate function value at original point
12 | grad = np.zeros_like(x)
13 | # iterate over all indexes in x
14 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
15 | while not it.finished:
16 |
17 | # evaluate function at x+h
18 | ix = it.multi_index
19 | oldval = x[ix]
20 | x[ix] = oldval + h # increment by h
21 | fxph = f(x) # evaluate f(x + h)
22 | x[ix] = oldval - h
23 | fxmh = f(x) # evaluate f(x - h)
24 | x[ix] = oldval # restore
25 |
26 | # compute the partial derivative with centered formula
27 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope
28 | if verbose:
29 | print ix, grad[ix]
30 | it.iternext() # step to next dimension
31 |
32 | return grad
33 |
34 |
35 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
36 | """
37 | Evaluate a numeric gradient for a function that accepts a numpy
38 | array and returns a numpy array.
39 | """
40 | grad = np.zeros_like(x)
41 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
42 | while not it.finished:
43 | ix = it.multi_index
44 |
45 | oldval = x[ix]
46 | x[ix] = oldval + h
47 | pos = f(x).copy()
48 | x[ix] = oldval - h
49 | neg = f(x).copy()
50 | x[ix] = oldval
51 |
52 | grad[ix] = np.sum((pos - neg) * df) / (2 * h)
53 | it.iternext()
54 | return grad
55 |
56 |
57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
58 | """
59 | Compute numeric gradients for a function that operates on input
60 | and output blobs.
61 |
62 | We assume that f accepts several input blobs as arguments, followed by a blob
63 | into which outputs will be written. For example, f might be called like this:
64 |
65 | f(x, w, out)
66 |
67 | where x and w are input Blobs, and the result of f will be written to out.
68 |
69 | Inputs:
70 | - f: function
71 | - inputs: tuple of input blobs
72 | - output: output blob
73 | - h: step size
74 | """
75 | numeric_diffs = []
76 | for input_blob in inputs:
77 | diff = np.zeros_like(input_blob.diffs)
78 | it = np.nditer(input_blob.vals, flags=['multi_index'],
79 | op_flags=['readwrite'])
80 | while not it.finished:
81 | idx = it.multi_index
82 | orig = input_blob.vals[idx]
83 |
84 | input_blob.vals[idx] = orig + h
85 | f(*(inputs + (output,)))
86 | pos = np.copy(output.vals)
87 | input_blob.vals[idx] = orig - h
88 | f(*(inputs + (output,)))
89 | neg = np.copy(output.vals)
90 | input_blob.vals[idx] = orig
91 |
92 | diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
93 |
94 | it.iternext()
95 | numeric_diffs.append(diff)
96 | return numeric_diffs
97 |
98 |
99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
100 | return eval_numerical_gradient_blobs(lambda *args: net.forward(),
101 | inputs, output, h=h)
102 |
103 |
104 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
105 | """
106 | sample a few random elements and only return the numerical
107 | gradient in those dimensions.
108 | """ 109 | 110 | for i in xrange(num_checks): 111 | ix = tuple([randrange(m) for m in x.shape]) 112 | 113 | oldval = x[ix] 114 | x[ix] = oldval + h # increment by h 115 | fxph = f(x) # evaluate f(x + h) 116 | x[ix] = oldval - h # increment by h 117 | fxmh = f(x) # evaluate f(x - h) 118 | x[ix] = oldval # reset 119 | 120 | grad_numerical = (fxph - fxmh) / (2 * h) 121 | grad_analytic = analytic_grad[ix] 122 | rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic)) 123 | print 'numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error) 124 | 125 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 5 | # First figure out what the size of the output should be 6 | N, C, H, W = x_shape 7 | assert (H + 2 * padding - field_height) % stride == 0 8 | assert (W + 2 * padding - field_height) % stride == 0 9 | out_height = (H + 2 * padding - field_height) / stride + 1 10 | out_width = (W + 2 * padding - field_width) / stride + 1 11 | 12 | i0 = np.repeat(np.arange(field_height), field_width) 13 | i0 = np.tile(i0, C) 14 | i1 = stride * np.repeat(np.arange(out_height), out_width) 15 | j0 = np.tile(np.arange(field_width), field_height * C) 16 | j1 = stride * np.tile(np.arange(out_width), out_height) 17 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 18 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 19 | 20 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 21 | 22 | return (k, i, j) 23 | 24 | 25 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 26 | """ An implementation of im2col based on some fancy indexing """ 27 | # Zero-pad the input 28 | p = padding 29 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 30 | 31 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, 32 | stride) 33 | 34 | cols = x_padded[:, k, i, j] 35 | C = x.shape[1] 36 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 37 | return cols 38 | 39 | 40 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, 41 | stride=1): 42 | """ An implementation of col2im based on fancy indexing and np.add.at """ 43 | N, C, H, W = x_shape 44 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 45 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 46 | k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding, 47 | stride) 48 | cols_reshaped = cols.reshape(C * field_height * field_width, -1, N) 49 | cols_reshaped = cols_reshaped.transpose(2, 0, 1) 50 | np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped) 51 | if padding == 0: 52 | return x_padded 53 | return x_padded[:, :, padding:-padding, padding:-padding] 54 | 55 | pass 56 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/im2col_cython.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport numpy as np 3 | cimport cython 4 | 5 | # DTYPE = np.float64 6 | # ctypedef np.float64_t DTYPE_t 7 | 8 | ctypedef fused DTYPE_t: 9 | np.float32_t 10 | np.float64_t 11 | 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height, 13 | int field_width, int 
padding, int stride): 14 | cdef int N = x.shape[0] 15 | cdef int C = x.shape[1] 16 | cdef int H = x.shape[2] 17 | cdef int W = x.shape[3] 18 | 19 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 20 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 21 | 22 | cdef int p = padding 23 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x, 24 | ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 25 | 26 | cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros( 27 | (C * field_height * field_width, N * HH * WW), 28 | dtype=x.dtype) 29 | 30 | # Moving the inner loop to a C function with no bounds checking works, but does 31 | # not seem to help performance in any measurable way. 32 | 33 | im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 34 | field_height, field_width, padding, stride) 35 | return cols 36 | 37 | 38 | @cython.boundscheck(False) 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 40 | np.ndarray[DTYPE_t, ndim=4] x_padded, 41 | int N, int C, int H, int W, int HH, int WW, 42 | int field_height, int field_width, int padding, int stride) except? -1: 43 | cdef int c, ii, jj, row, yy, xx, i, col 44 | 45 | for c in range(C): 46 | for yy in range(HH): 47 | for xx in range(WW): 48 | for ii in range(field_height): 49 | for jj in range(field_width): 50 | row = c * field_width * field_height + ii * field_height + jj 51 | for i in range(N): 52 | col = yy * WW * N + xx * N + i 53 | cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj] 54 | 55 | 56 | 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W, 58 | int field_height, int field_width, int padding, int stride): 59 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 60 | cdef int HH = (H + 2 * padding - field_height) / stride + 1 61 | cdef int WW = (W + 2 * padding - field_width) / stride + 1 62 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding), 63 | dtype=cols.dtype) 64 | 65 | # Moving the inner loop to a C-function with no bounds checking improves 66 | # performance quite a bit for col2im. 67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? 
-1:
79 | cdef int c, ii, jj, row, yy, xx, i, col
80 |
81 | for c in range(C):
82 | for ii in range(field_height):
83 | for jj in range(field_width):
84 | row = c * field_width * field_height + ii * field_height + jj
85 | for yy in range(HH):
86 | for xx in range(WW):
87 | for i in range(N):
88 | col = yy * WW * N + xx * N + i
89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
90 |
91 |
92 | @cython.boundscheck(False)
93 | @cython.wraparound(False)
94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
95 | np.ndarray[DTYPE_t, ndim=4] x_padded,
96 | int N, int C, int H, int W, int HH, int WW,
97 | int out_h, int out_w, int pad, int stride):
98 |
99 | cdef int c, hh, ww, n, h, w
100 | for n in range(N):
101 | for c in range(C):
102 | for hh in range(HH):
103 | for ww in range(WW):
104 | for h in range(out_h):
105 | for w in range(out_w):
106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
107 |
108 |
109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
110 | int HH, int WW, int pad, int stride):
111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1
113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1
114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
115 | dtype=cols.dtype)
116 |
117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)
118 |
119 | if pad > 0:
120 | return x_padded[:, :, pad:-pad, pad:-pad]
121 | return x_padded
122 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/layer_utils.py: --------------------------------------------------------------------------------
1 | from cs231n.layers import *
2 | from cs231n.fast_layers import *
3 |
4 |
5 | def affine_relu_forward(x, w, b):
6 | """
7 | Convenience layer that performs an affine transform followed by a ReLU
8 |
9 | Inputs:
10 | - x: Input to the affine layer
11 | - w, b: Weights for the affine layer
12 |
13 | Returns a tuple of:
14 | - out: Output from the ReLU
15 | - cache: Object to give to the backward pass
16 | """
17 | a, fc_cache = affine_forward(x, w, b)
18 | out, relu_cache = relu_forward(a)
19 | cache = (fc_cache, relu_cache)
20 | return out, cache
21 |
22 |
23 | def affine_relu_backward(dout, cache):
24 | """
25 | Backward pass for the affine-relu convenience layer
26 | """
27 | fc_cache, relu_cache = cache
28 | da = relu_backward(dout, relu_cache)
29 | dx, dw, db = affine_backward(da, fc_cache)
30 | return dx, dw, db
31 |
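Once the underlying `affine_forward`/`relu_forward` TODOs in `layers.py` are filled in, sandwich layers like the pair above are verified numerically against `eval_numerical_gradient_array`. A usage sketch (illustrative shapes only; it assumes the `cs231n` package layout used in this assignment):

```python
import numpy as np
from cs231n.layer_utils import affine_relu_forward, affine_relu_backward
from cs231n.gradient_check import eval_numerical_gradient_array

x = np.random.randn(2, 3, 4)   # affine layers flatten all but the first dimension
w = np.random.randn(12, 10)
b = np.random.randn(10)
dout = np.random.randn(2, 10)  # upstream gradient from the next layer

_, cache = affine_relu_forward(x, w, b)
dx, dw, db = affine_relu_backward(dout, cache)

# Differentiate the forward pass numerically and compare against dx.
dx_num = eval_numerical_gradient_array(
    lambda x: affine_relu_forward(x, w, b)[0], x, dout)
print(np.max(np.abs(dx - dx_num)))  # should be tiny, e.g. below 1e-8
```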
57 | """ 58 | conv_cache, relu_cache = cache 59 | da = relu_backward(dout, relu_cache) 60 | dx, dw, db = conv_backward_fast(da, conv_cache) 61 | return dx, dw, db 62 | 63 | 64 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 65 | """ 66 | Convenience layer that performs a convolution, a ReLU, and a pool. 67 | 68 | Inputs: 69 | - x: Input to the convolutional layer 70 | - w, b, conv_param: Weights and parameters for the convolutional layer 71 | - pool_param: Parameters for the pooling layer 72 | 73 | Returns a tuple of: 74 | - out: Output from the pooling layer 75 | - cache: Object to give to the backward pass 76 | """ 77 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 78 | s, relu_cache = relu_forward(a) 79 | out, pool_cache = max_pool_forward_fast(s, pool_param) 80 | cache = (conv_cache, relu_cache, pool_cache) 81 | return out, cache 82 | 83 | 84 | def conv_relu_pool_backward(dout, cache): 85 | """ 86 | Backward pass for the conv-relu-pool convenience layer 87 | """ 88 | conv_cache, relu_cache, pool_cache = cache 89 | ds = max_pool_backward_fast(dout, pool_cache) 90 | da = relu_backward(ds, relu_cache) 91 | dx, dw, db = conv_backward_fast(da, conv_cache) 92 | return dx, dw, db 93 | 94 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used for 5 | training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning rate, 16 | momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not perform 25 | well; however the default values of the other hyperparameters should work well 26 | for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: config = {} 41 | config.setdefault('learning_rate', 1e-2) 42 | 43 | w -= config['learning_rate'] * dw 44 | return w, config 45 | 46 | 47 | def sgd_momentum(w, dw, config=None): 48 | """ 49 | Performs stochastic gradient descent with momentum. 50 | 51 | config format: 52 | - learning_rate: Scalar learning rate. 53 | - momentum: Scalar between 0 and 1 giving the momentum value. 54 | Setting momentum = 0 reduces to sgd. 55 | - velocity: A numpy array of the same shape as w and dw used to store a moving 56 | average of the gradients. 
57 | """ 58 | if config is None: config = {} 59 | config.setdefault('learning_rate', 1e-2) 60 | config.setdefault('momentum', 0.9) 61 | v = config.get('velocity', np.zeros_like(w)) 62 | 63 | next_w = None 64 | ############################################################################# 65 | # TODO: Implement the momentum update formula. Store the updated value in # 66 | # the next_w variable. You should also use and update the velocity v. # 67 | ############################################################################# 68 | pass 69 | ############################################################################# 70 | # END OF YOUR CODE # 71 | ############################################################################# 72 | config['velocity'] = v 73 | 74 | return next_w, config 75 | 76 | 77 | 78 | def rmsprop(x, dx, config=None): 79 | """ 80 | Uses the RMSProp update rule, which uses a moving average of squared gradient 81 | values to set adaptive per-parameter learning rates. 82 | 83 | config format: 84 | - learning_rate: Scalar learning rate. 85 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 86 | gradient cache. 87 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 88 | - cache: Moving average of second moments of gradients. 89 | """ 90 | if config is None: config = {} 91 | config.setdefault('learning_rate', 1e-2) 92 | config.setdefault('decay_rate', 0.99) 93 | config.setdefault('epsilon', 1e-8) 94 | config.setdefault('cache', np.zeros_like(x)) 95 | 96 | next_x = None 97 | ############################################################################# 98 | # TODO: Implement the RMSprop update formula, storing the next value of x # 99 | # in the next_x variable. Don't forget to update cache value stored in # 100 | # config['cache']. # 101 | ############################################################################# 102 | pass 103 | ############################################################################# 104 | # END OF YOUR CODE # 105 | ############################################################################# 106 | 107 | return next_x, config 108 | 109 | 110 | def adam(x, dx, config=None): 111 | """ 112 | Uses the Adam update rule, which incorporates moving averages of both the 113 | gradient and its square and a bias correction term. 114 | 115 | config format: 116 | - learning_rate: Scalar learning rate. 117 | - beta1: Decay rate for moving average of first moment of gradient. 118 | - beta2: Decay rate for moving average of second moment of gradient. 119 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 120 | - m: Moving average of gradient. 121 | - v: Moving average of squared gradient. 122 | - t: Iteration number. 123 | """ 124 | if config is None: config = {} 125 | config.setdefault('learning_rate', 1e-3) 126 | config.setdefault('beta1', 0.9) 127 | config.setdefault('beta2', 0.999) 128 | config.setdefault('epsilon', 1e-8) 129 | config.setdefault('m', np.zeros_like(x)) 130 | config.setdefault('v', np.zeros_like(x)) 131 | config.setdefault('t', 0) 132 | 133 | next_x = None 134 | ############################################################################# 135 | # TODO: Implement the Adam update formula, storing the next value of x in # 136 | # the next_x variable. Don't forget to update the m, v, and t variables # 137 | # stored in config. 
# 138 | ############################################################################# 139 | pass 140 | ############################################################################# 141 | # END OF YOUR CODE # 142 | ############################################################################# 143 | 144 | return next_x, config 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignments2016/assignment2/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | from math import sqrt, ceil 2 | import numpy as np 3 | 4 | def visualize_grid(Xs, ubound=255.0, padding=1): 5 | """ 6 | Reshape a 4D tensor of image data to a grid for easy visualization. 7 | 8 | Inputs: 9 | - Xs: Data of shape (N, H, W, C) 10 | - ubound: Output grid will have values scaled to the range [0, ubound] 11 | - padding: The number of blank pixels between elements of the grid 12 | """ 13 | (N, H, W, C) = Xs.shape 14 | grid_size = int(ceil(sqrt(N))) 15 | grid_height = H * grid_size + padding * (grid_size - 1) 16 | grid_width = W * grid_size + padding * (grid_size - 1) 17 | grid = np.zeros((grid_height, grid_width, C)) 18 | next_idx = 0 19 | y0, y1 = 0, H 20 | for y in xrange(grid_size): 21 | x0, x1 = 0, W 22 | for x in xrange(grid_size): 23 | if next_idx < N: 24 | img = Xs[next_idx] 25 | low, high = np.min(img), np.max(img) 26 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 27 | # grid[y0:y1, x0:x1] = Xs[next_idx] 28 | next_idx += 1 29 | x0 += W + padding 30 | x1 += W + padding 31 | y0 += H + padding 32 | y1 += H + padding 33 | # grid_max = np.max(grid) 34 | # grid_min = np.min(grid) 35 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 36 | return grid 37 | 38 | def vis_grid(Xs): 39 | """ visualize a grid of images """ 40 | (N, H, W, C) = Xs.shape 41 | A = int(ceil(sqrt(N))) 42 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 43 | G *= np.min(Xs) 44 | n = 0 45 | for y in range(A): 46 | for x in range(A): 47 | if n < N: 48 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 49 | n += 1 50 | # normalize to [0,1] 51 | maxg = G.max() 52 | ming = G.min() 53 | G = (G - ming)/(maxg-ming) 54 | return G 55 | 56 | def vis_nn(rows): 57 | """ visualize array of arrays of images """ 58 | N = len(rows) 59 | D = len(rows[0]) 60 | H,W,C = rows[0][0].shape 61 | Xs = rows[0][0] 62 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 63 | for y in range(N): 64 | for x in range(D): 65 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 66 | # normalize to [0,1] 67 | maxg = G.max() 68 | ming = G.min() 69 | G = (G - ming)/(maxg-ming) 70 | return G 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /assignments2016/assignment2/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | PYVER=2.7 5 | PATHTOPYTHON=/usr/local/bin/ 6 | 
PYTHON=${PATHTOPYTHON}python${PYVER}
7 |
8 | # find the root of the virtualenv, it should be the parent of the dir this script is in
9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"`
10 |
11 | # now run Python with the virtualenv set as Python's HOME
12 | export PYTHONHOME=$ENV
13 | exec $PYTHON "$@"
14 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment2/kitten.jpg -------------------------------------------------------------------------------- /assignments2016/assignment2/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment2/puppy.jpg -------------------------------------------------------------------------------- /assignments2016/assignment2/requirements.txt: --------------------------------------------------------------------------------
1 | Cython==0.23.4
2 | Jinja2==2.8
3 | MarkupSafe==0.23
4 | Pillow==3.0.0
5 | Pygments==2.0.2
6 | appnope==0.1.0
7 | argparse==1.2.1
8 | backports-abc==0.4
9 | backports.ssl-match-hostname==3.5.0.1
10 | certifi==2015.11.20.1
11 | cycler==0.9.0
12 | decorator==4.0.6
13 | functools32==3.2.3-2
14 | gnureadline==6.3.3
15 | ipykernel==4.2.2
16 | ipython==4.0.1
17 | ipython-genutils==0.1.0
18 | ipywidgets==4.1.1
19 | jsonschema==2.5.1
20 | jupyter==1.0.0
21 | jupyter-client==4.1.1
22 | jupyter-console==4.0.3
23 | jupyter-core==4.0.6
24 | matplotlib==1.5.0
25 | mistune==0.7.1
26 | nbconvert==4.1.0
27 | nbformat==4.0.1
28 | notebook==4.0.6
29 | numpy==1.10.4
30 | path.py==8.1.2
31 | pexpect==4.0.1
32 | pickleshare==0.5
33 | ptyprocess==0.5
34 | pyparsing==2.0.7
35 | python-dateutil==2.4.2
36 | pytz==2015.7
37 | pyzmq==15.1.0
38 | qtconsole==4.1.1
39 | scipy==0.16.1
40 | simplegeneric==0.8.1
41 | singledispatch==3.4.0.3
42 | six==1.10.0
43 | terminado==0.5
44 | tornado==4.3
45 | traitlets==4.0.0
46 | wsgiref==0.1.2
47 |
-------------------------------------------------------------------------------- /assignments2016/assignment2/start_ipython_osx.sh: --------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 |
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 |
-------------------------------------------------------------------------------- /assignments2016/assignment3.md: --------------------------------------------------------------------------------
1 | ---
2 | layout: page
3 | mathjax: true
4 | permalink: assignments2016/assignment3/
5 | ---
6 |
7 | In this assignment you will implement recurrent neural networks (RNNs) and apply them to image captioning on the Microsoft COCO dataset. We will also introduce the TinyImageNet dataset, and use a model pretrained on this dataset to explore various applications of image gradients.
8 |
9 | The goals of this assignment are as follows.
10 |
11 | - understand the architecture of *recurrent neural networks (RNNs)* and how they operate on sequence data by sharing parameters over time
12 | - understand the difference between vanilla RNNs and Long-Short Term Memory (LSTM) RNNs
13 | - understand how to sample from an RNN at test time
14 | - understand how to combine convolutional neural networks (CNNs) and recurrent neural networks (RNNs) to implement an image captioning system
15 | - understand how a trained CNN can be used to compute gradients with respect to an input image
16 | - implement various applications of image gradients (including saliency maps, fooling models, class visualization, feature inversion, DeepDream, and more)
17 |
18 | ## Setup
19 | You can work on the assignment in one of two ways: in a virtual environment through Terminal.com, or locally on your own machine.
20 |
21 | ### Working in a virtual environment on Terminal
22 | Terminal has created a subdomain to serve our class. Register an account at [www.stanfordterminalcloud.com](https://www.stanfordterminalcloud.com). The snapshot for this assignment can be found [here](https://www.stanfordterminalcloud.com/snapshot/49f5a1ea15dc424aec19155b3398784d57c55045435315ce4f8b96b62819ef65). If you are registered in the class, you can ask the TAs (see Piazza for more information) for Terminal credits to use for this class. Once you boot up the snapshot, everything you need for the class will already be installed, so you can start the assignment right away. We have written a short tutorial on Terminal [here](/terminal-tutorial).
23 |
24 | ### Working locally
25 | Download the zip file [here](http://cs231n.stanford.edu/winter1516_assignment3.zip).
26 | Regarding dependencies:
27 |
28 | **[Option 1] Use Anaconda:**
29 | The most common approach is to install [Anaconda](https://www.continuum.io/downloads), a Python distribution that includes most of the major packages for science, math, engineering and data analysis. Once it is installed you can skip all the dependency steps and start working on the assignment right away.
30 |
31 | **[Option 2] Manual install, virtual environment:**
32 | If you would rather take a more general but trickier route than Anaconda, you can create a [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for this assignment. If you do not use a virtual environment, all of the code's dependencies will be installed globally on your machine. See below for setting up the virtual environment.
33 |
34 | ~~~bash
35 | cd assignment3
36 | sudo pip install virtualenv # This may already be installed
37 | virtualenv .env # Create a virtual environment
38 | source .env/bin/activate # Activate the virtual environment
39 | pip install -r requirements.txt # Install dependencies
40 | # Work on the assignment for a while ...
41 | deactivate # Exit the virtual environment
42 | ~~~
43 |
44 | **Download data:**
45 | After getting the starter code, you need to download the preprocessed MS-COCO dataset, the TinyImageNet dataset, and the pretrained TinyImageNet model. Run the following commands from the `assignment3` directory.
46 |
47 | ~~~bash
48 | cd cs231n/datasets
49 | ./get_coco_captioning.sh
50 | ./get_tiny_imagenet_a.sh
51 | ./get_pretrained_model.sh
52 | ~~~
53 |
54 | **Compile the Cython extension:** Convolutional neural networks need a very efficient implementation. We have implemented some of the functionality using [Cython](http://cython.org/), so you will need to compile the Cython extension before running the code. Run the following command from the `cs231n` directory:
55 |
56 | ~~~bash
57 | python setup.py build_ext --inplace
58 | ~~~
59 |
60 | **Start IPython:**
61 | After downloading all the data, you should start the IPython notebook server from `assignment3`. If you are unfamiliar with IPython, we recommend reading the [IPython tutorial](/ipython-tutorial) first.
62 |
63 | **NOTE:** If you run a virtual environment on OSX, you may get a matplotlib error ([issue about this problem](http://matplotlib.org/faq/virtualenv_faq.html)). You can work around it by starting the IPython server with `start_ipython_osx.sh` in the `assignment3` folder; the script assumes that your virtual environment is named `.env`.
64 |
65 |
66 | ### Submitting your work:
67 | Once you have finished the assignment, locally or on Terminal, run the `collectSubmission.sh` script. It produces a file called `assignment3.zip`. Upload this file under the Assignments tab on [the coursework](https://coursework.stanford.edu/portal/site/W15-CS-231N-01/) page for the course.
68 |
69 |
70 | ### Q1: Image captioning with vanilla RNNs (40 points)
71 | The IPython notebook `RNN_Captioning.ipynb` walks you through implementing an image captioning system on the MS COCO dataset using vanilla RNNs.
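For orientation before starting Q1: the heart of a vanilla RNN is a single tanh recurrence applied at every timestep with shared weights. A minimal sketch of one timestep (illustrative names only; the notebook defines its own API):

~~~python
import numpy as np

def rnn_step_forward_sketch(x, prev_h, Wx, Wh, b):
    """One timestep of a vanilla RNN.

    x: (N, D) input at this timestep; prev_h: (N, H) previous hidden state.
    Wx: (D, H), Wh: (H, H) and b: (H,) are shared across all timesteps.
    """
    next_h = np.tanh(x.dot(Wx) + prev_h.dot(Wh) + b)
    return next_h
~~~

An LSTM (Q2) replaces this single update with gated updates to an additional cell state, which makes it easier to carry information across long sequences.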
### Q2: Image Captioning with LSTMs (35 points)
The IPython notebook `LSTM_Captioning.ipynb` walks you through the implementation of Long-Short Term Memory (LSTM) RNNs and applies them to the image captioning problem on the MS COCO dataset.

### Q3: Image Gradients: Saliency Maps and Fooling Images (10 points)
The IPython notebook `ImageGradients.ipynb` introduces the TinyImageNet dataset. Using a model pretrained on this dataset, you will compute gradients with respect to images, and use them to produce saliency maps and fooling images.

### Q4: Image Generation: Classes, Inversion, DeepDream (15 points)
In the IPython notebook `ImageGeneration.ipynb` you will use the pretrained TinyImageNet model to generate images. In particular, you will visualize classes and implement feature inversion and DeepDream.

### Q5: Do something extra! (+10 points)
Using the material provided in this assignment, try something cool. For example, there may be other ways of generating images that we did not implement in the assignment!

---

Translation: Myungsub Choi (myungsub)

89 | -------------------------------------------------------------------------------- /assignments2016/assignment3/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignments2016/assignment3/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment3.zip 2 | zip -r assignment3.zip . -x "*.git" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 3 | -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment3/cs231n/__init__.py -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment3/cs231n/classifiers/__init__.py -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/coco_utils.py: -------------------------------------------------------------------------------- 1 | import os, json 2 | import numpy as np 3 | import h5py 4 | 5 | 6 | def load_coco_data(base_dir='cs231n/datasets/coco_captioning', 7 | max_train=None, 8 | pca_features=True): 9 | data = {} 10 | caption_file = os.path.join(base_dir, 'coco2014_captions.h5') 11 | with h5py.File(caption_file, 'r') as f: 12 | for k, v in f.iteritems(): 13 | data[k] = np.asarray(v) 14 | 15 | if pca_features: 16 | train_feat_file = os.path.join(base_dir, 'train2014_vgg16_fc7_pca.h5') 17 | else: 18 | train_feat_file = os.path.join(base_dir, 'train2014_vgg16_fc7.h5') 19 | with h5py.File(train_feat_file, 'r') as f: 20 | data['train_features'] = np.asarray(f['features']) 21 | 22 | if pca_features: 23 | val_feat_file = os.path.join(base_dir, 'val2014_vgg16_fc7_pca.h5') 24 | else: 25 | val_feat_file = os.path.join(base_dir, 'val2014_vgg16_fc7.h5') 26 | with h5py.File(val_feat_file, 'r') as f: 27 | data['val_features'] = np.asarray(f['features']) 28 | 29 | dict_file = os.path.join(base_dir, 'coco2014_vocab.json') 30 | with open(dict_file, 'r') as f: 31 | dict_data = json.load(f) 32 | for k, v in dict_data.iteritems(): 33 | data[k] = v 34 | 35 | train_url_file = os.path.join(base_dir, 'train2014_urls.txt') 36 | with open(train_url_file, 'r') as f: 37 | train_urls = np.asarray([line.strip() for line in f]) 38 | data['train_urls'] = train_urls 39 | 40 | val_url_file = os.path.join(base_dir, 'val2014_urls.txt') 41 | with open(val_url_file, 'r') as f: 42 | val_urls = np.asarray([line.strip() for line in f]) 43 | data['val_urls'] = val_urls 44 | 45 | # Maybe subsample the training data 46 | if max_train is not None: 47 | num_train = data['train_captions'].shape[0] 48 | mask = 
np.random.randint(num_train, size=max_train)
 49 |     data['train_captions'] = data['train_captions'][mask]
 50 |     data['train_image_idxs'] = data['train_image_idxs'][mask]
 51 |
 52 |   return data
 53 |
 54 |
 55 | def decode_captions(captions, idx_to_word):
 56 |   singleton = False
 57 |   if captions.ndim == 1:
 58 |     singleton = True
 59 |     captions = captions[None]
 60 |   decoded = []
 61 |   N, T = captions.shape
 62 |   for i in xrange(N):
 63 |     words = []
 64 |     for t in xrange(T):
 65 |       word = idx_to_word[captions[i, t]]
 66 |       if word != '<NULL>':
 67 |         words.append(word)
 68 |       if word == '<END>':
 69 |         break
 70 |     decoded.append(' '.join(words))
 71 |   if singleton:
 72 |     decoded = decoded[0]
 73 |   return decoded
 74 |
 75 |
 76 | def sample_coco_minibatch(data, batch_size=100, split='train'):
 77 |   split_size = data['%s_captions' % split].shape[0]
 78 |   mask = np.random.choice(split_size, batch_size)
 79 |   captions = data['%s_captions' % split][mask]
 80 |   image_idxs = data['%s_image_idxs' % split][mask]
 81 |   image_features = data['%s_features' % split][image_idxs]
 82 |   urls = data['%s_urls' % split][image_idxs]
 83 |   return captions, image_features, urls
 84 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/data_utils.py: --------------------------------------------------------------------------------
 1 | import cPickle as pickle
 2 | import numpy as np
 3 | import os
 4 | from scipy.misc import imread
 5 |
 6 | def load_CIFAR_batch(filename):
 7 |   """ load single batch of cifar """
 8 |   with open(filename, 'rb') as f:
 9 |     datadict = pickle.load(f)
 10 |     X = datadict['data']
 11 |     Y = datadict['labels']
 12 |     X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
 13 |     Y = np.array(Y)
 14 |     return X, Y
 15 |
 16 | def load_CIFAR10(ROOT):
 17 |   """ load all of cifar """
 18 |   xs = []
 19 |   ys = []
 20 |   for b in range(1,6):
 21 |     f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
 22 |     X, Y = load_CIFAR_batch(f)
 23 |     xs.append(X)
 24 |     ys.append(Y)
 25 |   Xtr = np.concatenate(xs)
 26 |   Ytr = np.concatenate(ys)
 27 |   del X, Y
 28 |   Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
 29 |   return Xtr, Ytr, Xte, Yte
 30 |
 31 |
 32 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
 33 |                      subtract_mean=True):
 34 |   """
 35 |   Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
 36 |   it for classifiers. These are the same steps as we used for the SVM, but
 37 |   condensed to a single function.
38 | """ 39 | # Load the raw CIFAR-10 data 40 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 41 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 42 | 43 | # Subsample the data 44 | mask = range(num_training, num_training + num_validation) 45 | X_val = X_train[mask] 46 | y_val = y_train[mask] 47 | mask = range(num_training) 48 | X_train = X_train[mask] 49 | y_train = y_train[mask] 50 | mask = range(num_test) 51 | X_test = X_test[mask] 52 | y_test = y_test[mask] 53 | 54 | # Normalize the data: subtract the mean image 55 | if subtract_mean: 56 | mean_image = np.mean(X_train, axis=0) 57 | X_train -= mean_image 58 | X_val -= mean_image 59 | X_test -= mean_image 60 | 61 | # Transpose so that channels come first 62 | X_train = X_train.transpose(0, 3, 1, 2).copy() 63 | X_val = X_val.transpose(0, 3, 1, 2).copy() 64 | X_test = X_test.transpose(0, 3, 1, 2).copy() 65 | 66 | # Package data into a dictionary 67 | return { 68 | 'X_train': X_train, 'y_train': y_train, 69 | 'X_val': X_val, 'y_val': y_val, 70 | 'X_test': X_test, 'y_test': y_test, 71 | } 72 | 73 | 74 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 75 | """ 76 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 77 | TinyImageNet-200 have the same directory structure, so this can be used 78 | to load any of them. 79 | 80 | Inputs: 81 | - path: String giving path to the directory to load. 82 | - dtype: numpy datatype used to load the data. 83 | - subtract_mean: Whether to subtract the mean training image. 84 | 85 | Returns: A dictionary with the following entries: 86 | - class_names: A list where class_names[i] is a list of strings giving the 87 | WordNet names for class i in the loaded dataset. 88 | - X_train: (N_tr, 3, 64, 64) array of training images 89 | - y_train: (N_tr,) array of training labels 90 | - X_val: (N_val, 3, 64, 64) array of validation images 91 | - y_val: (N_val,) array of validation labels 92 | - X_test: (N_test, 3, 64, 64) array of testing images. 93 | - y_test: (N_test,) array of test labels; if test labels are not available 94 | (such as in student code) then y_test will be None. 95 | - mean_image: (3, 64, 64) array giving mean training image 96 | """ 97 | # First load wnids 98 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 99 | wnids = [x.strip() for x in f] 100 | 101 | # Map wnids to integer labels 102 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 103 | 104 | # Use words.txt to get names for each class 105 | with open(os.path.join(path, 'words.txt'), 'r') as f: 106 | wnid_to_words = dict(line.split('\t') for line in f) 107 | for wnid, words in wnid_to_words.iteritems(): 108 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 109 | class_names = [wnid_to_words[wnid] for wnid in wnids] 110 | 111 | # Next load training data. 
 112 |   X_train = []
 113 |   y_train = []
 114 |   for i, wnid in enumerate(wnids):
 115 |     if (i + 1) % 20 == 0:
 116 |       print 'loading training data for synset %d / %d' % (i + 1, len(wnids))
 117 |     # To figure out the filenames we need to open the boxes file
 118 |     boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
 119 |     with open(boxes_file, 'r') as f:
 120 |       filenames = [x.split('\t')[0] for x in f]
 121 |     num_images = len(filenames)
 122 |
 123 |     X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
 124 |     y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64)
 125 |     for j, img_file in enumerate(filenames):
 126 |       img_file = os.path.join(path, 'train', wnid, 'images', img_file)
 127 |       img = imread(img_file)
 128 |       if img.ndim == 2:
 129 |         # grayscale file
 130 |         img.shape = (64, 64, 1)
 131 |       X_train_block[j] = img.transpose(2, 0, 1)
 132 |     X_train.append(X_train_block)
 133 |     y_train.append(y_train_block)
 134 |
 135 |   # We need to concatenate all training data
 136 |   X_train = np.concatenate(X_train, axis=0)
 137 |   y_train = np.concatenate(y_train, axis=0)
 138 |
 139 |   # Next load validation data
 140 |   with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
 141 |     img_files = []
 142 |     val_wnids = []
 143 |     for line in f:
 144 |       img_file, wnid = line.split('\t')[:2]
 145 |       img_files.append(img_file)
 146 |       val_wnids.append(wnid)
 147 |     num_val = len(img_files)
 148 |     y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
 149 |     X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
 150 |     for i, img_file in enumerate(img_files):
 151 |       img_file = os.path.join(path, 'val', 'images', img_file)
 152 |       img = imread(img_file)
 153 |       if img.ndim == 2:
 154 |         img.shape = (64, 64, 1)
 155 |       X_val[i] = img.transpose(2, 0, 1)
 156 |
 157 |   # Next load test images
 158 |   # Students won't have test labels, so we need to iterate over files in the
 159 |   # images directory.
 160 |   img_files = os.listdir(os.path.join(path, 'test', 'images'))
 161 |   X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
 162 |   for i, img_file in enumerate(img_files):
 163 |     img_file = os.path.join(path, 'test', 'images', img_file)
 164 |     img = imread(img_file)
 165 |     if img.ndim == 2:
 166 |       img.shape = (64, 64, 1)
 167 |     X_test[i] = img.transpose(2, 0, 1)
 168 |
 169 |   y_test = None
 170 |   y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
 171 |   if os.path.isfile(y_test_file):
 172 |     with open(y_test_file, 'r') as f:
 173 |       img_file_to_wnid = {}
 174 |       for line in f:
 175 |         line = line.split('\t')
 176 |         img_file_to_wnid[line[0]] = line[1]
 177 |     y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files]
 178 |     y_test = np.array(y_test)
 179 |
 180 |   mean_image = X_train.mean(axis=0)
 181 |   if subtract_mean:
 182 |     X_train -= mean_image[None]
 183 |     X_val -= mean_image[None]
 184 |     X_test -= mean_image[None]
 185 |
 186 |   return {
 187 |     'class_names': class_names,
 188 |     'X_train': X_train,
 189 |     'y_train': y_train,
 190 |     'X_val': X_val,
 191 |     'y_val': y_val,
 192 |     'X_test': X_test,
 193 |     'y_test': y_test,
 195 |     'mean_image': mean_image,
 196 |   }
 197 |
 198 |
 199 | def load_models(models_dir):
 200 |   """
 201 |   Load saved models from disk. This will attempt to unpickle all files in a
 202 |   directory; any files that give errors on unpickling (such as README.txt) will
 203 |   be skipped.
 204 |
 205 |   Inputs:
 206 |   - models_dir: String giving the path to a directory containing model files.
 207 |     Each model file is a pickled dictionary with a 'model' field.
 208 |
 209 |   Returns:
 210 |   A dictionary mapping model file names to models.
 211 |   """
 212 |   models = {}
 213 |   for model_file in os.listdir(models_dir):
 214 |     with open(os.path.join(models_dir, model_file), 'rb') as f:
 215 |       try:
 216 |         models[model_file] = pickle.load(f)['model']
 217 |       except pickle.UnpicklingError:
 218 |         continue
 219 |   return models
 220 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/datasets/get_coco_captioning.sh: --------------------------------------------------------------------------------
 1 | wget "http://cs231n.stanford.edu/coco_captioning.zip"
 2 | unzip coco_captioning.zip
 3 | rm coco_captioning.zip
 4 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/datasets/get_pretrained_model.sh: --------------------------------------------------------------------------------
 1 | wget http://cs231n.stanford.edu/pretrained_model.h5
 2 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/datasets/get_tiny_imagenet_a.sh: --------------------------------------------------------------------------------
 1 | wget http://cs231n.stanford.edu/tiny-imagenet-100-A.zip
 2 | unzip tiny-imagenet-100-A.zip
 3 | rm tiny-imagenet-100-A.zip
 4 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/gradient_check.py: --------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from random import randrange
 3 |
 4 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
 5 |   """
 6 |   a naive implementation of numerical gradient of f at x
 7 |   - f should be a function that takes a single argument
 8 |   - x is the point (numpy array) to evaluate the gradient at
 9 |   """
 10 |
 11 |   fx = f(x) # evaluate function value at original point
 12 |   grad = np.zeros_like(x)
 13 |   # iterate over all indexes in x
 14 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 15 |   while not it.finished:
 16 |
 17 |     # evaluate function at x+h
 18 |     ix = it.multi_index
 19 |     oldval = x[ix]
 20 |     x[ix] = oldval + h # increment by h
 21 |     fxph = f(x) # evaluate f(x + h)
 22 |     x[ix] = oldval - h
 23 |     fxmh = f(x) # evaluate f(x - h)
 24 |     x[ix] = oldval # restore
 25 |
 26 |     # compute the partial derivative with centered formula
 27 |     grad[ix] = (fxph - fxmh) / (2 * h) # the slope
 28 |     if verbose:
 29 |       print ix, grad[ix]
 30 |     it.iternext() # step to next dimension
 31 |
 32 |   return grad
 33 |
 34 |
 35 | def eval_numerical_gradient_array(f, x, df, h=1e-5):
 36 |   """
 37 |   Evaluate a numeric gradient for a function that accepts a numpy
 38 |   array and returns a numpy array.
 39 |   """
 40 |   grad = np.zeros_like(x)
 41 |   it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
 42 |   while not it.finished:
 43 |     ix = it.multi_index
 44 |
 45 |     oldval = x[ix]
 46 |     x[ix] = oldval + h
 47 |     pos = f(x).copy()
 48 |     x[ix] = oldval - h
 49 |     neg = f(x).copy()
 50 |     x[ix] = oldval
 51 |
 52 |     grad[ix] = np.sum((pos - neg) * df) / (2 * h)
 53 |     it.iternext()
 54 |   return grad
 55 |
 56 |
 57 | def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
 58 |   """
 59 |   Compute numeric gradients for a function that operates on input
 60 |   and output blobs.
 61 |
 62 |   We assume that f accepts several input blobs as arguments, followed by a blob
 63 |   into which outputs will be written.
For example, f might be called like this:
 64 |
 65 |   f(x, w, out)
 66 |
 67 |   where x and w are input Blobs, and the result of f will be written to out.
 68 |
 69 |   Inputs:
 70 |   - f: function
 71 |   - inputs: tuple of input blobs
 72 |   - output: output blob
 73 |   - h: step size
 74 |   """
 75 |   numeric_diffs = []
 76 |   for input_blob in inputs:
 77 |     diff = np.zeros_like(input_blob.diffs)
 78 |     it = np.nditer(input_blob.vals, flags=['multi_index'],
 79 |                    op_flags=['readwrite'])
 80 |     while not it.finished:
 81 |       idx = it.multi_index
 82 |       orig = input_blob.vals[idx]
 83 |
 84 |       input_blob.vals[idx] = orig + h
 85 |       f(*(inputs + (output,)))
 86 |       pos = np.copy(output.vals)
 87 |       input_blob.vals[idx] = orig - h
 88 |       f(*(inputs + (output,)))
 89 |       neg = np.copy(output.vals)
 90 |       input_blob.vals[idx] = orig
 91 |
 92 |       diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
 93 |
 94 |       it.iternext()
 95 |     numeric_diffs.append(diff)
 96 |   return numeric_diffs
 97 |
 98 |
 99 | def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
 100 |   return eval_numerical_gradient_blobs(lambda *args: net.forward(),
 101 |                                        inputs, output, h=h)
 102 |
 103 |
 104 | def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
 105 |   """
 106 |   sample a few random elements and only return the numerical gradient
 107 |   in these dimensions.
 108 |   """
 109 |
 110 |   for i in xrange(num_checks):
 111 |     ix = tuple([randrange(m) for m in x.shape])
 112 |
 113 |     oldval = x[ix]
 114 |     x[ix] = oldval + h # increment by h
 115 |     fxph = f(x) # evaluate f(x + h)
 116 |     x[ix] = oldval - h # decrement by h
 117 |     fxmh = f(x) # evaluate f(x - h)
 118 |     x[ix] = oldval # reset
 119 |
 120 |     grad_numerical = (fxph - fxmh) / (2 * h)
 121 |     grad_analytic = analytic_grad[ix]
 122 |     rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic))
 123 |     print 'numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)
 124 |
 125 |
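if __name__ == '__main__':
  # Usage sketch (added for illustration; not part of the original assignment
  # code): numerically check the gradient of f(x) = sum(x ** 2), whose
  # analytic gradient is 2 * x. The centered formula above has O(h^2) error,
  # so the printed value should be tiny.
  f = lambda x: np.sum(x ** 2)
  x = np.random.randn(4, 5)
  grad = eval_numerical_gradient(f, x, verbose=False)
  print 'max abs error: %e' % np.max(np.abs(grad - 2 * x))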
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/im2col.py: --------------------------------------------------------------------------------
 1 | import numpy as np
 2 |
 3 |
 4 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
 5 |   # First figure out what the size of the output should be
 6 |   N, C, H, W = x_shape
 7 |   assert (H + 2 * padding - field_height) % stride == 0
 8 |   assert (W + 2 * padding - field_width) % stride == 0
 9 |   out_height = (H + 2 * padding - field_height) / stride + 1
 10 |   out_width = (W + 2 * padding - field_width) / stride + 1
 11 |
 12 |   i0 = np.repeat(np.arange(field_height), field_width)
 13 |   i0 = np.tile(i0, C)
 14 |   i1 = stride * np.repeat(np.arange(out_height), out_width)
 15 |   j0 = np.tile(np.arange(field_width), field_height * C)
 16 |   j1 = stride * np.tile(np.arange(out_width), out_height)
 17 |   i = i0.reshape(-1, 1) + i1.reshape(1, -1)
 18 |   j = j0.reshape(-1, 1) + j1.reshape(1, -1)
 19 |
 20 |   k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
 21 |
 22 |   return (k, i, j)
 23 |
 24 |
 25 | def im2col_indices(x, field_height, field_width, padding=1, stride=1):
 26 |   """ An implementation of im2col based on some fancy indexing """
 27 |   # Zero-pad the input
 28 |   p = padding
 29 |   x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 30 |
 31 |   k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding,
 32 |                                stride)
 33 |
 34 |   cols = x_padded[:, k, i, j]
 35 |   C = x.shape[1]
 36 |   cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
 37 |   return cols
 38 |
 39 |
 40 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
 41 |                    stride=1):
 42 |   """ An implementation of col2im based on fancy indexing and np.add.at """
 43 |   N, C, H, W = x_shape
 44 |   H_padded, W_padded = H + 2 * padding, W + 2 * padding
 45 |   x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
 46 |   k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
 47 |                                stride)
 48 |   cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
 49 |   cols_reshaped = cols_reshaped.transpose(2, 0, 1)
 50 |   np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
 51 |   if padding == 0:
 52 |     return x_padded
 53 |   return x_padded[:, :, padding:-padding, padding:-padding]
 54 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/im2col_cython.pyx: --------------------------------------------------------------------------------
 1 | import numpy as np
 2 | cimport numpy as np
 3 | cimport cython
 4 |
 5 | # DTYPE = np.float64
 6 | # ctypedef np.float64_t DTYPE_t
 7 |
 8 | ctypedef fused DTYPE_t:
 9 |     np.float32_t
 10 |     np.float64_t
 11 |
 12 | def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
 13 |                   int field_width, int padding, int stride):
 14 |     cdef int N = x.shape[0]
 15 |     cdef int C = x.shape[1]
 16 |     cdef int H = x.shape[2]
 17 |     cdef int W = x.shape[3]
 18 |
 19 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 20 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 21 |
 22 |     cdef int p = padding
 23 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
 24 |         ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
 25 |
 26 |     cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
 27 |             (C * field_height * field_width, N * HH * WW),
 28 |             dtype=x.dtype)
 29 |
 30 |     # Moving the inner loop to a C function with no bounds checking works, but does
 31 |     # not seem to help performance in any measurable way.
 32 |
 33 |     im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
 34 |                         field_height, field_width, padding, stride)
 35 |     return cols
 36 |
 37 |
 38 | @cython.boundscheck(False)
 39 | cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
 40 |                              np.ndarray[DTYPE_t, ndim=4] x_padded,
 41 |                              int N, int C, int H, int W, int HH, int WW,
 42 |                              int field_height, int field_width, int padding, int stride) except? -1:
 43 |     cdef int c, ii, jj, row, yy, xx, i, col
 44 |
 45 |     for c in range(C):
 46 |         for yy in range(HH):
 47 |             for xx in range(WW):
 48 |                 for ii in range(field_height):
 49 |                     for jj in range(field_width):
 50 |                         row = c * field_width * field_height + ii * field_height + jj
 51 |                         for i in range(N):
 52 |                             col = yy * WW * N + xx * N + i
 53 |                             cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
 54 |
 55 |
 56 |
 57 | def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
 58 |                   int field_height, int field_width, int padding, int stride):
 59 |     cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
 60 |     cdef int HH = (H + 2 * padding - field_height) / stride + 1
 61 |     cdef int WW = (W + 2 * padding - field_width) / stride + 1
 62 |     cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
 63 |                                                          dtype=cols.dtype)
 64 |
 65 |     # Moving the inner loop to a C-function with no bounds checking improves
 66 |     # performance quite a bit for col2im.
67 | col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW, 68 | field_height, field_width, padding, stride) 69 | if padding > 0: 70 | return x_padded[:, :, padding:-padding, padding:-padding] 71 | return x_padded 72 | 73 | 74 | @cython.boundscheck(False) 75 | cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols, 76 | np.ndarray[DTYPE_t, ndim=4] x_padded, 77 | int N, int C, int H, int W, int HH, int WW, 78 | int field_height, int field_width, int padding, int stride) except? -1: 79 | cdef int c, ii, jj, row, yy, xx, i, col 80 | 81 | for c in range(C): 82 | for ii in range(field_height): 83 | for jj in range(field_width): 84 | row = c * field_width * field_height + ii * field_height + jj 85 | for yy in range(HH): 86 | for xx in range(WW): 87 | for i in range(N): 88 | col = yy * WW * N + xx * N + i 89 | x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col] 90 | 91 | 92 | @cython.boundscheck(False) 93 | @cython.wraparound(False) 94 | cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols, 95 | np.ndarray[DTYPE_t, ndim=4] x_padded, 96 | int N, int C, int H, int W, int HH, int WW, 97 | int out_h, int out_w, int pad, int stride): 98 | 99 | cdef int c, hh, ww, n, h, w 100 | for n in range(N): 101 | for c in range(C): 102 | for hh in range(HH): 103 | for ww in range(WW): 104 | for h in range(out_h): 105 | for w in range(out_w): 106 | x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w] 107 | 108 | 109 | def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W, 110 | int HH, int WW, int pad, int stride): 111 | cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype) 112 | cdef int out_h = (H + 2 * pad - HH) / stride + 1 113 | cdef int out_w = (W + 2 * pad - WW) / stride + 1 114 | cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad), 115 | dtype=cols.dtype) 116 | 117 | col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride) 118 | 119 | if pad > 0: 120 | return x_padded[:, :, pad:-pad, pad:-pad] 121 | return x_padded 122 | -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/image_utils.py: -------------------------------------------------------------------------------- 1 | import urllib2, os, tempfile 2 | 3 | import numpy as np 4 | from scipy.misc import imread 5 | 6 | from cs231n.fast_layers import conv_forward_fast 7 | 8 | 9 | """ 10 | Utility functions used for viewing and processing images. 11 | """ 12 | 13 | 14 | def blur_image(X): 15 | """ 16 | A very gentle image blurring operation, to be used as a regularizer for image 17 | generation. 
 18 |
 19 |   Inputs:
 20 |   - X: Image data of shape (N, 3, H, W)
 21 |
 22 |   Returns:
 23 |   - X_blur: Blurred version of X, of shape (N, 3, H, W)
 24 |   """
 25 |   w_blur = np.zeros((3, 3, 3, 3))
 26 |   b_blur = np.zeros(3)
 27 |   blur_param = {'stride': 1, 'pad': 1}
 28 |   for i in xrange(3):
 29 |     w_blur[i, i] = np.asarray([[1, 2, 1], [2, 188, 2], [1, 2, 1]], dtype=np.float32)
 30 |   w_blur /= 200.0
 31 |   return conv_forward_fast(X, w_blur, b_blur, blur_param)[0]
 32 |
 33 |
 34 | def preprocess_image(img, mean_img, mean='image'):
 35 |   """
 36 |   Convert to float, transpose, and subtract mean pixel
 37 |
 38 |   Input:
 39 |   - img: (H, W, 3)
 40 |
 41 |   Returns:
 42 |   - (1, 3, H, W)
 43 |   """
 44 |   if mean == 'image':
 45 |     mean = mean_img
 46 |   elif mean == 'pixel':
 47 |     mean = mean_img.mean(axis=(1, 2), keepdims=True)
 48 |   elif mean == 'none':
 49 |     mean = 0
 50 |   else:
 51 |     raise ValueError('mean must be image or pixel or none')
 52 |   return img.astype(np.float32).transpose(2, 0, 1)[None] - mean
 53 |
 54 |
 55 | def deprocess_image(img, mean_img, mean='image', renorm=False):
 56 |   """
 57 |   Add mean pixel, transpose, and convert to uint8
 58 |
 59 |   Input:
 60 |   - (1, 3, H, W) or (3, H, W)
 61 |
 62 |   Returns:
 63 |   - (H, W, 3)
 64 |   """
 65 |   if mean == 'image':
 66 |     mean = mean_img
 67 |   elif mean == 'pixel':
 68 |     mean = mean_img.mean(axis=(1, 2), keepdims=True)
 69 |   elif mean == 'none':
 70 |     mean = 0
 71 |   else:
 72 |     raise ValueError('mean must be image or pixel or none')
 73 |   if img.ndim == 3:
 74 |     img = img[None]
 75 |   img = (img + mean)[0].transpose(1, 2, 0)
 76 |   if renorm:
 77 |     low, high = img.min(), img.max()
 78 |     img = 255.0 * (img - low) / (high - low)
 79 |   return img.astype(np.uint8)
 80 |
 81 |
 82 | def image_from_url(url):
 83 |   """
 84 |   Read an image from a URL. Returns a numpy array with the pixel data.
 85 |   We write the image to a temporary file then read it back. Kinda gross.
 86 |   """
 87 |   try:
 88 |     f = urllib2.urlopen(url)
 89 |     _, fname = tempfile.mkstemp()
 90 |     with open(fname, 'wb') as ff:
 91 |       ff.write(f.read())
 92 |     img = imread(fname)
 93 |     os.remove(fname)
 94 |     return img
 95 |   except urllib2.HTTPError as e:
 96 |     # HTTPError subclasses URLError, so it must be caught first
 97 |     print 'HTTP Error: ', e.code, url
 98 |   except urllib2.URLError as e:
 99 |     print 'URL Error: ', e.reason, url
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/layer_utils.py: --------------------------------------------------------------------------------
 1 | from cs231n.layers import *
 2 | from cs231n.fast_layers import *
 3 |
 4 |
 5 | def affine_relu_forward(x, w, b):
 6 |   """
 7 |   Convenience layer that performs an affine transform followed by a ReLU
 8 |
 9 |   Inputs:
 10 |   - x: Input to the affine layer
 11 |   - w, b: Weights for the affine layer
 12 |
 13 |   Returns a tuple of:
 14 |   - out: Output from the ReLU
 15 |   - cache: Object to give to the backward pass
 16 |   """
 17 |   a, fc_cache = affine_forward(x, w, b)
 18 |   out, relu_cache = relu_forward(a)
 19 |   cache = (fc_cache, relu_cache)
 20 |   return out, cache
 21 |
 22 |
 23 | def affine_relu_backward(dout, cache):
 24 |   """
 25 |   Backward pass for the affine-relu convenience layer
 26 |   """
 27 |   fc_cache, relu_cache = cache
 28 |   da = relu_backward(dout, relu_cache)
 29 |   dx, dw, db = affine_backward(da, fc_cache)
 30 |   return dx, dw, db
 31 |
 32 |
 33 | def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
 34 |   """
 35 |   Convenience layer that performs an affine transform, batch normalization,
 36 |   and ReLU.
 37 |
 38 |   Inputs:
 39 |   - x: Array of shape (N, D1); input to the affine layer
 40 |   - w, b: Arrays of shape (D1, D2) and (D2,) giving the weight and bias for
 41 |     the affine transform.
 42 |   - gamma, beta: Arrays of shape (D2,) and (D2,) giving scale and shift
 43 |     parameters for batch normalization.
 44 |   - bn_param: Dictionary of parameters for batch normalization.
 45 |
 46 |   Returns:
 47 |   - out: Output from ReLU, of shape (N, D2)
 48 |   - cache: Object to give to the backward pass.
 49 |   """
 50 |   a, fc_cache = affine_forward(x, w, b)
 51 |   a_bn, bn_cache = batchnorm_forward(a, gamma, beta, bn_param)
 52 |   out, relu_cache = relu_forward(a_bn)
 53 |   cache = (fc_cache, bn_cache, relu_cache)
 54 |   return out, cache
 55 |
 56 |
 57 | def affine_bn_relu_backward(dout, cache):
 58 |   """
 59 |   Backward pass for the affine-batchnorm-relu convenience layer.
 60 |   """
 61 |   fc_cache, bn_cache, relu_cache = cache
 62 |   da_bn = relu_backward(dout, relu_cache)
 63 |   da, dgamma, dbeta = batchnorm_backward(da_bn, bn_cache)
 64 |   dx, dw, db = affine_backward(da, fc_cache)
 65 |   return dx, dw, db, dgamma, dbeta
 66 |
 67 |
 68 | def conv_relu_forward(x, w, b, conv_param):
 69 |   """
 70 |   A convenience layer that performs a convolution followed by a ReLU.
 71 |
 72 |   Inputs:
 73 |   - x: Input to the convolutional layer
 74 |   - w, b, conv_param: Weights and parameters for the convolutional layer
 75 |
 76 |   Returns a tuple of:
 77 |   - out: Output from the ReLU
 78 |   - cache: Object to give to the backward pass
 79 |   """
 80 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 81 |   out, relu_cache = relu_forward(a)
 82 |   cache = (conv_cache, relu_cache)
 83 |   return out, cache
 84 |
 85 |
 86 | def conv_relu_backward(dout, cache):
 87 |   """
 88 |   Backward pass for the conv-relu convenience layer.
 89 |   """
 90 |   conv_cache, relu_cache = cache
 91 |   da = relu_backward(dout, relu_cache)
 92 |   dx, dw, db = conv_backward_fast(da, conv_cache)
 93 |   return dx, dw, db
 94 |
 95 |
 96 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
 97 |   a, conv_cache = conv_forward_fast(x, w, b, conv_param)
 98 |   an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
 99 |   out, relu_cache = relu_forward(an)
 100 |   cache = (conv_cache, bn_cache, relu_cache)
 101 |   return out, cache
 102 |
 103 |
 104 | def conv_bn_relu_backward(dout, cache):
 105 |   conv_cache, bn_cache, relu_cache = cache
 106 |   dan = relu_backward(dout, relu_cache)
 107 |   da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
 108 |   dx, dw, db = conv_backward_fast(da, conv_cache)
 109 |   return dx, dw, db, dgamma, dbeta
 110 |
 111 |
 112 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
 113 |   """
 114 |   Convenience layer that performs a convolution, a ReLU, and a pool.
115 | 116 | Inputs: 117 | - x: Input to the convolutional layer 118 | - w, b, conv_param: Weights and parameters for the convolutional layer 119 | - pool_param: Parameters for the pooling layer 120 | 121 | Returns a tuple of: 122 | - out: Output from the pooling layer 123 | - cache: Object to give to the backward pass 124 | """ 125 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 126 | s, relu_cache = relu_forward(a) 127 | out, pool_cache = max_pool_forward_fast(s, pool_param) 128 | cache = (conv_cache, relu_cache, pool_cache) 129 | return out, cache 130 | 131 | 132 | def conv_relu_pool_backward(dout, cache): 133 | """ 134 | Backward pass for the conv-relu-pool convenience layer 135 | """ 136 | conv_cache, relu_cache, pool_cache = cache 137 | ds = max_pool_backward_fast(dout, pool_cache) 138 | da = relu_backward(ds, relu_cache) 139 | dx, dw, db = conv_backward_fast(da, conv_cache) 140 | return dx, dw, db 141 | 142 | -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used for 5 | training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning rate, 16 | momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 23 | 24 | NOTE: For most update rules, the default learning rate will probably not perform 25 | well; however the default values of the other hyperparameters should work well 26 | for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: config = {} 41 | config.setdefault('learning_rate', 1e-2) 42 | 43 | w -= config['learning_rate'] * dw 44 | return w, config 45 | 46 | 47 | def adam(x, dx, config=None): 48 | """ 49 | Uses the Adam update rule, which incorporates moving averages of both the 50 | gradient and its square and a bias correction term. 51 | 52 | config format: 53 | - learning_rate: Scalar learning rate. 54 | - beta1: Decay rate for moving average of first moment of gradient. 55 | - beta2: Decay rate for moving average of second moment of gradient. 56 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 57 | - m: Moving average of gradient. 58 | - v: Moving average of squared gradient. 59 | - t: Iteration number. 
60 | """ 61 | if config is None: config = {} 62 | config.setdefault('learning_rate', 1e-3) 63 | config.setdefault('beta1', 0.9) 64 | config.setdefault('beta2', 0.999) 65 | config.setdefault('epsilon', 1e-8) 66 | config.setdefault('m', np.zeros_like(x)) 67 | config.setdefault('v', np.zeros_like(x)) 68 | config.setdefault('t', 0) 69 | 70 | next_x = None 71 | beta1, beta2, eps = config['beta1'], config['beta2'], config['epsilon'] 72 | t, m, v = config['t'], config['m'], config['v'] 73 | m = beta1 * m + (1 - beta1) * dx 74 | v = beta2 * v + (1 - beta2) * (dx * dx) 75 | t += 1 76 | alpha = config['learning_rate'] * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t) 77 | x -= alpha * (m / (np.sqrt(v) + eps)) 78 | config['t'] = t 79 | config['m'] = m 80 | config['v'] = v 81 | next_x = x 82 | 83 | return next_x, config 84 | 85 | 86 | -------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignments2016/assignment3/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | PYVER=2.7 5 | PATHTOPYTHON=/usr/local/bin/ 6 | PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"` 10 | 11 | # now run Python with the virtualenv set as Python's HOME 12 | export PYTHONHOME=$ENV 13 | exec $PYTHON "$@" 14 | -------------------------------------------------------------------------------- /assignments2016/assignment3/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment3/kitten.jpg -------------------------------------------------------------------------------- /assignments2016/assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.9.0 12 | decorator==4.0.6 13 | functools32==3.2.3-2 14 | gnureadline==6.3.3 15 | ipykernel==4.2.2 16 | ipython==4.0.1 17 | ipython-genutils==0.1.0 18 | ipywidgets==4.1.1 19 | jsonschema==2.5.1 20 | jupyter==1.0.0 21 | jupyter-client==4.1.1 22 | jupyter-console==4.0.3 23 | jupyter-core==4.0.6 24 | matplotlib==1.5.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==4.0.6 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | six==1.10.0 43 | 
-------------------------------------------------------------------------------- /assignments2016/assignment3/cs231n/setup.py: --------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | from distutils.extension import Extension
 3 | from Cython.Build import cythonize
 4 | import numpy
 5 |
 6 | extensions = [
 7 |   Extension('im2col_cython', ['im2col_cython.pyx'],
 8 |             include_dirs = [numpy.get_include()]
 9 |   ),
 10 | ]
 11 |
 12 | setup(
 13 |     ext_modules = cythonize(extensions),
 14 | )
 15 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/frameworkpython: --------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 |
 3 | # what real Python executable to use
 4 | PYVER=2.7
 5 | PATHTOPYTHON=/usr/local/bin/
 6 | PYTHON=${PATHTOPYTHON}python${PYVER}
 7 |
 8 | # find the root of the virtualenv, it should be the parent of the dir this script is in
 9 | ENV=`$PYTHON -c "import os; print os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..'))"`
 10 |
 11 | # now run Python with the virtualenv set as Python's HOME
 12 | export PYTHONHOME=$ENV
 13 | exec $PYTHON "$@"
 14 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment3/kitten.jpg
-------------------------------------------------------------------------------- /assignments2016/assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.9.0 12 | decorator==4.0.6 13 | functools32==3.2.3-2 14 | gnureadline==6.3.3 15 | ipykernel==4.2.2 16 | ipython==4.0.1 17 | ipython-genutils==0.1.0 18 | ipywidgets==4.1.1 19 | jsonschema==2.5.1 20 | jupyter==1.0.0 21 | jupyter-client==4.1.1 22 | jupyter-console==4.0.3 23 | jupyter-core==4.0.6 24 | matplotlib==1.5.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==4.0.6 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | six==1.10.0 43 | terminado==0.5 44 | tornado==4.3 45 | traitlets==4.0.0 46 | wsgiref==0.1.2 47 |
-------------------------------------------------------------------------------- /assignments2016/assignment3/sky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aikorea/cs231n/4a3dcdef4deb553c7ba12f891aaa4ddd3ccf7e96/assignments2016/assignment3/sky.jpg
-------------------------------------------------------------------------------- /assignments2016/assignment3/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 |
-------------------------------------------------------------------------------- /aws-tutorial.md: --------------------------------------------------------------------------------

---
layout: page
title: AWS Tutorial
permalink: /aws-tutorial/
---

If you use GPU instances, there is an Amazon Machine Image (AMI) for Amazon EC2 with which you can run GPU instances. This tutorial explains how to set up your own EC2 instance from the provided AMI. **We are currently not providing AWS credits to CS231N students. We recommend using your own budget to use the AWS snapshot.**

**Summary** For those already familiar with AWS: the image to use is `cs231n_caffe_torch7_keras_lasagne_v2`, AMI ID: `ami-125b2c72`, in the US West (N. California) region. Use a `g2.2xlarge` instance. The image has Caffe, Torch7, Theano, Keras, and Lasagne installed, and the Python bindings for Caffe are ready to use. The created instance includes CUDA 7.5 and cuDNN v3.

First, if you do not have an AWS account yet, go to the [AWS homepage](http://aws.amazon.com/) and create an account by pressing the yellow "Sign Up" button. Pressing the button brings up the sign-up page, which looks like the image below.
*(screenshot: AWS sign-up page)*

Enter your e-mail address or phone number, select "I am a new user.", and press "Sign in using our secure server"; a series of pages will then ask for your details. You will be asked to enter credit card information and verify your phone number along the way, so have your phone and credit card ready.

Once you have signed up, go back to the [AWS homepage](http://aws.amazon.com), click the "Sign In to the Console" button, and log in with your e-mail and password.

*(screenshot: AWS sign-in page)*

Once you are logged in, a page like the following will greet you:

*(screenshot: AWS console home page)*

Make sure the region in the top right is set to N. California. If it is not set correctly, choose N. California from the drop-down menu.

(Your account must be "verified" before you can proceed. Verification takes about two hours, and you may not be able to launch instances until it completes.)

Next, click the EC2 link (the first link in the Compute category). That takes you to a dashboard page like this:

*(screenshot: EC2 dashboard)*

Click the blue button labeled "Launch Instance". You will be taken to a page like the following:

*(screenshot: launch instance screen)*

Click "Community AMIs" in the sidebar menu on the left, and type "cs231n" into the search box. The search results will include `cs231n_caffe_torch7_keras_lasagne_v2` (AMI ID: `ami-125b2c72`). Select this AMI, and pick the instance type in the next step.

*(screenshot: Community AMIs search results)*

Select the instance type `g2.2xlarge` and click "Review and Launch".

*(screenshot: instance type selection)*

On the next screen, click Launch.

*(screenshot: review screen)*

You will then be asked whether to use an existing key pair or to create a new one. If you already use AWS, you can use one of your existing keys; otherwise, create a new key by selecting "Create a new key pair" from the drop-down menu, and download the key. Take great care not to delete the downloaded key by accident: if you lose the key, you **cannot connect** to your instance.

*(screenshot: key pair selection dialog)*

*(screenshot: key pair creation and download)*

Once the key has finished downloading, change its permissions to user-only read/write. On Linux/OSX you can do this with the following command:

~~~
$ chmod 600 PEM_FILENAME
~~~

Here `PEM_FILENAME` is the name of the .pem file you just downloaded.

After fixing the permissions, click "Launch Instance". A message will appear saying that your instances are now launching.

*(screenshot: launching message)*

Click "View Instances" to check the status of your instance. After "2/2 status checks passed", the status changes to "Running". Once it is "Running", you can connect to the instance over ssh.

*(screenshot: instances page)*

First, note the Public IP of the instance from the instance list. Then run:

~~~
ssh -i PEM_FILENAME ubuntu@PUBLIC_IP
~~~

You should now be logged into the instance. You can check that Caffe is working with the following commands:

~~~
$ cd caffe
$ ./build/tools/caffe time --gpu 0 --model examples/mnist/lenet.prototxt
~~~

The instance comes with Caffe, Theano, Torch7, Keras, and Lasagne installed. The Caffe Python bindings are also available by default, and the instance includes CUDA 7.5 and cuDNN v3.

If you run into an error like the following,

~~~
Check failed: error == cudaSuccess (77 vs. 0) an illegal memory access was encountered
~~~

you should terminate the instance and start again from instance creation. The exact cause of this error is unknown, but it reportedly happens on rare occasions.

Notes on using the instance:

- The root directory is 12GB in total, with ~3GB of free space.
- There is 60GB of space at `/mnt` where you can store models and model checkpoints.
- Data in the `/mnt` directory is lost when the instance is rebooted/terminated.
- Stop the instance when you are done working so you do not incur extra charges. GPU instances are expensive; use your budget wisely. Terminate the instance when your work is completely finished (disk space is also billed, so a large disk can add up to a significant charge).
- Under 'creating custom alarms' you can configure the instance to stop when it is idle.
- If the instance does not need access to a large dataset, or you do not want to keep the instance running just to download one, the best approach may be to create an AMI and attach it to your machine when configuring the instance (this has to be done after selecting the AMI and before launching the instance).

---

Translation: Woojung Kim (gnujoow)

-------------------------------------------------------------------------------- /convnet-tips.md: --------------------------------------------------------------------------------

---
layout: page
permalink: /convnet-tips/
---

### Addressing Overfitting

#### Data Augmentation

- Flip the training images horizontally (see the sketch at the end of this page)
- Sample random crops / scales in the original image
- Jitter the colors

#### Dropout

- Dropout is just as effective for Conv layers. Usually people apply less dropout right before early conv layers, since there are not that many parameters there compared to later stages of the network (e.g. the fully-connected layers).
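A minimal numpy sketch of the flip / random-crop augmentations listed above (the function name, the (N, H, W, C) input layout, and the crop size are illustrative assumptions, not code from the assignments):

~~~python
import numpy as np

def augment_batch(X, crop=28):
    """Randomly mirror and crop a batch of images X of shape (N, H, W, C)."""
    N, H, W, C = X.shape
    out = np.empty((N, crop, crop, C), dtype=X.dtype)
    for i in xrange(N):
        img = X[i]
        if np.random.rand() < 0.5:
            img = img[:, ::-1, :]  # horizontal flip
        y = np.random.randint(H - crop + 1)  # random crop offsets
        x = np.random.randint(W - crop + 1)
        out[i] = img[y:y + crop, x:x + crop, :]
    return out
~~~

(Written with Python 2 `xrange` for consistency with the rest of the code in this repository.)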
-------------------------------------------------------------------------------- /glossary.md: --------------------------------------------------------------------------------

---
layout: page
mathjax: true
permalink: /glossary/
---

This is a glossary for keeping terminology consistent when translating from English to Korean. To add a new term, please open an issue on GitHub so that it can be discussed and agreed upon.

| English | 한글 |
|---|---|
| Accuracy | 정확도, 성능 |
| Activation function | 활성 함수 |
| Architecture | 구조 |
| Backpropagation | (영어 그대로) |
| Batch | 배치 |
| Batch normalization | 배치 정규화 |
| Bias | (영어 그대로) |
| Chain rule | 연쇄 법칙 |
| Class | 클래스 |
| Classification | 분류 |
| Classifier | 분류기 |
| Column vector | 열 벡터 |
| Convolution | 컨볼루션 |
| Convolutional neural network | 컨볼루션 신경망 |
| Covariance | 공분산 |
| Cross entropy | (영어 그대로) |
| Cross validation | 교차 검증 |
| Depth | 깊이 |
| Derivative | 미분값, 도함수 |
| Dropout | (영어 그대로) |
| Error | 에러, 오차 |
| Evaluate | 평가하다 |
| Feature | 특징, 표현, 피쳐 |
| Filter | 필터 |
| Forward propagation | (영어 그대로) |
| Fully-connected | (영어 그대로) |
| Gate | 게이트 |
| Gradient | 그라디언트 |
| GRU | (영어 그대로) |
| Hyperparameter | (영어 그대로) |
| Image | 이미지 |
| Implement | 구현하다 |
| Initialization | 초기화 |
| Iteration | 반복 |
| Label | 라벨 |
| Layer | 레이어 |
| Learning | 러닝, 학습 |
| Loop | 루프 |
| Loss (function) | 손실 함수 |
| LSTM | (영어 그대로) |
| Matrix | 행렬 |
| Nearest neighbor | (영어 그대로) |
| Network | 네트워크 |
| Neural network | 신경망, 뉴럴 네트워크 |
| Neuron | 뉴런 |
| Node | 노드 |
| Non-linearity | 비선형~ |
| Optimization | 최적화 |
| Overfitting | (영어 그대로) |
| Padding | 패딩 |
| Parameter | 파라미터 |
| Performance | 성능 |
| Pixel | 픽셀, 화소 |
| Pooling | 풀링 |
| Preprocessing | 전처리 |
| Receptive Field | (영어 그대로) |
| Regression | 회귀 |
| Regularization | (영어 그대로) |
| ReLU | (영어 그대로) |
| Representation | 표현 |
| Recurrent neural network (RNN) | 회귀신경망, RNN |
| Row vector | 행 벡터 |
| Score | 스코어, 점수 |
| Sigmoid | (영어 그대로) |
| Softmax | (영어 그대로) |
| Training | 학습, 트레이닝 |
| Tuning | 튜닝 |
| Validation | 검증 |
| Variable | 변수 |
| Visualization | 시각화 |
| Weights | 파라미터 값, 가중치 (문맥상 사용되는 의미에 따라) |
-------------------------------------------------------------------------------- /ipython-tutorial.md: --------------------------------------------------------------------------------

---
layout: page
title: IPython Tutorial
permalink: /ipython-tutorial/
---
In the cs231n course we use [IPython notebooks](http://ipython.org/) for the programming assignments. An IPython notebook lets you write and run Python code in your browser, and makes it very easy to tweak and re-run individual pieces of code. Because of this, IPython notebooks are widely used in scientific computing.

Installing and running IPython is simple. Install it by entering the following on the command line:

~~~
pip install "ipython[notebook]"
~~~

Once the installation is complete, run IPython with the following command:

~~~
ipython notebook
~~~

Once IPython is running, open a web browser and go to http://localhost:8888 to use it. If everything is working, a screen like the one below appears, listing the Python notebooks available in the current folder:

*(screenshot: notebook file browser)*

Clicking a notebook file brings up a screen like this:

*(screenshot: an open notebook)*

An IPython notebook is made up of a number of **cells**, each of which contains Python code. You can run a cell by clicking on it and pressing `Shift-Enter`. When you do, the output of the cell's code appears directly below the cell. For example, running the code in the first cell gives a screen like this:

*(screenshot: output of the first cell)*

Global variables are shared between cells. Running the second cell then gives the following result:

*(screenshot: output of the second cell)*
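As a concrete illustration (a minimal sketch, not taken from the actual assignment notebooks), a pair of cells like the following relies on exactly this shared state:

~~~python
# Cell 1
msg = 'Hello, CS231n'

# Cell 2: this only works after Cell 1 has been run at least once,
# because `msg` lives in the namespace shared by all cells
print(msg)
~~~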
In general, you run the code of an IPython notebook in order, from top to bottom. Failing to run some cells, or running cells out of order, can lead to errors:

*(screenshot: error caused by running cells out of order)*

Once you have changed an IPython notebook by editing or running its cells while working on an assignment, **do not forget to save your changes!**

*(screenshot: saving a notebook)*

That covers how to use IPython. It is brief, but if you are comfortable with the material above you should be able to work through the assignments without trouble.

---

Translation: Woojung Kim (gnujoow)

-------------------------------------------------------------------------------- /overview.md: --------------------------------------------------------------------------------

---
layout: page
title: Overview of Computer Vision and Visual Recognition
permalink: /overview/
---

Our introductory lecture covered the history of Computer Vision and many of its current applications, to help you understand the context in which this class is offered. See the [slides](http://vision.stanford.edu/teaching/cs231n/slides/lecture1.pdf) for details.
-------------------------------------------------------------------------------- /terminal-tutorial.md: --------------------------------------------------------------------------------

---
layout: page
title: Terminal.com Tutorial
permalink: /terminal-tutorial/
---
For the assignments, we offer the option of using [Terminal](https://www.stanfordterminalcloud.com), where you can develop and test your work. Note that we do not use the main Terminal.com site; the class uses a site registered under a subdomain that has been set aside specifically for cs231n. [Terminal](https://www.stanfordterminalcloud.com) is an online computing platform that gives you access to a pre-configured command line environment. You do not have to use [Terminal](https://www.stanfordterminalcloud.com) for your assignments, but it saves you effort because the development requirements and tools come pre-configured.

This tutorial goes over the essential steps for working on an assignment with Terminal. First, [create your account](https://www.stanfordterminalcloud.com/signup). Then log in to [Terminal](https://www.stanfordterminalcloud.com) with the account you just created.

For each assignment we provide a link to a Terminal snapshot. These snapshots contain the starter code and a pre-configured command line environment in which you can write and run your work.

Here is an example with a snapshot that looks like a 2015 assignment:
*(screenshot: a shared assignment snapshot)*

Your snapshot should look similar. Click the "Start" button in the lower right. This copies the shared snapshot into your account, and you can now find the copied terminal in the [My Terminals](https://www.stanfordterminalcloud.com/terminals) tab.

*(screenshot: the My Terminals tab)*

Your screen should look similar to this. You are now ready to work on the assignment! Open the terminal by clicking its link (the red box in the image above). The link opens the user interface layer on an AWS machine, and a screen like the following appears:

*(screenshot: the Terminal workspace)*

Jupyter Notebook and the other prerequisites are already installed on the terminal. Launch a console by clicking the small + button (if one is not already open), navigate to the assignment folder and code, and start a Jupyter Notebook to work on the assignment. If you are a student enrolled in cs231n, you should submit your assignment through Coursework.

*(screenshot: running the assignment notebook)*

For more information about [Terminal](https://www.stanfordterminalcloud.com), please visit the [FAQ](https://www.stanfordterminalcloud.com/faq) page.

**Important:** Terminal charges an hourly fee depending on the instance type you use. A medium-type instance costs $0.124 per hour.

---

Translation: Woojung Kim (gnujoow)

-------------------------------------------------------------------------------- /transfer-learning.md: --------------------------------------------------------------------------------

---
layout: page
permalink: /transfer-learning/
---

(These notes are currently in draft form and under development)

Table of Contents:

- [Transfer Learning](#tf)
- [Additional References](#add)

<a name='tf'></a>

## Transfer Learning

In practice, very few people train an entire Convolutional Network from scratch (with random initialization), because it is relatively rare to have a dataset of sufficient size. Instead, it is common to pretrain a ConvNet on a very large dataset (e.g. ImageNet, which contains 1.2 million images with 1000 categories), and then use the ConvNet either as an initialization or a fixed feature extractor for the task of interest. The three major Transfer Learning scenarios look as follows:

- **ConvNet as fixed feature extractor**. Take a ConvNet pretrained on ImageNet, remove the last fully-connected layer (this layer's outputs are the 1000 class scores for a different task like ImageNet), then treat the rest of the ConvNet as a fixed feature extractor for the new dataset. In an AlexNet, this would compute a 4096-D vector for every image that contains the activations of the hidden layer immediately before the classifier. We call these features **CNN codes**. It is important for performance that these codes are ReLUd (i.e. thresholded at zero) if they were also thresholded during the training of the ConvNet on ImageNet (as is usually the case). Once you extract the 4096-D codes for all images, train a linear classifier (e.g. Linear SVM or Softmax classifier) for the new dataset.
- **Fine-tuning the ConvNet**. The second strategy is to not only replace and retrain the classifier on top of the ConvNet on the new dataset, but to also fine-tune the weights of the pretrained network by continuing the backpropagation. It is possible to fine-tune all the layers of the ConvNet, or it's possible to keep some of the earlier layers fixed (due to overfitting concerns) and only fine-tune some higher-level portion of the network. This is motivated by the observation that the earlier features of a ConvNet contain more generic features (e.g. edge detectors or color blob detectors) that should be useful to many tasks, but later layers of the ConvNet become progressively more specific to the details of the classes contained in the original dataset. In case of ImageNet for example, which contains many dog breeds, a significant portion of the representational power of the ConvNet may be devoted to features that are specific to differentiating between dog breeds.
- **Pretrained models**. Since modern ConvNets take 2-3 weeks to train across multiple GPUs on ImageNet, it is common to see people release their final ConvNet checkpoints for the benefit of others who can use the networks for fine-tuning. For example, the Caffe library has a [Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo) where people share their network weights.

**When and how to fine-tune?** How do you decide what type of transfer learning you should perform on a new dataset? This is a function of several factors, but the two most important ones are the size of the new dataset (small or big), and its similarity to the original dataset (e.g.
ImageNet-like in terms of the content of images and the classes, or very different, such as microscope images). Keeping in mind that ConvNet features are more generic in early layers and more original-dataset-specific in later layers, here are some common rules of thumb for navigating the 4 major scenarios:

1. *New dataset is small and similar to original dataset*. Since the data is small, it is not a good idea to fine-tune the ConvNet due to overfitting concerns. Since the data is similar to the original data, we expect higher-level features in the ConvNet to be relevant to this dataset as well. Hence, the best idea might be to train a linear classifier on the CNN codes.
2. *New dataset is large and similar to the original dataset*. Since we have more data, we can have more confidence that we won't overfit if we were to try to fine-tune through the full network.
3. *New dataset is small but very different from the original dataset*. Since the data is small, it is likely best to only train a linear classifier. Since the dataset is very different, it might not be best to train the classifier from the top of the network, which contains more dataset-specific features. Instead, it might work better to train the SVM classifier on activations from somewhere earlier in the network.
4. *New dataset is large and very different from the original dataset*. Since the dataset is very large, we may expect that we can afford to train a ConvNet from scratch. However, in practice it is very often still beneficial to initialize with weights from a pretrained model. In this case, we would have enough data and confidence to fine-tune through the entire network.

A minimal code sketch of the feature-extractor recipe from scenario 1 follows at the end of this section.

**Practical advice**. There are a few additional things to keep in mind when performing Transfer Learning:

- *Constraints from pretrained models*. Note that if you wish to use a pretrained network, you may be slightly constrained in terms of the architecture you can use for your new dataset. For example, you can't arbitrarily take out Conv layers from the pretrained network. However, some changes are straightforward: due to parameter sharing, you can easily run a pretrained network on images of different spatial size. This is clearly evident in the case of Conv/Pool layers because their forward function is independent of the input volume spatial size (as long as the strides "fit"). In case of FC layers, this still holds true because FC layers can be converted to a Convolutional Layer: for example, in an AlexNet, the final pooling volume before the first FC layer is of size [6x6x256]. Therefore, the FC layer looking at this volume is equivalent to having a Convolutional Layer that has receptive field size 6x6, and is applied with padding of 0.
- *Learning rates*. It's common to use a smaller learning rate for ConvNet weights that are being fine-tuned, in comparison to the (randomly-initialized) weights for the new linear classifier that computes the class scores of your new dataset. This is because we expect that the ConvNet weights are relatively good, so we don't wish to distort them too quickly and too much (especially while the new linear classifier above them is being trained from random initialization).
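Here is a minimal, self-contained Python sketch of scenario 1 (ConvNet as a fixed feature extractor). The 4096-D "CNN codes" are faked with a fixed random projection so that the snippet runs end to end; in a real setting they would be the last-hidden-layer activations of a pretrained network, and the data shapes and `LinearSVC` regularization strength below are likewise illustrative assumptions:

~~~python
import numpy as np
from sklearn.svm import LinearSVC

rng = np.random.RandomState(0)

# Stand-in for a pretrained ConvNet: a fixed random projection followed by a
# ReLU threshold. In practice, replace extract_codes with a forward pass
# through the pretrained network up to the last hidden layer.
W_fixed = rng.randn(32 * 32 * 3, 4096) * 0.01

def extract_codes(X):
    # X: (N, 3072) flattened images -> (N, 4096) thresholded CNN codes
    return np.maximum(X.dot(W_fixed), 0)

# Toy stand-ins for a small new dataset
X_train, y_train = rng.randn(500, 3072), rng.randint(10, size=500)
X_val, y_val = rng.randn(100, 3072), rng.randint(10, size=100)

# Train a linear classifier on the frozen codes; tune C by cross-validation
clf = LinearSVC(C=1e-3)
clf.fit(extract_codes(X_train), y_train)
print(clf.score(extract_codes(X_val), y_val))
~~~

The key design point is that the network's weights are never updated: only the cheap linear classifier on top is trained, which is why this recipe is the right default when the new dataset is small.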
<a name='add'></a>

## Additional References

- [CNN Features off-the-shelf: an Astounding Baseline for Recognition](http://arxiv.org/abs/1403.6382) trains SVMs on features from an ImageNet-pretrained ConvNet and reports several state-of-the-art results.
- [DeCAF](http://arxiv.org/abs/1310.1531) reported similar findings in 2013. The framework in this paper (DeCAF) was a Python-based precursor to the C++ Caffe library.
- [How transferable are features in deep neural networks?](http://arxiv.org/abs/1411.1792) studies the transfer learning performance in detail, including some unintuitive findings about layer co-adaptations.
-------------------------------------------------------------------------------- /video-lectures.md: --------------------------------------------------------------------------------

---
layout: page
title: Video Lectures
permalink: /video-lectures/
---

The video lectures were originally uploaded to YouTube by the instructor, Andrej Karpathy, but they have since been taken down due to several issues. However, you can still watch them on the web [here](https://archive.org/details/cs231n-CNNs) or download them through the [torrent link](https://archive.org/download/cs231n-CNNs/cs231n-CNNs_archive.torrent), and someone has kindly [recreated a YouTube playlist](https://www.youtube.com/playlist?list=PLLvH2FwAQhnpj1WEB-jHmPuUeQ8mX-XXG). The caption files can be downloaded [here](https://github.com/aikorea/cs231n/tree/master/captions). (This is still a work in progress, so it is incomplete.)

The work of fixing the English captions, which were auto-generated by YouTube and in very poor shape, and of translating them into Korean is being done by **Youngbeom Kim (rollis0825), Jaeha Hwang (jaywhang), Jihoon Lee (jihoonl), Seokwoo Kim (sandrokim), Junsoo Lee (jslee), and Jaemin Cho (j-min)**. Thank you!
--------------------------------------------------------------------------------