├── .gitignore ├── assignment1 ├── cs231n │ ├── __init__.py │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── classifiers │ │ ├── __init__.py │ │ ├── softmax.py │ │ ├── linear_svm.py │ │ ├── linear_classifier.py │ │ └── k_nearest_neighbor.py │ ├── vis_utils.py │ ├── gradient_check.py │ ├── features.py │ └── data_utils.py ├── .gitignore ├── README.md ├── start_ipython_osx.sh ├── collectSubmission.sh ├── frameworkpython ├── requirements.txt ├── setup_googlecloud.sh └── softmax.md ├── assignment2 ├── cs231n │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ └── cnn.py │ ├── .gitignore │ ├── datasets │ │ ├── .gitignore │ │ └── get_datasets.sh │ ├── im2col_cython.cpython-35m-x86_64-linux-gnu.so │ ├── setup.py │ ├── im2col.py │ ├── vis_utils.py │ ├── layer_utils.py │ ├── gradient_check.py │ ├── im2col_cython.pyx │ ├── optim.py │ ├── data_utils.py │ └── fast_layers.py ├── .gitignore ├── puppy.jpg ├── kitten.jpg ├── start_ipython_osx.sh ├── collectSubmission.sh ├── frameworkpython ├── requirements.txt └── README.md ├── assignment3 ├── cs231n │ ├── __init__.py │ ├── classifiers │ │ ├── __init__.py │ │ └── squeezenet.py │ ├── datasets │ │ ├── .gitignore │ │ ├── get_imagenet_val.sh │ │ ├── get_assignment3_data.sh │ │ ├── get_squeezenet_tf.sh │ │ ├── get_coco_captioning.sh │ │ └── imagenet_val_25.npz │ ├── .gitignore │ ├── setup.py │ ├── im2col.py │ ├── image_utils.py │ ├── optim.py │ ├── coco_utils.py │ ├── gradient_check.py │ ├── layer_utils.py │ ├── im2col_cython.pyx │ ├── captioning_solver.py │ ├── data_utils.py │ ├── fast_layers.py │ └── layers.py ├── .gitignore ├── sky.jpg ├── kitten.jpg ├── gan-checks-tf.npz ├── styles │ ├── muse.jpg │ ├── tubingen.jpg │ ├── the_scream.jpg │ ├── starry_night.jpg │ └── composition_vii.jpg ├── gan_outputs_tf.png ├── gan_outputs_pytorch.png ├── example_styletransfer.png ├── style-transfer-checks.npz ├── style-transfer-checks-tf.npz ├── start_ipython_osx.sh ├── where_are_my_drivers.sh ├── frameworkpython ├── 
collectSubmission.sh └── requirements.txt ├── .dcgan_err.png ├── .style_err.png ├── .dcgan_right.png ├── .style_right.png └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip -------------------------------------------------------------------------------- /assignment1/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment2/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment3/cs231n/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment3/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assignment1/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment2/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | -------------------------------------------------------------------------------- /assignment3/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .env/* 4 | 
-------------------------------------------------------------------------------- /assignment3/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | coco_captioning/* 2 | MNIST_data/* 3 | -------------------------------------------------------------------------------- /assignment2/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | im2col_cython.c 3 | im2col_cython.so 4 | -------------------------------------------------------------------------------- /.dcgan_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.dcgan_err.png -------------------------------------------------------------------------------- /.style_err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.style_err.png -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_imagenet_val.sh: -------------------------------------------------------------------------------- 1 | wget http://cs231n.stanford.edu/imagenet_val_25.npz 2 | -------------------------------------------------------------------------------- /.dcgan_right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.dcgan_right.png -------------------------------------------------------------------------------- /.style_right.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/.style_right.png -------------------------------------------------------------------------------- /assignment3/sky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/sky.jpg -------------------------------------------------------------------------------- /assignment2/puppy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/puppy.jpg -------------------------------------------------------------------------------- /assignment2/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/kitten.jpg -------------------------------------------------------------------------------- /assignment3/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/kitten.jpg -------------------------------------------------------------------------------- /assignment3/gan-checks-tf.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan-checks-tf.npz -------------------------------------------------------------------------------- /assignment3/styles/muse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/muse.jpg 
-------------------------------------------------------------------------------- /assignment3/gan_outputs_tf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan_outputs_tf.png -------------------------------------------------------------------------------- /assignment3/styles/tubingen.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/tubingen.jpg -------------------------------------------------------------------------------- /assignment1/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | cifar-10-batches-py/* 2 | tiny-imagenet-100-A* 3 | tiny-imagenet-100-B* 4 | tiny-100-A-pretrained/* 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | cifar-10-batches-py/* 2 | tiny-imagenet-100-A* 3 | tiny-imagenet-100-B* 4 | tiny-100-A-pretrained/* 5 | -------------------------------------------------------------------------------- /assignment3/styles/the_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/the_scream.jpg -------------------------------------------------------------------------------- /assignment3/gan_outputs_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/gan_outputs_pytorch.png -------------------------------------------------------------------------------- /assignment3/styles/starry_night.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/starry_night.jpg -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from cs231n.classifiers.k_nearest_neighbor import * 2 | from cs231n.classifiers.linear_classifier import * 3 | -------------------------------------------------------------------------------- /assignment3/example_styletransfer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/example_styletransfer.png -------------------------------------------------------------------------------- /assignment3/style-transfer-checks.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/style-transfer-checks.npz -------------------------------------------------------------------------------- /assignment3/styles/composition_vii.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/styles/composition_vii.jpg -------------------------------------------------------------------------------- /assignment1/README.md: -------------------------------------------------------------------------------- 1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2017. 
2 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_assignment3_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./get_coco_captioning.sh 3 | ./get_squeezenet_tf.sh 4 | ./get_imagenet_val.sh 5 | 6 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_squeezenet_tf.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/squeezenet_tf.zip" 2 | unzip squeezenet_tf.zip 3 | rm squeezenet_tf.zip 4 | -------------------------------------------------------------------------------- /assignment3/style-transfer-checks-tf.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/style-transfer-checks-tf.npz -------------------------------------------------------------------------------- /assignment1/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment2/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | -------------------------------------------------------------------------------- /assignment3/start_ipython_osx.sh: -------------------------------------------------------------------------------- 1 | # Assume the virtualenv is called .env 2 | 3 | cp frameworkpython .env/bin 4 | .env/bin/frameworkpython -m IPython notebook 5 | 
-------------------------------------------------------------------------------- /assignment3/cs231n/datasets/get_coco_captioning.sh: -------------------------------------------------------------------------------- 1 | wget "http://cs231n.stanford.edu/coco_captioning.zip" 2 | unzip coco_captioning.zip 3 | rm coco_captioning.zip 4 | -------------------------------------------------------------------------------- /assignment3/cs231n/datasets/imagenet_val_25.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment3/cs231n/datasets/imagenet_val_25.npz -------------------------------------------------------------------------------- /assignment1/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/datasets/get_datasets.sh: -------------------------------------------------------------------------------- 1 | # Get CIFAR10 2 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 3 | tar -xzvf cifar-10-python.tar.gz 4 | rm cifar-10-python.tar.gz 5 | -------------------------------------------------------------------------------- /assignment2/cs231n/im2col_cython.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shiaoming/CS231n-Spring-2017-Assignment/HEAD/assignment2/cs231n/im2col_cython.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /assignment1/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | rm -f assignment1.zip 2 | zip 
-r assignment1.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" 3 | -------------------------------------------------------------------------------- /assignment2/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignment3/cs231n/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | extensions = [ 7 | Extension('im2col_cython', ['im2col_cython.pyx'], 8 | include_dirs = [numpy.get_include()] 9 | ), 10 | ] 11 | 12 | setup( 13 | ext_modules = cythonize(extensions), 14 | ) 15 | -------------------------------------------------------------------------------- /assignment3/where_are_my_drivers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Checking for CUDA and installing." 3 | # Check for CUDA and try to install. 4 | if ! dpkg-query -W cuda; then 5 | # The 16.04 installer works with 16.10. 
6 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 7 | dpkg -i ./cuda-repo-ubuntu1604_8.0.61-1_amd64.deb 8 | apt-get update 9 | apt-get install cuda -y 10 | fi 11 | -------------------------------------------------------------------------------- /assignment2/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | files="BatchNormalization.ipynb 2 | ConvolutionalNetworks.ipynb 3 | Dropout.ipynb 4 | FullyConnectedNets.ipynb 5 | PyTorch.ipynb 6 | TensorFlow.ipynb" 7 | 8 | for file in $files 9 | do 10 | if [ ! -f $file ]; then 11 | echo "Required notebook $file not found." 12 | exit 0 13 | fi 14 | done 15 | 16 | rm -f assignment2.zip 17 | zip -r assignment2.zip . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 18 | -------------------------------------------------------------------------------- /assignment1/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment2/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | 
#PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment3/frameworkpython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # what real Python executable to use 4 | #PYVER=2.7 5 | #PATHTOPYTHON=/usr/local/bin/ 6 | #PYTHON=${PATHTOPYTHON}python${PYVER} 7 | 8 | PYTHON=$(which $(readlink .env/bin/python)) # only works with python3 9 | 10 | # find the root of the virtualenv, it should be the parent of the dir this script is in 11 | ENV=`$PYTHON -c "import os; print(os.path.abspath(os.path.join(os.path.dirname(\"$0\"), '..')))"` 12 | 13 | # now run Python with the virtualenv set as Python's HOME 14 | export PYTHONHOME=$ENV 15 | exec $PYTHON "$@" 16 | -------------------------------------------------------------------------------- /assignment3/collectSubmission.sh: -------------------------------------------------------------------------------- 1 | files="GANs-PyTorch.ipynb 2 | GANs-TensorFlow.ipynb 3 | LSTM_Captioning.ipynb 4 | NetworkVisualization-PyTorch.ipynb 5 | NetworkVisualization-TensorFlow.ipynb 6 | RNN_Captioning.ipynb 7 | StyleTransfer-PyTorch.ipynb 8 | StyleTransfer-TensorFlow.ipynb" 9 | 10 | for file in $files 11 | do 12 | if [ ! -f $file ]; then 13 | echo "Required notebook $file not found." 14 | exit 0 15 | fi 16 | done 17 | 18 | 19 | rm -f assignment3.zip 20 | zip -r assignment3.zip . 
-x "*.git" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*" "*.pyc" "*cs231n/build/*" 21 | -------------------------------------------------------------------------------- /assignment1/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.10.0 12 | decorator==4.0.6 13 | future==0.16.0 14 | gnureadline==6.3.3 15 | ipykernel==4.2.2 16 | ipython==4.0.1 17 | ipython-genutils==0.1.0 18 | ipywidgets==4.1.1 19 | jsonschema==2.5.1 20 | jupyter==1.0.0 21 | jupyter-client==4.1.1 22 | jupyter-console==4.0.3 23 | jupyter-core==4.0.6 24 | matplotlib==2.0.0 25 | mistune==0.7.1 26 | nbconvert==4.1.0 27 | nbformat==4.0.1 28 | notebook==5.4.1 29 | numpy==1.10.4 30 | path.py==8.1.2 31 | pexpect==4.0.1 32 | pickleshare==0.5 33 | ptyprocess==0.5 34 | pyparsing==2.0.7 35 | python-dateutil==2.4.2 36 | pytz==2015.7 37 | pyzmq==15.1.0 38 | qtconsole==4.1.1 39 | scipy==0.16.1 40 | simplegeneric==0.8.1 41 | singledispatch==3.4.0.3 42 | site==0.0.1 43 | six==1.10.0 44 | terminado==0.5 45 | tornado==4.3 46 | traitlets==4.0.0 47 | -------------------------------------------------------------------------------- /assignment2/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.25.2 2 | Jinja2>=2.8 3 | MarkupSafe>=0.23 4 | Pillow>=3.0.0 5 | Pygments>=2.0.2 6 | appnope>=0.1.0 7 | argparse>=1.2.1 8 | backports-abc>=0.4 9 | backports.ssl-match-hostname>=3.5.0.1 10 | certifi>=2015.11.20.1 11 | cycler>=0.10.0 12 | decorator>=4.0.6 13 | future>=0.16.0 14 | #gnureadline>=6.3.3 15 | h5py>=2.7.0 16 | ipykernel>=4.2.2 17 | ipython>=4.0.1 18 | ipython-genutils>=0.1.0 19 | ipywidgets>=4.1.1 20 | 
jsonschema>=2.5.1 21 | jupyter>=1.0.0 22 | jupyter-client>=4.1.1 23 | jupyter-console>=4.0.3 24 | jupyter-core>=4.0.6 25 | matplotlib>=2.0.0 26 | mistune>=0.7.1 27 | nbconvert>=4.1.0 28 | nbformat>=4.0.1 29 | nltk>=3.2.2 30 | notebook>=5.4.1 31 | numpy>=1.12.1 32 | path.py>=8.1.2 33 | pexpect>=4.0.1 34 | pickleshare>=0.5 35 | ptyprocess>=0.5 36 | pyparsing>=2.0.7 37 | python-dateutil>=2.4.2 38 | pytz>=2015.7 39 | pyzmq>=15.1.0 40 | qtconsole>=4.1.1 41 | scipy>=0.19.0 42 | simplegeneric>=0.8.1 43 | singledispatch>=3.4.0.3 44 | site>=0.0.1 45 | six>=1.10.0 46 | terminado>=0.5 47 | tornado>=4.3 48 | traitlets>=4.0.0 49 | -------------------------------------------------------------------------------- /assignment3/requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.25.2 2 | Jinja2==2.8 3 | MarkupSafe==0.23 4 | Pillow==3.0.0 5 | Pygments==2.0.2 6 | appnope==0.1.0 7 | argparse==1.2.1 8 | backports-abc==0.4 9 | backports.ssl-match-hostname==3.5.0.1 10 | certifi==2015.11.20.1 11 | cycler==0.10.0 12 | decorator==4.0.6 13 | future==0.16.0 14 | gnureadline==6.3.3 15 | h5py==2.7.0 16 | ipykernel==4.2.2 17 | ipython==4.0.1 18 | ipython-genutils==0.1.0 19 | ipywidgets==4.1.1 20 | jsonschema==2.5.1 21 | jupyter==1.0.0 22 | jupyter-client==4.1.1 23 | jupyter-console==4.0.3 24 | jupyter-core==4.0.6 25 | matplotlib==2.0.0 26 | mistune==0.7.1 27 | nbconvert==4.1.0 28 | nbformat==4.0.1 29 | nltk==3.2.2 30 | notebook==5.4.1 31 | numpy==1.12.1 32 | path.py==8.1.2 33 | pexpect==4.0.1 34 | pickleshare==0.5 35 | ptyprocess==0.5 36 | pyparsing==2.0.7 37 | python-dateutil==2.4.2 38 | pytz==2015.7 39 | pyzmq==15.1.0 40 | qtconsole==4.1.1 41 | scipy==0.19.0 42 | simplegeneric==0.8.1 43 | singledispatch==3.4.0.3 44 | site==0.0.1 45 | six==1.10.0 46 | terminado==0.5 47 | tornado==4.3 48 | traitlets==4.0.0 49 | h5py==2.7.0 50 | -------------------------------------------------------------------------------- 
/assignment1/setup_googlecloud.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is the set-up script for Google Cloud. 4 | sudo apt-get update 5 | sudo apt-get install libncurses5-dev 6 | sudo apt-get install python-dev 7 | sudo apt-get install python-pip 8 | sudo apt-get install libjpeg8-dev 9 | sudo ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib 10 | pip install pillow 11 | sudo apt-get build-dep python-imaging 12 | sudo apt-get install libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev 13 | sudo pip install virtualenv 14 | virtualenv .env # Create a virtual environment 15 | source .env/bin/activate # Activate the virtual environment 16 | pip install -r requirements.txt # Install dependencies 17 | deactivate 18 | echo "**************************************************" 19 | echo "***** End of Google Cloud Set-up Script ********" 20 | echo "**************************************************" 21 | echo "" 22 | echo "If you had no errors, You can proceed to work with your virtualenv as normal." 23 | echo "(run 'source .env/bin/activate' in your assignment directory to load the venv," 24 | echo " and run 'deactivate' to exit the venv. See assignment handout for details.)" 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS231n Spring 2017 homework 2 | My implementation of [CS231n spring 2017 homework](http://cs231n.github.io/). 3 | I chose to use tensorflow to implement assignment3. Some weird things happened in DCGAN and styletransfer. 4 | 5 | 1. In questions about DCGAN, all the images in a single step are the same, although all the checks seem to be right. 2. In questions about styletransfer, my code can't synthesize images similar to results in the slides. It seems that my network doesn't learn to the right style.
7 | 8 | Hoping someone can pull me out of the mire. 9 | 10 | DCGAN error result 11 | 12 | ![DCGAN error result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.dcgan_err.png) 13 | 14 | DCGAN right result 15 | 16 | ![DCGAN right result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.dcgan_right.png) 17 | 18 | styletransfer error result 19 | 20 | ![style error result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.style_err.png) 21 | 22 | styletransfer right result 23 | 24 | ![style right result](https://raw.githubusercontent.com/Psunshine/CS231n-Spring-2017-Assignment/master/.style_right.png) 25 | 26 | -------------- 27 | 自己完成的[CS231n spring 2017 homework](http://cs231n.github.io/)。 28 | assignment3使用tensorflow实现,但是有两处结果不太对: 29 | 30 | 1. DCGAN一问中每一步结果都是一样的。而奇怪的是前面的check都通过了。 31 | 2. styletransfer一问中得到的图像和课件上的不一样,应该是没有正确转换style。 32 | 33 | 34 | 35 | 试验了几天也没找到原因,希望有明白的大佬可以提点提点 36 | -------------------------------------------------------------------------------- /assignment1/cs231n/vis_utils.py: -------------------------------------------------------------------------------- 1 | #from past.builtins import xrange 2 | 3 | from math import sqrt, ceil 4 | import numpy as np 5 | 6 | def visualize_grid(Xs, ubound=255.0, padding=1): 7 | """ 8 | Reshape a 4D tensor of image data to a grid for easy visualization. 
9 | 10 | Inputs: 11 | - Xs: Data of shape (N, H, W, C) 12 | - ubound: Output grid will have values scaled to the range [0, ubound] 13 | - padding: The number of blank pixels between elements of the grid 14 | """ 15 | (N, H, W, C) = Xs.shape 16 | grid_size = int(ceil(sqrt(N))) 17 | grid_height = H * grid_size + padding * (grid_size - 1) 18 | grid_width = W * grid_size + padding * (grid_size - 1) 19 | grid = np.zeros((grid_height, grid_width, C)) 20 | next_idx = 0 21 | y0, y1 = 0, H 22 | for y in xrange(grid_size): 23 | x0, x1 = 0, W 24 | for x in xrange(grid_size): 25 | if next_idx < N: 26 | img = Xs[next_idx] 27 | low, high = np.min(img), np.max(img) 28 | grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low) 29 | # grid[y0:y1, x0:x1] = Xs[next_idx] 30 | next_idx += 1 31 | x0 += W + padding 32 | x1 += W + padding 33 | y0 += H + padding 34 | y1 += H + padding 35 | # grid_max = np.max(grid) 36 | # grid_min = np.min(grid) 37 | # grid = ubound * (grid - grid_min) / (grid_max - grid_min) 38 | return grid 39 | 40 | def vis_grid(Xs): 41 | """ visualize a grid of images """ 42 | (N, H, W, C) = Xs.shape 43 | A = int(ceil(sqrt(N))) 44 | G = np.ones((A*H+A, A*W+A, C), Xs.dtype) 45 | G *= np.min(Xs) 46 | n = 0 47 | for y in range(A): 48 | for x in range(A): 49 | if n < N: 50 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:] 51 | n += 1 52 | # normalize to [0,1] 53 | maxg = G.max() 54 | ming = G.min() 55 | G = (G - ming)/(maxg-ming) 56 | return G 57 | 58 | def vis_nn(rows): 59 | """ visualize array of arrays of images """ 60 | N = len(rows) 61 | D = len(rows[0]) 62 | H,W,C = rows[0][0].shape 63 | Xs = rows[0][0] 64 | G = np.ones((N*H+N, D*W+D, C), Xs.dtype) 65 | for y in range(N): 66 | for x in range(D): 67 | G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x] 68 | # normalize to [0,1] 69 | maxg = G.max() 70 | ming = G.min() 71 | G = (G - ming)/(maxg-ming) 72 | return G 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- 
/assignment2/cs231n/im2col.py: -------------------------------------------------------------------------------- 1 | from builtins import range 2 | import numpy as np 3 | 4 | 5 | def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1): 6 | # First figure out what the size of the output should be 7 | N, C, H, W = x_shape 8 | assert (H + 2 * padding - field_height) % stride == 0 9 | assert (W + 2 * padding - field_height) % stride == 0 10 | out_height = (H + 2 * padding - field_height) / stride + 1 11 | out_width = (W + 2 * padding - field_width) / stride + 1 12 | 13 | i0 = np.repeat(np.arange(field_height), field_width) 14 | i0 = np.tile(i0, C) 15 | i1 = stride * np.repeat(np.arange(out_height), out_width) 16 | j0 = np.tile(np.arange(field_width), field_height * C) 17 | j1 = stride * np.tile(np.arange(out_width), out_height) 18 | i = i0.reshape(-1, 1) + i1.reshape(1, -1) 19 | j = j0.reshape(-1, 1) + j1.reshape(1, -1) 20 | 21 | k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) 22 | 23 | return (k, i, j) 24 | 25 | 26 | def im2col_indices(x, field_height, field_width, padding=1, stride=1): 27 | """ An implementation of im2col based on some fancy indexing """ 28 | # Zero-pad the input 29 | p = padding 30 | x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant') 31 | 32 | k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding, 33 | stride) 34 | 35 | cols = x_padded[:, k, i, j] 36 | C = x.shape[1] 37 | cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1) 38 | return cols 39 | 40 | 41 | def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, 42 | stride=1): 43 | """ An implementation of col2im based on fancy indexing and np.add.at """ 44 | N, C, H, W = x_shape 45 | H_padded, W_padded = H + 2 * padding, W + 2 * padding 46 | x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype) 47 | k, i, j = get_im2col_indices(x_shape, field_height, 
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
    """Compute the (channel, row, col) gather indices used by im2col.

    Inputs:
    - x_shape: Input shape (N, C, H, W).
    - field_height, field_width: Receptive field size.
    - padding: Zero padding applied to H and W before indexing.
    - stride: Step between receptive fields.

    Returns a tuple (k, i, j) of integer index arrays such that
    x_padded[:, k, i, j] gathers every receptive field of every channel.
    """
    # First figure out what the size of the output should be
    N, C, H, W = x_shape
    assert (H + 2 * padding - field_height) % stride == 0
    # Bug fix: the width check previously re-tested field_height.
    assert (W + 2 * padding - field_width) % stride == 0
    # Bug fix: use floor division; true division yields floats on Python 3,
    # and float arrays are rejected as numpy fancy indices downstream.
    out_height = (H + 2 * padding - field_height) // stride + 1
    out_width = (W + 2 * padding - field_width) // stride + 1

    i0 = np.repeat(np.arange(field_height), field_width)
    i0 = np.tile(i0, C)
    i1 = stride * np.repeat(np.arange(out_height), out_width)
    j0 = np.tile(np.arange(field_width), field_height * C)
    j1 = stride * np.tile(np.arange(out_width), out_height)
    # Broadcast intra-patch offsets against patch origins.
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)

    k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)

    return (k, i, j)
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
                   stride=1):
    """ An implementation of col2im based on fancy indexing and np.add.at

    Scatters an im2col-style column matrix back into an image tensor of
    shape x_shape, summing the contributions of overlapping receptive
    fields (the adjoint of im2col_indices).

    Inputs:
    - cols: Column matrix of shape (C * field_height * field_width, -1),
      as produced by im2col_indices.
    - x_shape: Target shape (N, C, H, W).
    - field_height, field_width, padding, stride: Convolution parameters;
      must match the ones used for the forward im2col.

    Returns:
    - Array of shape x_shape with overlapping patch values accumulated.
    """
    N, C, H, W = x_shape
    H_padded, W_padded = H + 2 * padding, W + 2 * padding
    x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
    k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
                                 stride)
    # Undo im2col's (patch, position, batch) flattening, then move batch first.
    cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
    cols_reshaped = cols_reshaped.transpose(2, 0, 1)
    # np.add.at accumulates unbuffered, so pixels covered by several
    # receptive fields receive the sum of all their column entries.
    np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
    if padding == 0:
        # [p:-p] with p == 0 would be an empty slice, so special-case it.
        return x_padded
    return x_padded[:, :, padding:-padding, padding:-padding]
def vis_nn(rows):
    """Lay out a 2-D array of images on a grid, normalized to [0, 1].

    Inputs:
    - rows: Sequence of N sequences, each holding D images of identical
      shape (H, W, C).

    Returns:
    - G: Array of shape (N*H+N, D*W+D, C) with one pixel of padding between
      images, rescaled to the range [0, 1].
    """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
    # Normalize to [0, 1]. Bug fix: guard the constant-grid case, which
    # previously divided by zero and returned NaN/inf pixels.
    maxg = G.max()
    ming = G.min()
    if maxg > ming:
        G = (G - ming) / (maxg - ming)
    else:
        G = np.zeros_like(G, dtype=np.float64)
    return G
# Per-channel statistics used by SqueezeNet preprocessing (RGB order).
SQUEEZENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
SQUEEZENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


def preprocess_image(img):
    """Preprocess an image for squeezenet.

    Scales pixel values to [0, 1], then subtracts the per-channel mean and
    divides by the per-channel standard deviation.
    """
    scaled = img.astype(np.float32) / 255.0
    return (scaled - SQUEEZENET_MEAN) / SQUEEZENET_STD
def load_image(filename, size=None):
    """Load an image from disk, optionally rescaling it.

    Inputs:
    - filename: path to file
    - size: size of shortest dimension after rescaling; if None, the image
      is returned at its original resolution.

    Returns:
    - img: the loaded (and possibly rescaled) pixel array.
    """
    img = imread(filename)
    if size is not None:
        orig_shape = np.array(img.shape[:2])
        # Scale so the shortest side becomes `size`; imresize accepts the
        # fractional zoom factor directly. (The previous version also
        # computed an unused `new_shape` array -- dead code, removed.)
        scale_factor = float(size) / orig_shape.min()
        img = imresize(img, scale_factor)
    return img
def adam(x, dx, config=None):
    """
    Uses the Adam update rule, which incorporates moving averages of both the
    gradient and its square and a bias correction term.

    config format:
    - learning_rate: Scalar learning rate.
    - beta1: Decay rate for moving average of first moment of gradient.
    - beta2: Decay rate for moving average of second moment of gradient.
    - epsilon: Small scalar used for smoothing to avoid dividing by zero.
    - m: Moving average of gradient.
    - v: Moving average of squared gradient.
    - t: Iteration number.
    """
    if config is None:
        config = {}
    for key, val in (('learning_rate', 1e-3), ('beta1', 0.9),
                     ('beta2', 0.999), ('epsilon', 1e-8), ('t', 0)):
        config.setdefault(key, val)
    config.setdefault('m', np.zeros_like(x))
    config.setdefault('v', np.zeros_like(x))

    lr = config['learning_rate']
    b1, b2, eps = config['beta1'], config['beta2'], config['epsilon']

    # Update biased first/second moment estimates and the step counter.
    first = b1 * config['m'] + (1 - b1) * dx
    second = b2 * config['v'] + (1 - b2) * (dx * dx)
    step = config['t'] + 1
    # Fold both bias corrections into a single effective step size.
    alpha = lr * np.sqrt(1 - b2 ** step) / (1 - b1 ** step)
    # In-place update so callers holding a reference to x see the new weights.
    x -= alpha * (first / (np.sqrt(second) + eps))

    config['t'] = step
    config['m'] = first
    config['v'] = second

    return x, config
def decode_captions(captions, idx_to_word):
    """Convert integer-encoded captions back to whitespace-joined strings.

    Inputs:
    - captions: Integer array of shape (N, T), or (T,) for a single caption.
    - idx_to_word: Mapping from integer index to vocabulary word.

    Returns:
    - A list of N decoded strings, or a single string for 1-D input.
    """
    singleton = False
    if captions.ndim == 1:
        singleton = True
        captions = captions[None]
    decoded = []
    N, T = captions.shape
    for i in range(N):
        words = []
        for t in range(T):
            word = idx_to_word[captions[i, t]]
            # Bug fix: restore the COCO sentinel tokens (the original '<NULL>'
            # / '<END>' literals were lost, leaving two degenerate comparisons
            # against ''): skip padding, stop after the end-of-caption marker.
            if word != '<NULL>':
                words.append(word)
            if word == '<END>':
                break
        decoded.append(' '.join(words))
    if singleton:
        decoded = decoded[0]
    return decoded
def affine_relu_forward(x, w, b):
    """Apply an affine transform followed by a ReLU nonlinearity.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Object to give to the backward pass
    """
    affine_out, affine_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(affine_out)
    return out, (affine_cache, relu_cache)
55 | """ 56 | conv_cache, relu_cache = cache 57 | da = relu_backward(dout, relu_cache) 58 | dx, dw, db = conv_backward_fast(da, conv_cache) 59 | return dx, dw, db 60 | 61 | 62 | def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param): 63 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 64 | an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param) 65 | out, relu_cache = relu_forward(an) 66 | cache = (conv_cache, bn_cache, relu_cache) 67 | return out, cache 68 | 69 | 70 | def conv_bn_relu_backward(dout, cache): 71 | conv_cache, bn_cache, relu_cache = cache 72 | dan = relu_backward(dout, relu_cache) 73 | da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache) 74 | dx, dw, db = conv_backward_fast(da, conv_cache) 75 | return dx, dw, db, dgamma, dbeta 76 | 77 | 78 | def conv_relu_pool_forward(x, w, b, conv_param, pool_param): 79 | """ 80 | Convenience layer that performs a convolution, a ReLU, and a pool. 81 | 82 | Inputs: 83 | - x: Input to the convolutional layer 84 | - w, b, conv_param: Weights and parameters for the convolutional layer 85 | - pool_param: Parameters for the pooling layer 86 | 87 | Returns a tuple of: 88 | - out: Output from the pooling layer 89 | - cache: Object to give to the backward pass 90 | """ 91 | a, conv_cache = conv_forward_fast(x, w, b, conv_param) 92 | s, relu_cache = relu_forward(a) 93 | out, pool_cache = max_pool_forward_fast(s, pool_param) 94 | cache = (conv_cache, relu_cache, pool_cache) 95 | return out, cache 96 | 97 | 98 | def conv_relu_pool_backward(dout, cache): 99 | """ 100 | Backward pass for the conv-relu-pool convenience layer 101 | """ 102 | conv_cache, relu_cache, pool_cache = cache 103 | ds = max_pool_backward_fast(dout, pool_cache) 104 | da = relu_backward(ds, relu_cache) 105 | dx, dw, db = conv_backward_fast(da, conv_cache) 106 | return dx, dw, db 107 | -------------------------------------------------------------------------------- /assignment1/cs231n/gradient_check.py: 
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """Numerically estimate the gradient of f at x with centered differences.

    - f should be a function that takes a single numpy-array argument
    - x is the point (numpy array) to evaluate the gradient at; entries are
      perturbed in place and restored before returning
    """
    fx = f(x)  # evaluate function value at the original point
    grad = np.zeros_like(x)
    # Walk every index of x, perturbing one coordinate at a time.
    walker = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not walker.finished:
        idx = walker.multi_index
        saved = x[idx]
        # Evaluate at x + h and x - h, restoring x afterwards.
        x[idx] = saved + h
        f_plus = f(x)
        x[idx] = saved - h
        f_minus = f(x)
        x[idx] = saved
        # Centered difference quotient for this coordinate.
        grad[idx] = (f_plus - f_minus) / (2 * h)
        if verbose:
            print(idx, grad[idx])
        walker.iternext()
    return grad
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    # Numeric gradients through a net object. The lambda ignores the blob
    # arguments supplied by eval_numerical_gradient_blobs and simply reruns
    # the net's full forward pass; the perturbed values are picked up because
    # the net presumably reads the same blob objects -- verify with callers.
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)
111 | """ 112 | 113 | for i in xrange(num_checks): 114 | ix = tuple([randrange(m) for m in x.shape]) 115 | 116 | oldval = x[ix] 117 | x[ix] = oldval + h # increment by h 118 | fxph = f(x) # evaluate f(x + h) 119 | x[ix] = oldval - h # increment by h 120 | fxmh = f(x) # evaluate f(x - h) 121 | x[ix] = oldval # reset 122 | 123 | grad_numerical = (fxph - fxmh) / (2 * h) 124 | grad_analytic = analytic_grad[ix] 125 | rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic)) 126 | print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error)) 127 | 128 | -------------------------------------------------------------------------------- /assignment2/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def 
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns:
    - A list with one array per input blob, each holding the numeric
      gradient with respect to that blob's vals.
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            # Evaluate the pipeline at vals[idx] + h and - h, restoring the
            # original entry afterwards.
            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            # Centered difference, weighted by output.diffs (presumably the
            # upstream gradient d(loss)/d(output), i.e. the chain rule).
            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs
112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # increment by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /assignment3/cs231n/gradient_check.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from builtins import range 3 | from past.builtins import xrange 4 | 5 | import numpy as np 6 | from random import randrange 7 | 8 | def eval_numerical_gradient(f, x, verbose=True, h=0.00001): 9 | """ 10 | a naive implementation of numerical gradient of f at x 11 | - f should be a function that takes a single argument 12 | - x is the point (numpy array) to evaluate the gradient at 13 | """ 14 | 15 | fx = f(x) # evaluate function value at original point 16 | grad = np.zeros_like(x) 17 | # iterate over all indexes in x 18 | it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) 19 | while not it.finished: 20 | 21 | # evaluate function at x+h 22 | ix = it.multi_index 23 | oldval = x[ix] 24 | x[ix] = oldval + h # increment by h 25 | fxph = f(x) # evalute f(x + h) 26 | x[ix] = oldval - h 27 | fxmh = f(x) # evaluate f(x - h) 28 | x[ix] = oldval # restore 29 | 30 | # compute the partial derivative with centered formula 31 | grad[ix] = (fxph - fxmh) / (2 * h) # the slope 32 | if verbose: 33 | print(ix, grad[ix]) 34 | it.iternext() # step to next dimension 35 | 36 | return grad 37 | 38 | 39 | def 
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns:
    - A list with one array per input blob, each holding the numeric
      gradient with respect to that blob's vals.
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            # Evaluate the pipeline at vals[idx] + h and - h, restoring the
            # original entry afterwards.
            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            # Centered difference, weighted by output.diffs (presumably the
            # upstream gradient d(loss)/d(output), i.e. the chain rule).
            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs
112 | """ 113 | 114 | for i in range(num_checks): 115 | ix = tuple([randrange(m) for m in x.shape]) 116 | 117 | oldval = x[ix] 118 | x[ix] = oldval + h # increment by h 119 | fxph = f(x) # evaluate f(x + h) 120 | x[ix] = oldval - h # increment by h 121 | fxmh = f(x) # evaluate f(x - h) 122 | x[ix] = oldval # reset 123 | 124 | grad_numerical = (fxph - fxmh) / (2 * h) 125 | grad_analytic = analytic_grad[ix] 126 | rel_error = (abs(grad_numerical - grad_analytic) / 127 | (abs(grad_numerical) + abs(grad_analytic))) 128 | print('numerical: %f analytic: %f, relative error: %e' 129 | %(grad_numerical, grad_analytic, rel_error)) 130 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | #from past.builtins import xrange 4 | 5 | def softmax_loss_naive(W, X, y, reg): 6 | """ 7 | Softmax loss function, naive implementation (with loops) 8 | 9 | Inputs have dimension D, there are C classes, and we operate on minibatches 10 | of N examples. 11 | 12 | Inputs: 13 | - W: A numpy array of shape (D, C) containing weights. 14 | - X: A numpy array of shape (N, D) containing a minibatch of data. 15 | - y: A numpy array of shape (N,) containing training labels; y[i] = c means 16 | that X[i] has label c, where 0 <= c < C. 17 | - reg: (float) regularization strength 18 | 19 | Returns a tuple of: 20 | - loss as single float 21 | - gradient with respect to weights W; an array of same shape as W 22 | """ 23 | # Initialize the loss and gradient to zero. 24 | loss = 0.0 25 | dW = np.zeros_like(W) 26 | 27 | ############################################################################# 28 | # TODO: Compute the softmax loss and its gradient using explicit loops. # 29 | # Store the loss in loss and the gradient in dW. 
If you are not careful # 30 | # here, it is easy to run into numeric instability. Don't forget the # 31 | # regularization! # 32 | ############################################################################# 33 | train_num = X.shape[0] 34 | num_classes = np.max(y) + 1 35 | for i in xrange(train_num): 36 | y_pred = X[i,:].dot(W) 37 | y_pred = y_pred - np.max(y_pred) 38 | y_label = y[i] 39 | prob = np.exp(y_pred[y_label]) / np.sum(np.exp(y_pred)) 40 | loss += -1*np.log(prob) 41 | for j in xrange(num_classes): 42 | if j != y[i]: 43 | dW[:,j] += np.exp(y_pred[j])/np.sum(np.exp(y_pred)) * X[i] 44 | else: 45 | dW[:,y[i]] += (-1 + np.exp(y_pred[j])/np.sum(np.exp(y_pred))) * X[i] 46 | 47 | loss /= train_num 48 | dW /= train_num 49 | loss += 0.5*reg*np.sum(np.square(W)) 50 | dW += reg*W 51 | 52 | ############################################################################# 53 | # END OF YOUR CODE # 54 | ############################################################################# 55 | 56 | return loss, dW 57 | 58 | 59 | def softmax_loss_vectorized(W, X, y, reg): 60 | """ 61 | Softmax loss function, vectorized version. 62 | 63 | Inputs and outputs are the same as softmax_loss_naive. 64 | """ 65 | # Initialize the loss and gradient to zero. 66 | loss = 0.0 67 | dW = np.zeros_like(W) 68 | 69 | ############################################################################# 70 | # TODO: Compute the softmax loss and its gradient using no explicit loops. # 71 | # Store the loss in loss and the gradient in dW. If you are not careful # 72 | # here, it is easy to run into numeric instability. Don't forget the # 73 | # regularization! 
# 74 | ############################################################################# 75 | num_train , dims = X.shape 76 | 77 | scores = np.dot(X, W)# N by C 78 | scores -= np.max(scores,axis=1,keepdims=True) 79 | expscores = np.exp(scores) 80 | p = expscores / np.sum(expscores,axis=1,keepdims=True) 81 | y_trueClass = np.zeros_like(p) 82 | y_trueClass[range(num_train),y] = 1.0 83 | 84 | loss = -1 * np.sum(y_trueClass*np.log(p)) / num_train + 0.5*reg*np.sum(np.square(W)) 85 | 86 | 87 | dW = np.dot(X.transpose(), p - y_trueClass) 88 | dW /= num_train 89 | dW += reg * W 90 | # scores = X.dot(W) # N by C 91 | # num_train = X.shape[0] 92 | # num_classes = W.shape[1] 93 | # scores_correct = scores[np.arange(num_train), y] # 1 by N 94 | # scores_correct = np.reshape(scores_correct, (num_train, 1)) # N by 1 95 | # margins = scores - scores_correct + 1.0 # N by C 96 | # margins[np.arange(num_train), y] = 0.0 97 | # margins[margins <= 0] = 0.0 98 | # loss += np.sum(margins) / num_train 99 | # loss += 0.5 * reg * np.sum(W * W) 100 | # # compute the gradient 101 | # margins[margins > 0] = 1.0 102 | # row_sum = np.sum(margins, axis=1) # 1 by N 103 | # margins[np.arange(num_train), y] = -row_sum 104 | # dW += np.dot(X.T, margins)/num_train + reg * W # D by C 105 | ############################################################################# 106 | # END OF YOUR CODE # 107 | ############################################################################# 108 | 109 | return loss, dW 110 | 111 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/linear_svm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import shuffle 3 | #from past.builtins import xrange 4 | 5 | def svm_loss_naive(W, X, y, reg): 6 | """ 7 | Structured SVM loss function, naive implementation (with loops). 
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # BUG FIX: xrange does not exist in Python 3 (the past.builtins import
    # at the top of the file is commented out); use range.
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue  # the correct class contributes no margin
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Each violating class pushes its column toward X[i] and the
                # correct class's column away from it.
                dW[:, y[i]] -= X[i]
                dW[:, j] += X[i]

    # Average over the minibatch instead of summing.
    loss /= num_train
    dW /= num_train

    # Add regularization.
    # NOTE(review): this naive version uses reg*||W||^2 (gradient 2*reg*W)
    # while svm_loss_vectorized below uses 0.5*reg*||W||^2 (gradient reg*W).
    # Behavior kept as-is; confirm which convention the grader expects
    # before unifying the two.
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)                                        # (N, C)
    correct_class_score = scores[rows, y].reshape(num_train, 1)
    margins = np.maximum(0, scores - correct_class_score + 1)
    margins[rows, y] = 0  # correct classes contribute no loss
    loss = np.sum(margins) / num_train + 0.5 * reg * np.sum(W * W)

    # Gradient: each positive margin contributes +X[i] to its class column
    # and -X[i] to the correct class's column.
    coeff = (margins > 0).astype(np.float64)
    coeff[rows, y] = -np.sum(coeff, axis=1)
    dW = X.T.dot(coeff) / num_train + reg * W

    return loss, dW
def affine_bn_relu_forward(x, w, b, gamma, beta, bn_param):
    """
    Forward pass for an affine -> batch normalization -> ReLU sandwich.

    Inputs:
    - x: Array of shape (N, D1); input to the affine layer
    - w, b: Weight of shape (D1, D2) and bias of shape (D2,) for the affine
      transform
    - gamma, beta: Scale and shift parameters for batch normalization, each
      of shape (D2,)
    - bn_param: Dictionary of parameters for batch normalization

    Returns a tuple of:
    - out: Output from the ReLU, of shape (N, D2)
    - cache: Values needed by affine_bn_relu_backward
    """
    fc_out, fc_cache = affine_forward(x, w, b)
    bn_out, bn_cache = batchnorm_forward(fc_out, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn_out)
    return out, (fc_cache, bn_cache, relu_cache)


def affine_bn_relu_backward(dout, cache):
    """
    Backward pass for the affine -> batchnorm -> ReLU sandwich.
    """
    fc_cache, bn_cache, relu_cache = cache
    dbn_out = relu_backward(dout, relu_cache)
    dfc_out, dgamma, dbeta = batchnorm_backward(dbn_out, bn_cache)
    dx, dw, db = affine_backward(dfc_out, fc_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_forward(x, w, b, conv_param):
    """
    Convenience layer: convolution immediately followed by a ReLU.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Values needed by conv_relu_backward
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    out, relu_cache = relu_forward(conv_out)
    return out, (conv_cache, relu_cache)


def conv_relu_backward(dout, cache):
    """
    Backward pass for the conv -> ReLU convenience layer.
    """
    conv_cache, relu_cache = cache
    dconv_out = relu_backward(dout, relu_cache)
    return conv_backward_fast(dconv_out, conv_cache)


def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
    """Forward pass for conv -> spatial batchnorm -> ReLU."""
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    bn_out, bn_cache = spatial_batchnorm_forward(conv_out, gamma, beta, bn_param)
    out, relu_cache = relu_forward(bn_out)
    return out, (conv_cache, bn_cache, relu_cache)


def conv_bn_relu_backward(dout, cache):
    """Backward pass for conv -> spatial batchnorm -> ReLU."""
    conv_cache, bn_cache, relu_cache = cache
    dbn_out = relu_backward(dout, relu_cache)
    dconv_out, dgamma, dbeta = spatial_batchnorm_backward(dbn_out, bn_cache)
    dx, dw, db = conv_backward_fast(dconv_out, conv_cache)
    return dx, dw, db, dgamma, dbeta


def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """
    Convenience layer: convolution, then ReLU, then max pooling.

    Inputs:
    - x: Input to the convolutional layer
    - w, b, conv_param: Weights and parameters for the convolutional layer
    - pool_param: Parameters for the pooling layer

    Returns a tuple of:
    - out: Output from the pooling layer
    - cache: Values needed by conv_relu_pool_backward
    """
    conv_out, conv_cache = conv_forward_fast(x, w, b, conv_param)
    relu_out, relu_cache = relu_forward(conv_out)
    out, pool_cache = max_pool_forward_fast(relu_out, pool_param)
    return out, (conv_cache, relu_cache, pool_cache)


def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv -> ReLU -> pool convenience layer.
    """
    conv_cache, relu_cache, pool_cache = cache
    drelu_out = max_pool_backward_fast(dout, pool_cache)
    dconv_out = relu_backward(drelu_out, relu_cache)
    return conv_backward_fast(dconv_out, conv_cache)
import tensorflow as tf

# Number of ImageNet classes the final classifier predicts over.
NUM_CLASSES = 1000

def fire_module(x,inp,sp,e11p,e33p):
    """SqueezeNet 'fire' module: a 1x1 'squeeze' convolution followed by a
    pair of 'expand' convolutions (1x1 and 3x3) whose ReLU outputs are
    concatenated along the channel axis.

    Inputs:
    - x: input tensor in NHWC layout (channels last).
    - inp: number of input channels; must match x's last dimension.
    - sp: number of squeeze filters.
    - e11p: number of 1x1 expand filters.
    - e33p: number of 3x3 expand filters.

    Returns a tensor with e11p + e33p output channels.
    """
    with tf.variable_scope("fire"):
        with tf.variable_scope("squeeze"):
            W = tf.get_variable("weights",shape=[1,1,inp,sp])
            b = tf.get_variable("bias",shape=[sp])
            s = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")+b
            s = tf.nn.relu(s)
        with tf.variable_scope("e11"):
            W = tf.get_variable("weights",shape=[1,1,sp,e11p])
            b = tf.get_variable("bias",shape=[e11p])
            e11 = tf.nn.conv2d(s,W,[1,1,1,1],"VALID")+b
            e11 = tf.nn.relu(e11)
        with tf.variable_scope("e33"):
            W = tf.get_variable("weights",shape=[3,3,sp,e33p])
            b = tf.get_variable("bias",shape=[e33p])
            # "SAME" padding keeps the 3x3 branch spatially aligned with
            # the 1x1 branch so the two can be concatenated.
            e33 = tf.nn.conv2d(s,W,[1,1,1,1],"SAME")+b
            e33 = tf.nn.relu(e33)
        # Axis 3 is the channel axis in NHWC layout.
        return tf.concat([e11,e33],3)


class SqueezeNet(object):
    # TF1-style SqueezeNet whose intermediate activations are exposed as a
    # list (used by the style-transfer / saliency notebooks).

    def extract_features(self, input=None, reuse=True):
        """Build (or reuse) the convolutional feature tower.

        Inputs:
        - input: optional input tensor; defaults to the model's image
          placeholder.
        - reuse: whether to reuse the 'features' variable scope. Must be
          False the first time the tower is built (see __init__).

        Returns a list with one activation tensor per layer, shallowest
        first; the last entry is the final feature map.
        """
        if input is None:
            input = self.image
        x = input
        layers = []
        with tf.variable_scope('features', reuse=reuse):
            with tf.variable_scope('layer0'):
                W = tf.get_variable("weights",shape=[3,3,3,64])
                b = tf.get_variable("bias",shape=[64])
                # Stride-2 stem convolution; halves spatial resolution.
                x = tf.nn.conv2d(x,W,[1,2,2,1],"VALID")
                x = tf.nn.bias_add(x,b)
                layers.append(x)
            with tf.variable_scope('layer1'):
                x = tf.nn.relu(x)
                layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            # Fire modules: (input channels, squeeze, expand1x1, expand3x3).
            with tf.variable_scope('layer3'):
                x = fire_module(x,64,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer4'):
                x = fire_module(x,128,16,64,64)
                layers.append(x)
            with tf.variable_scope('layer5'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer6'):
                x = fire_module(x,128,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer7'):
                x = fire_module(x,256,32,128,128)
                layers.append(x)
            with tf.variable_scope('layer8'):
                x = tf.nn.max_pool(x,[1,3,3,1],strides=[1,2,2,1],padding='VALID')
                layers.append(x)
            with tf.variable_scope('layer9'):
                x = fire_module(x,256,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer10'):
                x = fire_module(x,384,48,192,192)
                layers.append(x)
            with tf.variable_scope('layer11'):
                x = fire_module(x,384,64,256,256)
                layers.append(x)
            with tf.variable_scope('layer12'):
                x = fire_module(x,512,64,256,256)
                layers.append(x)
        return layers

    def __init__(self, save_path=None, sess=None):
        """Create a SqueezeNet model.
        Inputs:
        - save_path: path to TensorFlow checkpoint
        - sess: TensorFlow session
        - input: optional input to the model. If None, will use placeholder for input.
        """
        # Placeholders: variable-size NHWC image batch and integer labels.
        self.image = tf.placeholder('float',shape=[None,None,None,3],name='input_image')
        self.labels = tf.placeholder('int32', shape=[None], name='labels')
        self.layers = []
        x = self.image
        # First construction of the tower, so reuse=False.
        self.layers = self.extract_features(x, reuse=False)
        self.features = self.layers[-1]
        with tf.variable_scope('classifier'):
            with tf.variable_scope('layer0'):
                x = self.features
                self.layers.append(x)
            with tf.variable_scope('layer1'):
                # 1x1 convolution acting as the final fully-connected layer.
                W = tf.get_variable("weights",shape=[1,1,512,1000])
                b = tf.get_variable("bias",shape=[1000])
                x = tf.nn.conv2d(x,W,[1,1,1,1],"VALID")
                x = tf.nn.bias_add(x,b)
                self.layers.append(x)
            with tf.variable_scope('layer2'):
                x = tf.nn.relu(x)
                self.layers.append(x)
            with tf.variable_scope('layer3'):
                # Global average pool over the final 13x13 feature map
                # (assumes a 224x224 input; other sizes change this — TODO confirm).
                x = tf.nn.avg_pool(x,[1,13,13,1],strides=[1,13,13,1],padding='VALID')
                self.layers.append(x)
        self.classifier = tf.reshape(x,[-1, NUM_CLASSES])

        with tf.name_scope('loss'):
            # NOTE(review): checkpoint restoration sits inside the 'loss'
            # name scope but must run after all variables above are created;
            # it only affects variable values, not the graph structure.
            if save_path is not None:
                saver = tf.train.Saver()
                saver.restore(sess, save_path)
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(self.labels, NUM_CLASSES), logits=self.classifier))
@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Scatter each receptive-field pixel of x_padded into its slot of the
    # 2-D cols matrix: row encodes (channel, ii, jj) within a field, col
    # encodes (yy, xx, image) over output positions and the batch.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        # NOTE(review): ii is multiplied by field_height, not
                        # field_width. col2im_cython_inner uses the same
                        # formula, so the pair is self-consistent, but for
                        # non-square fields this differs from the usual
                        # ii*field_width + jj layout — confirm intended.
                        row = c * field_width * field_height + ii * field_height + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
    # NOTE(review): declared `except? -1` but has no explicit return;
    # presumably falls through returning 0 — verify Cython emits no warning.



def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
                  int field_height, int field_width, int padding, int stride):
    # Inverse of im2col: accumulate the columns of `cols` back into an
    # (N, C, H, W) image, summing overlapping receptive fields.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division (operands are cdef int), matching im2col_cython.
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
                                                         dtype=cols.dtype)

    # Moving the inner loop to a C-function with no bounds checking improves
    # performance quite a bit for col2im.
    col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    if padding > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, padding:-padding, padding:-padding]
    return x_padded


@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Exact adjoint of im2col_cython_inner: uses `+=` because overlapping
    # receptive fields must accumulate, not overwrite.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for ii in range(field_height):
            for jj in range(field_width):
                # Must match the row formula in im2col_cython_inner exactly.
                row = c * field_width * field_height + ii * field_height + jj
                for yy in range(HH):
                    for xx in range(WW):
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
                            np.ndarray[DTYPE_t, ndim=4] x_padded,
                            int N, int C, int H, int W, int HH, int WW,
                            int out_h, int out_w, int pad, int stride):
    # 6-D variant: cols is indexed (channel, hh, ww, image, out_h, out_w),
    # accumulated back into the padded image.
    cdef int c, hh, ww, n, h, w
    for n in range(N):
        for c in range(C):
            for hh in range(HH):
                for ww in range(WW):
                    for h in range(out_h):
                        for w in range(out_w):
                            x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]


def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
                     int HH, int WW, int pad, int stride):
    # Public wrapper for the 6-D col2im used by the "strides" fast layers.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division on cdef ints.
    cdef int out_h = (H + 2 * pad - HH) / stride + 1
    cdef int out_w = (W + 2 * pad - WW) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                         dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded
def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
                  int field_width, int padding, int stride):
    # Rearrange image patches of x (N, C, H, W) into the columns of a 2-D
    # matrix so convolution can be computed as a single matrix multiply.
    cdef int N = x.shape[0]
    cdef int C = x.shape[1]
    cdef int H = x.shape[2]
    cdef int W = x.shape[3]

    # C integer division (operands are cdef int); assumes the geometry
    # divides evenly — TODO confirm callers guarantee this.
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1

    cdef int p = padding
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
            ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # One row per (channel, field position), one column per
    # (output position, image).
    cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
            (C * field_height * field_width, N * HH * WW),
            dtype=x.dtype)

    # Moving the inner loop to a C function with no bounds checking works, but does
    # not seem to help performance in any measurable way.

    im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    return cols


@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Scatter each receptive-field pixel into the cols matrix.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for yy in range(HH):
            for xx in range(WW):
                for ii in range(field_height):
                    for jj in range(field_width):
                        # NOTE(review): ii is scaled by field_height, not
                        # field_width; self-consistent with
                        # col2im_cython_inner but unconventional for
                        # non-square fields — confirm intended.
                        row = c * field_width * field_height + ii * field_height + jj
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]



def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
                  int field_height, int field_width, int padding, int stride):
    # Inverse of im2col_cython: accumulate columns back into an image,
    # summing overlapping receptive fields.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    cdef int HH = (H + 2 * padding - field_height) / stride + 1
    cdef int WW = (W + 2 * padding - field_width) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
                                                         dtype=cols.dtype)

    # Moving the inner loop to a C-function with no bounds checking improves
    # performance quite a bit for col2im.
    col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
                        field_height, field_width, padding, stride)
    if padding > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, padding:-padding, padding:-padding]
    return x_padded


@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
                             np.ndarray[DTYPE_t, ndim=4] x_padded,
                             int N, int C, int H, int W, int HH, int WW,
                             int field_height, int field_width, int padding, int stride) except? -1:
    # Adjoint of im2col_cython_inner; `+=` accumulates overlapping fields.
    cdef int c, ii, jj, row, yy, xx, i, col

    for c in range(C):
        for ii in range(field_height):
            for jj in range(field_width):
                # Must match the row formula in im2col_cython_inner exactly.
                row = c * field_width * field_height + ii * field_height + jj
                for yy in range(HH):
                    for xx in range(WW):
                        for i in range(N):
                            col = yy * WW * N + xx * N + i
                            x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
                            np.ndarray[DTYPE_t, ndim=4] x_padded,
                            int N, int C, int H, int W, int HH, int WW,
                            int out_h, int out_w, int pad, int stride):
    # 6-D variant used by the "strides" fast layers; cols is indexed
    # (channel, hh, ww, image, out_h, out_w).
    cdef int c, hh, ww, n, h, w
    for n in range(N):
        for c in range(C):
            for hh in range(HH):
                for ww in range(WW):
                    for h in range(out_h):
                        for w in range(out_w):
                            x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]


def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
                     int HH, int WW, int pad, int stride):
    # Public wrapper around col2im_6d_cython_inner.
    cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
    # C integer division on cdef ints.
    cdef int out_h = (H + 2 * pad - HH) / stride + 1
    cdef int out_w = (W + 2 * pad - WW) / stride + 1
    cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
                                                         dtype=cols.dtype)

    col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)

    if pad > 0:
        # Strip the zero padding before returning.
        return x_padded[:, :, pad:-pad, pad:-pad]
    return x_padded
def extract_features(imgs, feature_fns, verbose=False):
    """
    Given pixel data for images and several feature functions that can operate on
    single images, apply all feature functions to all images, concatenating the
    feature vectors for each image and storing the features for all images in
    a single matrix.

    Inputs:
    - imgs: N x H X W X C array of pixel data for N images.
    - feature_fns: List of k feature functions. The ith feature function should
      take as input an H x W x D array and return a (one-dimensional) array of
      length F_i.
    - verbose: Boolean; if true, print progress.

    Returns:
    An array of shape (N, F_1 + ... + F_k) where each column is the concatenation
    of all features for a single image.
    """
    num_images = imgs.shape[0]
    if num_images == 0:
        return np.array([])

    # Use the first image to determine feature dimensions.
    feature_dims = []
    first_image_features = []
    for feature_fn in feature_fns:
        feats = feature_fn(imgs[0].squeeze())
        assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
        feature_dims.append(feats.size)
        first_image_features.append(feats)

    # Now that we know the dimensions of the features, we can allocate a single
    # big array to store all features as columns.
    total_feature_dim = sum(feature_dims)
    imgs_features = np.zeros((num_images, total_feature_dim))
    imgs_features[0] = np.hstack(first_image_features)

    # Extract features for the rest of the images.
    # BUG FIX: xrange does not exist in Python 3 (the past.builtins import
    # at the top of the file is commented out); use range.
    for i in range(1, num_images):
        idx = 0
        for feature_fn, feature_dim in zip(feature_fns, feature_dims):
            next_idx = idx + feature_dim
            imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
            idx = next_idx
        if verbose and i % 1000 == 0:
            print('Done extracting features for %d / %d images' % (i, num_images))

    return imgs_features


def rgb2gray(rgb):
    """Convert RGB image to grayscale

    Parameters:
      rgb : RGB image

    Returns:
      gray : grayscale image

    """
    # BUG FIX: the blue coefficient was 0.144; the ITU-R BT.601 luma weights
    # are (0.299, 0.587, 0.114), which also sum to 1.0.
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])


def hog_feature(im):
    """Compute Histogram of Gradient (HOG) feature for an image

    Modified from skimage.feature.hog
    http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

    Reference:
      Histograms of Oriented Gradients for Human Detection
      Navneet Dalal and Bill Triggs, CVPR 2005

    Parameters:
      im : an input grayscale or rgb image

    Returns:
      feat: Histogram of Gradient (HOG) feature

    """
    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        # BUG FIX: np.at_least_2d does not exist (AttributeError); the
        # function is np.atleast_2d.
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9      # number of gradient bins
    cx, cy = (8, 8)       # pixels per cell

    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient on x-direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient on y-direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90  # gradient orientation

    n_cellsx = int(np.floor(sx / cx))  # number of cells in x
    n_cellsy = int(np.floor(sy / cy))  # number of cells in y
    # compute orientations integral images
    orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
    for i in range(orientations):
        # isolate orientations in this range
        temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
                            grad_ori, 0)
        temp_ori = np.where(grad_ori >= 180 / orientations * i,
                            temp_ori, 0)
        # select magnitudes for those orientations
        cond2 = temp_ori > 0
        temp_mag = np.where(cond2, grad_mag, 0)
        # BUG FIX: cx/2 and cy/2 are floats under Python 3 and raise
        # TypeError when used as slice indices; use floor division.
        orientation_histogram[:, :, i] = uniform_filter(temp_mag, size=(cx, cy))[cx // 2::cx, cy // 2::cy].T

    return orientation_histogram.ravel()


def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image.
    """
    bins = np.linspace(xmin, xmax, nbin + 1)
    hsv = matplotlib.colors.rgb_to_hsv(im / xmax) * xmax
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins, density=normalized)
    imhist = imhist * np.diff(bin_edges)

    return imhist
32 | """ 33 | num_train, dim = X.shape 34 | num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes 35 | if self.W is None: 36 | # lazily initialize W 37 | self.W = 0.001 * np.random.randn(dim, num_classes) 38 | 39 | # Run stochastic gradient descent to optimize W 40 | loss_history = [] 41 | for it in xrange(num_iters): 42 | X_batch = None 43 | y_batch = None 44 | 45 | ######################################################################### 46 | # TODO: # 47 | # Sample batch_size elements from the training data and their # 48 | # corresponding labels to use in this round of gradient descent. # 49 | # Store the data in X_batch and their corresponding labels in # 50 | # y_batch; after sampling X_batch should have shape (dim, batch_size) # 51 | # and y_batch should have shape (batch_size,) # 52 | # # 53 | # Hint: Use np.random.choice to generate indices. Sampling with # 54 | # replacement is faster than sampling without replacement. # 55 | ######################################################################### 56 | choice = np.random.choice(num_train,batch_size,replace=True) 57 | X_batch = X[choice] 58 | y_batch = y[choice] 59 | ######################################################################### 60 | # END OF YOUR CODE # 61 | ######################################################################### 62 | 63 | # evaluate loss and gradient 64 | loss, grad = self.loss(X_batch, y_batch, reg) 65 | loss_history.append(loss) 66 | 67 | # perform parameter update 68 | ######################################################################### 69 | # TODO: # 70 | # Update the weights using the gradient and the learning rate. 
# 71 | ######################################################################### 72 | self.W = self.W - learning_rate*grad 73 | ######################################################################### 74 | # END OF YOUR CODE # 75 | ######################################################################### 76 | 77 | if verbose and it % 100 == 0: 78 | print('iteration %d / %d: loss %f' % (it, num_iters, loss)) 79 | 80 | return loss_history 81 | 82 | def predict(self, X): 83 | """ 84 | Use the trained weights of this linear classifier to predict labels for 85 | data points. 86 | 87 | Inputs: 88 | - X: A numpy array of shape (N, D) containing training data; there are N 89 | training samples each of dimension D. 90 | 91 | Returns: 92 | - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional 93 | array of length N, and each element is an integer giving the predicted 94 | class. 95 | """ 96 | y_pred = np.zeros(X.shape[0]) 97 | ########################################################################### 98 | # TODO: # 99 | # Implement this method. Store the predicted labels in y_pred. # 100 | ########################################################################### 101 | scores = X.dot(self.W) 102 | y_pred = np.argmax(scores,axis=1) 103 | ########################################################################### 104 | # END OF YOUR CODE # 105 | ########################################################################### 106 | return y_pred 107 | 108 | def loss(self, X_batch, y_batch, reg): 109 | """ 110 | Compute the loss function and its derivative. 111 | Subclasses will override this. 112 | 113 | Inputs: 114 | - X_batch: A numpy array of shape (N, D) containing a minibatch of N 115 | data points; each point has dimension D. 116 | - y_batch: A numpy array of shape (N,) containing labels for the minibatch. 117 | - reg: (float) regularization strength. 
118 | 119 | Returns: A tuple containing: 120 | - loss as a single float 121 | - gradient with respect to self.W; an array of the same shape as W 122 | """ 123 | pass 124 | 125 | 126 | class LinearSVM(LinearClassifier): 127 | """ A subclass that uses the Multiclass SVM loss function """ 128 | 129 | def loss(self, X_batch, y_batch, reg): 130 | return svm_loss_vectorized(self.W, X_batch, y_batch, reg) 131 | 132 | 133 | class Softmax(LinearClassifier): 134 | """ A subclass that uses the Softmax + Cross-entropy loss function """ 135 | 136 | def loss(self, X_batch, y_batch, reg): 137 | return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) 138 | 139 | -------------------------------------------------------------------------------- /assignment2/cs231n/optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | This file implements various first-order update rules that are commonly used 5 | for training neural networks. Each update rule accepts current weights and the 6 | gradient of the loss with respect to those weights and produces the next set of 7 | weights. Each update rule has the same interface: 8 | 9 | def update(w, dw, config=None): 10 | 11 | Inputs: 12 | - w: A numpy array giving the current weights. 13 | - dw: A numpy array of the same shape as w giving the gradient of the 14 | loss with respect to w. 15 | - config: A dictionary containing hyperparameter values such as learning 16 | rate, momentum, etc. If the update rule requires caching values over many 17 | iterations, then config will also hold these cached values. 18 | 19 | Returns: 20 | - next_w: The next point after the update. 21 | - config: The config dictionary to be passed to the next iteration of the 22 | update rule. 
23 | 24 | NOTE: For most update rules, the default learning rate will probably not 25 | perform well; however the default values of the other hyperparameters should 26 | work well for a variety of different problems. 27 | 28 | For efficiency, update rules may perform in-place updates, mutating w and 29 | setting next_w equal to w. 30 | """ 31 | 32 | 33 | def sgd(w, dw, config=None): 34 | """ 35 | Performs vanilla stochastic gradient descent. 36 | 37 | config format: 38 | - learning_rate: Scalar learning rate. 39 | """ 40 | if config is None: config = {} 41 | config.setdefault('learning_rate', 1e-2) 42 | 43 | w -= config['learning_rate'] * dw 44 | return w, config 45 | 46 | 47 | def sgd_momentum(w, dw, config=None): 48 | """ 49 | Performs stochastic gradient descent with momentum. 50 | 51 | config format: 52 | - learning_rate: Scalar learning rate. 53 | - momentum: Scalar between 0 and 1 giving the momentum value. 54 | Setting momentum = 0 reduces to sgd. 55 | - velocity: A numpy array of the same shape as w and dw used to store a 56 | moving average of the gradients. 57 | """ 58 | if config is None: 59 | config = {} 60 | config.setdefault('learning_rate', 1e-2) 61 | config.setdefault('momentum', 0.9) 62 | v = config.get('velocity', np.zeros_like(w)) 63 | 64 | next_w = None 65 | ########################################################################### 66 | # TODO: Implement the momentum update formula. Store the updated value in # 67 | # the next_w variable. You should also use and update the velocity v. 
# 68 | ########################################################################### 69 | v = config['momentum'] * v - config['learning_rate'] * dw 70 | next_w = w + v 71 | ########################################################################### 72 | # END OF YOUR CODE # 73 | ########################################################################### 74 | config['velocity'] = v 75 | 76 | return next_w, config 77 | 78 | 79 | 80 | def rmsprop(x, dx, config=None): 81 | """ 82 | Uses the RMSProp update rule, which uses a moving average of squared 83 | gradient values to set adaptive per-parameter learning rates. 84 | 85 | config format: 86 | - learning_rate: Scalar learning rate. 87 | - decay_rate: Scalar between 0 and 1 giving the decay rate for the squared 88 | gradient cache. 89 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 90 | - cache: Moving average of second moments of gradients. 91 | """ 92 | if config is None: config = {} 93 | config.setdefault('learning_rate', 1e-2) 94 | config.setdefault('decay_rate', 0.99) 95 | config.setdefault('epsilon', 1e-8) 96 | config.setdefault('cache', np.zeros_like(x)) 97 | 98 | next_x = None 99 | ########################################################################### 100 | # TODO: Implement the RMSprop update formula, storing the next value of x # 101 | # in the next_x variable. Don't forget to update cache value stored in # 102 | # config['cache']. 
# 103 | ########################################################################### 104 | cache = config['cache'] 105 | cache = config['decay_rate'] * cache + (1 - config['decay_rate']) * dx**2 106 | next_x = x - config['learning_rate'] * dx / (np.sqrt(cache) + config['epsilon']) 107 | config['cache'] = cache 108 | ########################################################################### 109 | # END OF YOUR CODE # 110 | ########################################################################### 111 | 112 | return next_x, config 113 | 114 | 115 | def adam(x, dx, config=None): 116 | """ 117 | Uses the Adam update rule, which incorporates moving averages of both the 118 | gradient and its square and a bias correction term. 119 | 120 | config format: 121 | - learning_rate: Scalar learning rate. 122 | - beta1: Decay rate for moving average of first moment of gradient. 123 | - beta2: Decay rate for moving average of second moment of gradient. 124 | - epsilon: Small scalar used for smoothing to avoid dividing by zero. 125 | - m: Moving average of gradient. 126 | - v: Moving average of squared gradient. 127 | - t: Iteration number. 128 | """ 129 | if config is None: config = {} 130 | config.setdefault('learning_rate', 1e-3) 131 | config.setdefault('beta1', 0.9) 132 | config.setdefault('beta2', 0.999) 133 | config.setdefault('epsilon', 1e-8) 134 | config.setdefault('m', np.zeros_like(x)) 135 | config.setdefault('v', np.zeros_like(x)) 136 | config.setdefault('t', 1) 137 | 138 | next_x = None 139 | ########################################################################### 140 | # TODO: Implement the Adam update formula, storing the next value of x in # 141 | # the next_x variable. Don't forget to update the m, v, and t variables # 142 | # stored in config. 
# 143 | ########################################################################### 144 | config['m'] = config['beta1'] * config['m'] + (1 - config['beta1']) * dx 145 | config['v'] = config['beta2'] * config['v'] + (1 - config['beta2']) * dx**2 146 | next_x = x - config['learning_rate'] * config['m'] / \ 147 | (np.sqrt(config['v'])+config['epsilon']) 148 | ########################################################################### 149 | # END OF YOUR CODE # 150 | ########################################################################### 151 | 152 | return next_x, config 153 | -------------------------------------------------------------------------------- /assignment2/cs231n/classifiers/cnn.py: -------------------------------------------------------------------------------- 1 | from builtins import object 2 | import numpy as np 3 | 4 | from cs231n.layers import * 5 | from cs231n.fast_layers import * 6 | from cs231n.layer_utils import * 7 | 8 | 9 | class ThreeLayerConvNet(object): 10 | """ 11 | A three-layer convolutional network with the following architecture: 12 | 13 | conv - relu - 2x2 max pool - affine - relu - affine - softmax 14 | 15 | The network operates on minibatches of data that have shape (N, C, H, W) 16 | consisting of N images, each with height H and width W and with C input 17 | channels. 18 | """ 19 | 20 | def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7, 21 | hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0, 22 | dtype=np.float32): 23 | """ 24 | Initialize a new network. 25 | 26 | Inputs: 27 | - input_dim: Tuple (C, H, W) giving size of input data 28 | - num_filters: Number of filters to use in the convolutional layer 29 | - filter_size: Size of filters to use in the convolutional layer 30 | - hidden_dim: Number of units to use in the fully-connected hidden layer 31 | - num_classes: Number of scores to produce from the final affine layer. 
32 | - weight_scale: Scalar giving standard deviation for random initialization 33 | of weights. 34 | - reg: Scalar giving L2 regularization strength 35 | - dtype: numpy datatype to use for computation. 36 | """ 37 | self.params = {} 38 | self.reg = reg 39 | self.dtype = dtype 40 | 41 | ############################################################################ 42 | # TODO: Initialize weights and biases for the three-layer convolutional # 43 | # network. Weights should be initialized from a Gaussian with standard # 44 | # deviation equal to weight_scale; biases should be initialized to zero. # 45 | # All weights and biases should be stored in the dictionary self.params. # 46 | # Store weights and biases for the convolutional layer using the keys 'W1' # 47 | # and 'b1'; use keys 'W2' and 'b2' for the weights and biases of the # 48 | # hidden affine layer, and keys 'W3' and 'b3' for the weights and biases # 49 | # of the output affine layer. # 50 | ############################################################################ 51 | C, H, W = input_dim 52 | self.params['W1'] = weight_scale * np.random.randn(num_filters, C, filter_size, filter_size) 53 | self.params['b1'] = np.zeros(num_filters) 54 | 55 | self.params['W2'] = weight_scale * np.random.randn(H*W*num_filters//4, hidden_dim) 56 | self.params['b2'] = np.zeros(hidden_dim) 57 | 58 | self.params['W3'] = weight_scale * np.random.randn(hidden_dim, num_classes) 59 | self.params['b3'] = np.zeros(num_classes) 60 | ############################################################################ 61 | # END OF YOUR CODE # 62 | ############################################################################ 63 | 64 | for k, v in self.params.items(): 65 | self.params[k] = v.astype(dtype) 66 | 67 | 68 | def loss(self, X, y=None): 69 | """ 70 | Evaluate loss and gradient for the three-layer convolutional network. 71 | 72 | Input / output: Same API as TwoLayerNet in fc_net.py. 
73 | """ 74 | W1, b1 = self.params['W1'], self.params['b1'] 75 | W2, b2 = self.params['W2'], self.params['b2'] 76 | W3, b3 = self.params['W3'], self.params['b3'] 77 | 78 | # pass conv_param to the forward pass for the convolutional layer 79 | filter_size = W1.shape[2] 80 | conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2} 81 | 82 | # pass pool_param to the forward pass for the max-pooling layer 83 | pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2} 84 | 85 | scores = None 86 | ############################################################################ 87 | # TODO: Implement the forward pass for the three-layer convolutional net, # 88 | # computing the class scores for X and storing them in the scores # 89 | # variable. # 90 | ############################################################################ 91 | 92 | # conv 93 | conv_out, cache_conv = conv_forward_strides(X, W1, b1, conv_param) 94 | # | 95 | # V 96 | # relu 97 | relu1_out, cache_relu1 = relu_forward(conv_out) 98 | # | 99 | # V 100 | # pool 101 | pool_out, cache_pool = max_pool_forward_fast(relu1_out, pool_param) 102 | # | 103 | # V 104 | # affine1 105 | aff1_out, cache_aff1 = affine_forward(pool_out, W2, b2) 106 | # | 107 | # V 108 | # relu2 109 | relu2_out, cache_relu2 = relu_forward(aff1_out) 110 | # | 111 | # V 112 | # affine2 113 | scores, cache_aff2 = affine_forward(relu2_out, W3, b3) 114 | 115 | ############################################################################ 116 | # END OF YOUR CODE # 117 | ############################################################################ 118 | 119 | if y is None: 120 | return scores 121 | 122 | loss, grads = 0, {} 123 | ############################################################################ 124 | # TODO: Implement the backward pass for the three-layer convolutional net, # 125 | # storing the loss and gradients in the loss and grads variables. 
Compute # 126 | # data loss using softmax, and make sure that grads[k] holds the gradients # 127 | # for self.params[k]. Don't forget to add L2 regularization! # 128 | ############################################################################ 129 | 130 | # loss 131 | loss, dout = softmax_loss(scores, y) 132 | # | 133 | # V 134 | # affine2 135 | dx, dW3, db3 = affine_backward(dout,cache_aff2) 136 | # | 137 | # V 138 | # relu2 139 | dx = relu_backward(dx, cache_relu2) 140 | # | 141 | # V 142 | # affine1 143 | dx, dW2, db2 = affine_backward(dx, cache_aff1) 144 | # | 145 | # V 146 | # pool 147 | dx = max_pool_backward_fast(dx, cache_pool) 148 | # | 149 | # V 150 | # relu 151 | dx = relu_backward(dx, cache_relu1) 152 | # | 153 | # V 154 | # conv 155 | dx, dW1, db1 = conv_backward_strides(dx, cache_conv) 156 | 157 | grads['W1'], grads['b1'] = dW1 + self.reg*self.params['W1'], db1 158 | grads['W2'], grads['b2'] = dW2 + self.reg*self.params['W2'], db2 159 | grads['W3'], grads['b3'] = dW3 + self.reg*self.params['W3'], db3 160 | 161 | loss += 0.5 * self.reg*(np.sum(W1*W1)+np.sum(W2*W2)+np.sum(W3*W3)) 162 | ############################################################################ 163 | # END OF YOUR CODE # 164 | ############################################################################ 165 | 166 | return loss, grads 167 | -------------------------------------------------------------------------------- /assignment2/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | mathjax: true 4 | permalink: /assignments2017/assignment2/ 5 | --- 6 | 7 | In this assignment you will practice writing backpropagation code, and training 8 | Neural Networks and Convolutional Neural Networks. 
The goals of this assignment
9 | are as follows:
10 | 
11 | - understand **Neural Networks** and how they are arranged in layered
12 | architectures
13 | - understand and be able to implement (vectorized) **backpropagation**
14 | - implement various **update rules** used to optimize Neural Networks
15 | - implement **batch normalization** for training deep networks
16 | - implement **dropout** to regularize networks
17 | - effectively **cross-validate** and find the best hyperparameters for Neural
18 | Network architecture
19 | - understand the architecture of **Convolutional Neural Networks** and
20 | gain experience with training these models on data
21 | 
22 | ## Setup
23 | You can work on the assignment in one of two ways: locally on your own machine, or on a virtual machine on Google Cloud.
24 | 
25 | ### Working remotely on Google Cloud (Recommended)
26 | 
27 | **Note:** after following these instructions, make sure you go to **Working on the assignment** below (you can skip the **Working locally** section).
28 | 
29 | As part of this course, you can use Google Cloud for your assignments. We recommend this route for anyone who is having trouble with installation set-up, or if you would like to use better CPU/GPU resources than you may have locally.
30 | 
31 | Please see the Google Cloud GPU set-up tutorial [here](http://cs231n.github.io/gce-tutorial-gpus/) for instructions.
32 | 
33 | We strongly, strongly recommend using Google Cloud with GPU support for the last part of this assignment (the TensorFlow or PyTorch notebooks), since your training will go much, much faster. :)
34 | 
35 | ### Working locally
36 | Here's how you install the necessary dependencies:
37 | 
38 | **(OPTIONAL) Installing GPU drivers:**
39 | If you choose to work locally, you are at no disadvantage for the first 3 parts of the assignment. For the last part, which is in TensorFlow or PyTorch, however, having a GPU will be a significant advantage.
We recommend using a Google Cloud Instance with a GPU, at least for this part. If you have your own NVIDIA GPU, however, and wish to use that, that's fine -- you'll need to install the drivers for your GPU, install CUDA, install cuDNN, and then install either [TensorFlow](https://www.tensorflow.org/install/) or [PyTorch](http://pytorch.org/). You could theoretically do the entire assignment with no GPUs, though this will make training much slower in the last part. 40 | 41 | **Installing Python 3.5+:** 42 | To use python3, make sure to install version 3.5 or 3.6 on your local machine. If you are on Mac OS X, you can do this using [Homebrew](https://brew.sh) with `brew install python3`. You can find instructions for Ubuntu [here](https://www.digitalocean.com/community/tutorials/how-to-install-python-3-and-set-up-a-local-programming-environment-on-ubuntu-16-04). 43 | 44 | **Virtual environment:** 45 | If you decide to work locally, we recommend using [virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/) for the project. If you choose not to use a virtual environment, it is up to you to make sure that all dependencies for the code are installed globally on your machine. To set up a virtual environment, run the following: 46 | 47 | ```bash 48 | cd assignment2 49 | sudo pip install virtualenv # This may already be installed 50 | virtualenv -p python3 .env # Create a virtual environment (python3) 51 | source .env/bin/activate # Activate the virtual environment 52 | pip install -r requirements.txt # Install dependencies 53 | # Note that this does NOT install TensorFlow or PyTorch, 54 | # which you need to do yourself. 55 | 56 | # Work on the assignment for a while ... 57 | # ... 
and when you're done: 58 | deactivate # Exit the virtual environment 59 | ``` 60 | 61 | Note that every time you want to work on the assignment, you should run `source .env/bin/activate` (from within your `assignment2` folder) to re-activate the virtual environment, and `deactivate` again whenever you are done. 62 | 63 | ## Working on the assignment: 64 | Get the code as a zip file [here](http://cs231n.stanford.edu/assignments/2017/spring1617_assignment2.zip). 65 | 66 | ### Download data: 67 | Once you have the starter code (regardless of which method you choose above), you will need to download the CIFAR-10 dataset. 68 | Run the following from the `assignment2` directory: 69 | 70 | ```bash 71 | cd cs231n/datasets 72 | ./get_datasets.sh 73 | ``` 74 | 75 | ### Start IPython: 76 | After you have the CIFAR-10 data, you should start the IPython notebook server from the 77 | `assignment2` directory, with the `jupyter notebook` command. (See the [Google Cloud Tutorial](http://cs231n.github.io/gce-tutorial/) for any additional steps you may need to do for setting this up, if you are working remotely) 78 | 79 | If you are unfamiliar with IPython, you can also refer to our 80 | [IPython tutorial](/ipython-tutorial). 81 | 82 | ### Some Notes 83 | **NOTE 1:** This year, the `assignment2` code has been tested to be compatible with python versions `3.5` and `3.6` (it may work with other versions of `3.x`, but we won't be officially supporting them). For this assignment, we are NOT officially supporting python2. Use it at your own risk. You will need to make sure that during your `virtualenv` setup that the correct version of `python` is used. You can confirm your python version by (1) activating your virtualenv and (2) running `which python`. 84 | 85 | **NOTE 2:** If you are working in a virtual environment on OSX, you may *potentially* encounter 86 | errors with matplotlib due to the [issues described here](http://matplotlib.org/faq/virtualenv_faq.html). 
In our testing, it seems that this issue is no longer present with the most recent version of matplotlib, but if you do end up running into this issue you may have to use the `start_ipython_osx.sh` script from the `assignment1` directory (instead of `jupyter notebook` above) to launch your IPython notebook server. Note that you may have to modify some variables within the script to match your version of python/installation directory. The script assumes that your virtual environment is named `.env`. 87 | 88 | ### Submitting your work: 89 | Whether you work on the assignment locally or using Google Cloud, once you are done 90 | working run the `collectSubmission.sh` script; this will produce a file called 91 | `assignment2.zip`. Please submit this file on [Canvas](https://canvas.stanford.edu/courses/66461/). 92 | 93 | ### Q1: Fully-connected Neural Network (25 points) 94 | The IPython notebook `FullyConnectedNets.ipynb` will introduce you to our 95 | modular layer design, and then use those layers to implement fully-connected 96 | networks of arbitrary depth. To optimize these models you will implement several 97 | popular update rules. 98 | 99 | ### Q2: Batch Normalization (25 points) 100 | In the IPython notebook `BatchNormalization.ipynb` you will implement batch 101 | normalization, and use it to train deep fully-connected networks. 102 | 103 | ### Q3: Dropout (10 points) 104 | The IPython notebook `Dropout.ipynb` will help you implement Dropout and explore 105 | its effects on model generalization. 106 | 107 | ### Q4: Convolutional Networks (30 points) 108 | In the IPython Notebook ConvolutionalNetworks.ipynb you will implement several new layers that are commonly used in convolutional networks. 109 | 110 | ### Q5: PyTorch / Tensorflow on CIFAR-10 (10 points) 111 | For this last part, you will be working in either TensorFlow or PyTorch, two popular and powerful deep learning frameworks. 
**You only need to complete ONE of these two notebooks.** You do NOT need to do both, but a very small amount of extra credit will be awarded to those who do. 112 | 113 | Open up either `PyTorch.ipynb` or `TensorFlow.ipynb`. There, you will learn how the framework works, culminating in training a convolutional network of your own design on CIFAR-10 to get the best performance you can. 114 | 115 | ### Q5: Do something extra! (up to +10 points) 116 | In the process of training your network, you should feel free to implement 117 | anything that you want to get better performance. You can modify the solver, 118 | implement additional layers, use different types of regularization, use an 119 | ensemble of models, or anything else that comes to mind. If you implement these 120 | or other ideas not covered in the assignment then you will be awarded some bonus 121 | points. 122 | -------------------------------------------------------------------------------- /assignment1/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from six.moves import cPickle as pickle 4 | import numpy as np 5 | import os 6 | from scipy.misc import imread 7 | import platform 8 | 9 | def load_pickle(f): 10 | version = platform.python_version_tuple() 11 | if version[0] == '2': 12 | return pickle.load(f) 13 | elif version[0] == '3': 14 | return pickle.load(f, encoding='latin1') 15 | raise ValueError("invalid python version: {}".format(version)) 16 | 17 | def load_CIFAR_batch(filename): 18 | """ load single batch of cifar """ 19 | with open(filename, 'rb') as f: 20 | datadict = load_pickle(f) 21 | X = datadict['data'] 22 | Y = datadict['labels'] 23 | X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float") 24 | Y = np.array(Y) 25 | return X, Y 26 | 27 | def load_CIFAR10(ROOT): 28 | """ load all of cifar """ 29 | xs = [] 30 | ys = [] 31 | for b in range(1,6): 32 | f = os.path.join(ROOT, 
'data_batch_%d' % (b, )) 33 | X, Y = load_CIFAR_batch(f) 34 | xs.append(X) 35 | ys.append(Y) 36 | Xtr = np.concatenate(xs) 37 | Ytr = np.concatenate(ys) 38 | del X, Y 39 | Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch')) 40 | return Xtr, Ytr, Xte, Yte 41 | 42 | 43 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, 44 | subtract_mean=True): 45 | """ 46 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 47 | it for classifiers. These are the same steps as we used for the SVM, but 48 | condensed to a single function. 49 | """ 50 | # Load the raw CIFAR-10 data 51 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 52 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 53 | 54 | # Subsample the data 55 | mask = list(range(num_training, num_training + num_validation)) 56 | X_val = X_train[mask] 57 | y_val = y_train[mask] 58 | mask = list(range(num_training)) 59 | X_train = X_train[mask] 60 | y_train = y_train[mask] 61 | mask = list(range(num_test)) 62 | X_test = X_test[mask] 63 | y_test = y_test[mask] 64 | 65 | # Normalize the data: subtract the mean image 66 | if subtract_mean: 67 | mean_image = np.mean(X_train, axis=0) 68 | X_train -= mean_image 69 | X_val -= mean_image 70 | X_test -= mean_image 71 | 72 | # Transpose so that channels come first 73 | X_train = X_train.transpose(0, 3, 1, 2).copy() 74 | X_val = X_val.transpose(0, 3, 1, 2).copy() 75 | X_test = X_test.transpose(0, 3, 1, 2).copy() 76 | 77 | # Package data into a dictionary 78 | return { 79 | 'X_train': X_train, 'y_train': y_train, 80 | 'X_val': X_val, 'y_val': y_val, 81 | 'X_test': X_test, 'y_test': y_test, 82 | } 83 | 84 | 85 | def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True): 86 | """ 87 | Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and 88 | TinyImageNet-200 have the same directory structure, so this can be used 89 | to load any of them. 
90 | 91 | Inputs: 92 | - path: String giving path to the directory to load. 93 | - dtype: numpy datatype used to load the data. 94 | - subtract_mean: Whether to subtract the mean training image. 95 | 96 | Returns: A dictionary with the following entries: 97 | - class_names: A list where class_names[i] is a list of strings giving the 98 | WordNet names for class i in the loaded dataset. 99 | - X_train: (N_tr, 3, 64, 64) array of training images 100 | - y_train: (N_tr,) array of training labels 101 | - X_val: (N_val, 3, 64, 64) array of validation images 102 | - y_val: (N_val,) array of validation labels 103 | - X_test: (N_test, 3, 64, 64) array of testing images. 104 | - y_test: (N_test,) array of test labels; if test labels are not available 105 | (such as in student code) then y_test will be None. 106 | - mean_image: (3, 64, 64) array giving mean training image 107 | """ 108 | # First load wnids 109 | with open(os.path.join(path, 'wnids.txt'), 'r') as f: 110 | wnids = [x.strip() for x in f] 111 | 112 | # Map wnids to integer labels 113 | wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)} 114 | 115 | # Use words.txt to get names for each class 116 | with open(os.path.join(path, 'words.txt'), 'r') as f: 117 | wnid_to_words = dict(line.split('\t') for line in f) 118 | for wnid, words in wnid_to_words.iteritems(): 119 | wnid_to_words[wnid] = [w.strip() for w in words.split(',')] 120 | class_names = [wnid_to_words[wnid] for wnid in wnids] 121 | 122 | # Next load training data. 
123 | X_train = [] 124 | y_train = [] 125 | for i, wnid in enumerate(wnids): 126 | if (i + 1) % 20 == 0: 127 | print('loading training data for synset %d / %d' % (i + 1, len(wnids))) 128 | # To figure out the filenames we need to open the boxes file 129 | boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid) 130 | with open(boxes_file, 'r') as f: 131 | filenames = [x.split('\t')[0] for x in f] 132 | num_images = len(filenames) 133 | 134 | X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype) 135 | y_train_block = wnid_to_label[wnid] * np.ones(num_images, dtype=np.int64) 136 | for j, img_file in enumerate(filenames): 137 | img_file = os.path.join(path, 'train', wnid, 'images', img_file) 138 | img = imread(img_file) 139 | if img.ndim == 2: 140 | ## grayscale file 141 | img.shape = (64, 64, 1) 142 | X_train_block[j] = img.transpose(2, 0, 1) 143 | X_train.append(X_train_block) 144 | y_train.append(y_train_block) 145 | 146 | # We need to concatenate all training data 147 | X_train = np.concatenate(X_train, axis=0) 148 | y_train = np.concatenate(y_train, axis=0) 149 | 150 | # Next load validation data 151 | with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f: 152 | img_files = [] 153 | val_wnids = [] 154 | for line in f: 155 | img_file, wnid = line.split('\t')[:2] 156 | img_files.append(img_file) 157 | val_wnids.append(wnid) 158 | num_val = len(img_files) 159 | y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids]) 160 | X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype) 161 | for i, img_file in enumerate(img_files): 162 | img_file = os.path.join(path, 'val', 'images', img_file) 163 | img = imread(img_file) 164 | if img.ndim == 2: 165 | img.shape = (64, 64, 1) 166 | X_val[i] = img.transpose(2, 0, 1) 167 | 168 | # Next load test images 169 | # Students won't have test labels, so we need to iterate over files in the 170 | # images directory. 
171 | img_files = os.listdir(os.path.join(path, 'test', 'images')) 172 | X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype) 173 | for i, img_file in enumerate(img_files): 174 | img_file = os.path.join(path, 'test', 'images', img_file) 175 | img = imread(img_file) 176 | if img.ndim == 2: 177 | img.shape = (64, 64, 1) 178 | X_test[i] = img.transpose(2, 0, 1) 179 | 180 | y_test = None 181 | y_test_file = os.path.join(path, 'test', 'test_annotations.txt') 182 | if os.path.isfile(y_test_file): 183 | with open(y_test_file, 'r') as f: 184 | img_file_to_wnid = {} 185 | for line in f: 186 | line = line.split('\t') 187 | img_file_to_wnid[line[0]] = line[1] 188 | y_test = [wnid_to_label[img_file_to_wnid[img_file]] for img_file in img_files] 189 | y_test = np.array(y_test) 190 | 191 | mean_image = X_train.mean(axis=0) 192 | if subtract_mean: 193 | X_train -= mean_image[None] 194 | X_val -= mean_image[None] 195 | X_test -= mean_image[None] 196 | 197 | return { 198 | 'class_names': class_names, 199 | 'X_train': X_train, 200 | 'y_train': y_train, 201 | 'X_val': X_val, 202 | 'y_val': y_val, 203 | 'X_test': X_test, 204 | 'y_test': y_test, 205 | 'class_names': class_names, 206 | 'mean_image': mean_image, 207 | } 208 | 209 | 210 | def load_models(models_dir): 211 | """ 212 | Load saved models from disk. This will attempt to unpickle all files in a 213 | directory; any files that give errors on unpickling (such as README.txt) will 214 | be skipped. 215 | 216 | Inputs: 217 | - models_dir: String giving the path to a directory containing model files. 218 | Each model file is a pickled dictionary with a 'model' field. 219 | 220 | Returns: 221 | A dictionary mapping model file names to models. 
222 | """ 223 | models = {} 224 | for model_file in os.listdir(models_dir): 225 | with open(os.path.join(models_dir, model_file), 'rb') as f: 226 | try: 227 | models[model_file] = load_pickle(f)['model'] 228 | except pickle.UnpicklingError: 229 | continue 230 | return models 231 | -------------------------------------------------------------------------------- /assignment3/cs231n/captioning_solver.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from builtins import range 3 | from builtins import object 4 | import numpy as np 5 | 6 | from cs231n import optim 7 | from cs231n.coco_utils import sample_coco_minibatch 8 | 9 | 10 | class CaptioningSolver(object): 11 | """ 12 | A CaptioningSolver encapsulates all the logic necessary for training 13 | image captioning models. The CaptioningSolver performs stochastic gradient 14 | descent using different update rules defined in optim.py. 15 | 16 | The solver accepts both training and validataion data and labels so it can 17 | periodically check classification accuracy on both training and validation 18 | data to watch out for overfitting. 19 | 20 | To train a model, you will first construct a CaptioningSolver instance, 21 | passing the model, dataset, and various options (learning rate, batch size, 22 | etc) to the constructor. You will then call the train() method to run the 23 | optimization procedure and train the model. 24 | 25 | After the train() method returns, model.params will contain the parameters 26 | that performed best on the validation set over the course of training. 27 | In addition, the instance variable solver.loss_history will contain a list 28 | of all losses encountered during training and the instance variables 29 | solver.train_acc_history and solver.val_acc_history will be lists containing 30 | the accuracies of the model on the training and validation set at each epoch. 
31 | 32 | Example usage might look something like this: 33 | 34 | data = load_coco_data() 35 | model = MyAwesomeModel(hidden_dim=100) 36 | solver = CaptioningSolver(model, data, 37 | update_rule='sgd', 38 | optim_config={ 39 | 'learning_rate': 1e-3, 40 | }, 41 | lr_decay=0.95, 42 | num_epochs=10, batch_size=100, 43 | print_every=100) 44 | solver.train() 45 | 46 | 47 | A CaptioningSolver works on a model object that must conform to the following 48 | API: 49 | 50 | - model.params must be a dictionary mapping string parameter names to numpy 51 | arrays containing parameter values. 52 | 53 | - model.loss(features, captions) must be a function that computes 54 | training-time loss and gradients, with the following inputs and outputs: 55 | 56 | Inputs: 57 | - features: Array giving a minibatch of features for images, of shape (N, D 58 | - captions: Array of captions for those images, of shape (N, T) where 59 | each element is in the range (0, V]. 60 | 61 | Returns: 62 | - loss: Scalar giving the loss 63 | - grads: Dictionary with the same keys as self.params mapping parameter 64 | names to gradients of the loss with respect to those parameters. 65 | """ 66 | 67 | def __init__(self, model, data, **kwargs): 68 | """ 69 | Construct a new CaptioningSolver instance. 70 | 71 | Required arguments: 72 | - model: A model object conforming to the API described above 73 | - data: A dictionary of training and validation data from load_coco_data 74 | 75 | Optional arguments: 76 | - update_rule: A string giving the name of an update rule in optim.py. 77 | Default is 'sgd'. 78 | - optim_config: A dictionary containing hyperparameters that will be 79 | passed to the chosen update rule. Each update rule requires different 80 | hyperparameters (see optim.py) but all update rules require a 81 | 'learning_rate' parameter so that should always be present. 82 | - lr_decay: A scalar for learning rate decay; after each epoch the learning 83 | rate is multiplied by this value. 
84 | - batch_size: Size of minibatches used to compute loss and gradient during 85 | training. 86 | - num_epochs: The number of epochs to run for during training. 87 | - print_every: Integer; training losses will be printed every print_every 88 | iterations. 89 | - verbose: Boolean; if set to false then no output will be printed during 90 | training. 91 | """ 92 | self.model = model 93 | self.data = data 94 | 95 | # Unpack keyword arguments 96 | self.update_rule = kwargs.pop('update_rule', 'sgd') 97 | self.optim_config = kwargs.pop('optim_config', {}) 98 | self.lr_decay = kwargs.pop('lr_decay', 1.0) 99 | self.batch_size = kwargs.pop('batch_size', 100) 100 | self.num_epochs = kwargs.pop('num_epochs', 10) 101 | 102 | self.print_every = kwargs.pop('print_every', 10) 103 | self.verbose = kwargs.pop('verbose', True) 104 | 105 | # Throw an error if there are extra keyword arguments 106 | if len(kwargs) > 0: 107 | extra = ', '.join('"%s"' % k for k in list(kwargs.keys())) 108 | raise ValueError('Unrecognized arguments %s' % extra) 109 | 110 | # Make sure the update rule exists, then replace the string 111 | # name with the actual function 112 | if not hasattr(optim, self.update_rule): 113 | raise ValueError('Invalid update_rule "%s"' % self.update_rule) 114 | self.update_rule = getattr(optim, self.update_rule) 115 | 116 | self._reset() 117 | 118 | 119 | def _reset(self): 120 | """ 121 | Set up some book-keeping variables for optimization. Don't call this 122 | manually. 
123 | """ 124 | # Set up some variables for book-keeping 125 | self.epoch = 0 126 | self.best_val_acc = 0 127 | self.best_params = {} 128 | self.loss_history = [] 129 | self.train_acc_history = [] 130 | self.val_acc_history = [] 131 | 132 | # Make a deep copy of the optim_config for each parameter 133 | self.optim_configs = {} 134 | for p in self.model.params: 135 | d = {k: v for k, v in self.optim_config.items()} 136 | self.optim_configs[p] = d 137 | 138 | 139 | def _step(self): 140 | """ 141 | Make a single gradient update. This is called by train() and should not 142 | be called manually. 143 | """ 144 | # Make a minibatch of training data 145 | minibatch = sample_coco_minibatch(self.data, 146 | batch_size=self.batch_size, 147 | split='train') 148 | captions, features, urls = minibatch 149 | 150 | # Compute loss and gradient 151 | loss, grads = self.model.loss(features, captions) 152 | self.loss_history.append(loss) 153 | 154 | # Perform a parameter update 155 | for p, w in self.model.params.items(): 156 | dw = grads[p] 157 | config = self.optim_configs[p] 158 | next_w, next_config = self.update_rule(w, dw, config) 159 | self.model.params[p] = next_w 160 | self.optim_configs[p] = next_config 161 | 162 | 163 | # TODO: This does nothing right now; maybe implement BLEU? 164 | def check_accuracy(self, X, y, num_samples=None, batch_size=100): 165 | """ 166 | Check accuracy of the model on the provided data. 167 | 168 | Inputs: 169 | - X: Array of data, of shape (N, d_1, ..., d_k) 170 | - y: Array of labels, of shape (N,) 171 | - num_samples: If not None, subsample the data and only test the model 172 | on num_samples datapoints. 173 | - batch_size: Split X and y into batches of this size to avoid using too 174 | much memory. 175 | 176 | Returns: 177 | - acc: Scalar giving the fraction of instances that were correctly 178 | classified by the model. 
179 | """ 180 | return 0.0 181 | 182 | # Maybe subsample the data 183 | N = X.shape[0] 184 | if num_samples is not None and N > num_samples: 185 | mask = np.random.choice(N, num_samples) 186 | N = num_samples 187 | X = X[mask] 188 | y = y[mask] 189 | 190 | # Compute predictions in batches 191 | num_batches = N / batch_size 192 | if N % batch_size != 0: 193 | num_batches += 1 194 | y_pred = [] 195 | for i in range(num_batches): 196 | start = i * batch_size 197 | end = (i + 1) * batch_size 198 | scores = self.model.loss(X[start:end]) 199 | y_pred.append(np.argmax(scores, axis=1)) 200 | y_pred = np.hstack(y_pred) 201 | acc = np.mean(y_pred == y) 202 | 203 | return acc 204 | 205 | 206 | def train(self): 207 | """ 208 | Run optimization to train the model. 209 | """ 210 | num_train = self.data['train_captions'].shape[0] 211 | iterations_per_epoch = max(num_train // self.batch_size, 1) 212 | num_iterations = self.num_epochs * iterations_per_epoch 213 | 214 | for t in range(num_iterations): 215 | self._step() 216 | 217 | # Maybe print training loss 218 | if self.verbose and t % self.print_every == 0: 219 | print('(Iteration %d / %d) loss: %f' % ( 220 | t + 1, num_iterations, self.loss_history[-1])) 221 | 222 | # At the end of every epoch, increment the epoch counter and decay the 223 | # learning rate. 224 | epoch_end = (t + 1) % iterations_per_epoch == 0 225 | if epoch_end: 226 | self.epoch += 1 227 | for k in self.optim_configs: 228 | self.optim_configs[k]['learning_rate'] *= self.lr_decay 229 | 230 | # Check train and val accuracy on the first iteration, the last 231 | # iteration, and at the end of each epoch. 
def load_pickle(f):
    """Unpickle file object f, handling the Python 2 / Python 3 difference."""
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        # latin1 round-trips raw bytes, so data pickled under Python 2 loads.
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """
    Load a single batch of CIFAR-10 from filename.

    Returns:
    - X: float array of images, shape (num_images, 32, 32, 3)
    - Y: int array of labels, shape (num_images,)
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
        X = datadict['data']
        Y = datadict['labels']
        # Infer the number of images with -1 instead of hard-coding 10000,
        # so batches of any size load correctly as well.
        X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y


def load_CIFAR10(ROOT):
    """Load all of CIFAR-10 from directory ROOT; returns Xtr, Ytr, Xte, Yte."""
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.

    Inputs:
    - num_training / num_validation / num_test: sizes of the three splits
      (validation is carved out of the training set after num_training).
    - subtract_mean: whether to subtract the mean training image.

    Returns a dictionary with keys X_train, y_train, X_val, y_val, X_test,
    y_test; image arrays are channels-first (N, 3, 32, 32).
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data (fancy indexing so each split is a copy)
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids (one WordNet id per line)
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file: add a trailing channel axis so the
                # transpose below works uniformly (assumes 64x64 images)
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    # Load test labels if present (students usually won't have them)
    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    # NOTE: the original dict literal listed 'class_names' twice; the
    # duplicate key has been removed (behavior is unchanged, since Python
    # keeps only the last occurrence anyway).
    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
226 | """ 227 | models = {} 228 | for model_file in os.listdir(models_dir): 229 | with open(os.path.join(models_dir, model_file), 'rb') as f: 230 | try: 231 | models[model_file] = load_pickle(f)['model'] 232 | except pickle.UnpicklingError: 233 | continue 234 | return models 235 | 236 | 237 | def load_imagenet_val(num=None): 238 | """Load a handful of validation images from ImageNet. 239 | 240 | Inputs: 241 | - num: Number of images to load (max of 25) 242 | 243 | Returns: 244 | - X: numpy array with shape [num, 224, 224, 3] 245 | - y: numpy array of integer image labels, shape [num] 246 | - class_names: dict mapping integer label to class name 247 | """ 248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz' 249 | if not os.path.isfile(imagenet_fn): 250 | print('file %s not found' % imagenet_fn) 251 | print('Run the following:') 252 | print('cd cs231n/datasets') 253 | print('bash get_imagenet_val.sh') 254 | assert False, 'Need to download imagenet_val_25.npz' 255 | f = np.load(imagenet_fn) 256 | X = f['X'] 257 | y = f['y'] 258 | class_names = f['label_map'].item() 259 | if num is not None: 260 | X = X[:num] 261 | y = y[:num] 262 | return X, y, class_names 263 | -------------------------------------------------------------------------------- /assignment3/cs231n/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from builtins import range 4 | from six.moves import cPickle as pickle 5 | import numpy as np 6 | import os 7 | from scipy.misc import imread 8 | import platform 9 | 10 | def load_pickle(f): 11 | version = platform.python_version_tuple() 12 | if version[0] == '2': 13 | return pickle.load(f) 14 | elif version[0] == '3': 15 | return pickle.load(f, encoding='latin1') 16 | raise ValueError("invalid python version: {}".format(version)) 17 | 18 | def load_CIFAR_batch(filename): 19 | """ load single batch of cifar """ 20 | with open(filename, 'rb') as f: 21 | datadict = 
def load_CIFAR10(ROOT):
    """ load all of cifar """
    train_images = []
    train_labels = []
    # CIFAR-10 ships its training set as five pickled batches.
    for batch_idx in range(1, 6):
        batch_path = os.path.join(ROOT, 'data_batch_%d' % (batch_idx, ))
        batch_X, batch_Y = load_CIFAR_batch(batch_path)
        train_images.append(batch_X)
        train_labels.append(batch_Y)
    Xtr = np.concatenate(train_images)
    Ytr = np.concatenate(train_labels)
    del batch_X, batch_Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Carve out validation / training / test subsets by explicit index
    # lists (fancy indexing, so every split is an independent copy).
    val_idx = list(range(num_training, num_training + num_validation))
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    train_idx = list(range(num_training))
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    test_idx = list(range(num_test))
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Normalize: subtract the mean training image from every split
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        for split in (X_train, X_val, X_test):
            split -= mean_image

    # Transpose to the channels-first layout expected downstream
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids (one WordNet id per line)
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file: add a trailing channel axis so the
                # transpose below works uniformly (assumes 64x64 images)
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    # Load test labels if present (students usually won't have them)
    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    # NOTE: the original dict literal listed 'class_names' twice; the
    # duplicate key has been removed (behavior is unchanged, since Python
    # keeps only the last occurrence anyway).
    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
226 | """ 227 | models = {} 228 | for model_file in os.listdir(models_dir): 229 | with open(os.path.join(models_dir, model_file), 'rb') as f: 230 | try: 231 | models[model_file] = load_pickle(f)['model'] 232 | except pickle.UnpicklingError: 233 | continue 234 | return models 235 | 236 | 237 | def load_imagenet_val(num=None): 238 | """Load a handful of validation images from ImageNet. 239 | 240 | Inputs: 241 | - num: Number of images to load (max of 25) 242 | 243 | Returns: 244 | - X: numpy array with shape [num, 224, 224, 3] 245 | - y: numpy array of integer image labels, shape [num] 246 | - class_names: dict mapping integer label to class name 247 | """ 248 | imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz' 249 | if not os.path.isfile(imagenet_fn): 250 | print('file %s not found' % imagenet_fn) 251 | print('Run the following:') 252 | print('cd cs231n/datasets') 253 | print('bash get_imagenet_val.sh') 254 | assert False, 'Need to download imagenet_val_25.npz' 255 | f = np.load(imagenet_fn) 256 | X = f['X'] 257 | y = f['y'] 258 | class_names = f['label_map'].item() 259 | if num is not None: 260 | X = X[:num] 261 | y = y[:num] 262 | return X, y, class_names 263 | -------------------------------------------------------------------------------- /assignment1/cs231n/classifiers/k_nearest_neighbor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from past.builtins import xrange 3 | 4 | 5 | class KNearestNeighbor(object): 6 | """ a kNN classifier with L2 distance """ 7 | 8 | def __init__(self): 9 | pass 10 | 11 | def train(self, X, y): 12 | """ 13 | Train the classifier. For k-nearest neighbors this is just 14 | memorizing the training data. 15 | 16 | Inputs: 17 | - X: A numpy array of shape (num_train, D) containing the training data 18 | consisting of num_train samples each of dimension D. 19 | - y: A numpy array of shape (N,) containing the training labels, where 20 | y[i] is the label for X[i]. 
def predict(self, X, k=1, num_loops=0):
    """Predict labels for test data using this k-NN classifier.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data consisting
      of num_test samples each of dimension D.
    - k: The number of nearest neighbors that vote for the predicted labels.
    - num_loops: Which implementation to use to compute distances between
      training points and testing points (0, 1, or 2 explicit Python loops).

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for
      the test data, where y[i] is the predicted label for the test point X[i].

    Raises:
    - ValueError: if num_loops is not 0, 1 or 2.
    """
    if num_loops == 0:
        dists = self.compute_distances_no_loops(X)
    elif num_loops == 1:
        dists = self.compute_distances_one_loop(X)
    elif num_loops == 2:
        dists = self.compute_distances_two_loops(X)
    else:
        raise ValueError('Invalid value %d for num_loops' % num_loops)

    return self.predict_labels(dists, k=k)


def compute_distances_two_loops(self, X):
    """Compute L2 distances with a nested loop over test and training points.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth
      training point.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    # Fix: `xrange` is Python-2-only; `range` is the Python 3 equivalent.
    for i in range(num_test):
        for j in range(num_train):
            # ||a - b||_2 computed without a loop over the dimension.
            dists[i, j] = np.sqrt(np.sum(np.square(self.X_train[j, :] - X[i, :])))
    return dists


def compute_distances_one_loop(self, X):
    """Compute L2 distances using a single loop over the test data.

    Input / Output: Same as compute_distances_two_loops.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    for i in range(num_test):
        # Broadcast X[i] against every training row, reduce over features.
        dists[i] = np.sqrt(np.sum(np.square(X[i, :] - self.X_train), axis=1))
    return dists


def compute_distances_no_loops(self, X):
    """Compute L2 distances with no explicit loops.

    Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 together with
    a matrix multiply and two broadcast sums.

    Input / Output: Same as compute_distances_two_loops.
    """
    sq_dists = -2.0 * X.dot(self.X_train.T)
    sq_dists += np.sum(np.square(X), axis=1, keepdims=True)
    sq_dists += np.sum(np.square(self.X_train), axis=1)
    # Fix: floating-point cancellation can leave tiny negative values here,
    # which would turn into NaN under sqrt; clamp at zero first.
    return np.sqrt(np.maximum(sq_dists, 0.0))


def predict_labels(self, dists, k=1):
    """Predict a label for each test point from a precomputed distance matrix.

    Inputs:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      gives the distance between the ith test point and the jth training point.
    - k: The number of nearest neighbors that vote for each prediction.

    Returns:
    - y: A numpy array of shape (num_test,) containing predicted labels for
      the test data, where y[i] is the predicted label for the test point X[i].
    """
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test)
    for i in range(num_test):
        # Labels of the k nearest training points (assumes y_train holds
        # non-negative integer labels, as np.bincount requires).
        closest_y = self.y_train[np.argsort(dists[i, :])[:k]]
        # np.argmax returns the FIRST maximum of the counts, so ties are
        # broken in favor of the smaller label, as required.
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.
    """
    N, C, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Create output
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1
    out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)

    # im2col unrolls every receptive field into a column so the convolution
    # becomes a single matrix multiply; requires the compiled Cython module.
    # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
    x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
    res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)

    # res has columns ordered (out_h, out_w, N); reorder to (N, F, out_h, out_w).
    out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer that builds the im2col matrix
    with numpy stride tricks instead of an explicit gather.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    #assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
    #assert (H + 2 * pad - HH) % stride == 0, 'height does not work'

    # Pad the input
    p = pad
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # Figure out output dimensions
    H += 2 * pad
    W += 2 * pad
    out_h = (H - HH) // stride + 1
    out_w = (W - WW) // stride + 1

    # Perform an im2col operation by picking clever strides
    # Strides are expressed in element counts here and converted to byte
    # offsets below, which is what numpy expects.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H * W, W, 1, C * H * W, stride * W, stride)
    strides = x.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                  shape=shape, strides=strides)
    # as_strided yields overlapping views into x_padded; copy into a
    # contiguous buffer before flattening into the column matrix.
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output
    res.shape = (F, N, out_h, out_w)
    out = res.transpose(1, 0, 2, 3)

    # Be nice and return a contiguous array
    # The old version of conv_forward_fast doesn't do this, so for a fair
    # comparison we won't either
    out = np.ascontiguousarray(out)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_backward_strides(dout, cache):
    """Backward pass matching conv_forward_strides; returns (dx, dw, db)."""
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias receives the gradient summed over every position it was added at.
    db = np.sum(dout, axis=(0, 2, 3))

    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    # Scatter the column gradients back into image layout (Cython col2im).
    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db


def conv_backward_im2col(dout, cache):
    """
    A fast implementation of the backward pass for a convolutional layer
    based on im2col and col2im.
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    # dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)

    return dx, dw, db


# Public aliases: the strided implementation is the default "fast" conv.
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides


def max_pool_forward_fast(x, pool_param):
    """
    A fast implementation of the forward pass for a max pooling layer.

    This chooses between the reshape method and the im2col method. If the pooling
    regions are square and tile the input image, then we can use the reshape
    method which is very fast. Otherwise we fall back on the im2col method, which
    is not much faster than the naive method.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    same_size = pool_height == pool_width == stride
    tiles = H % pool_height == 0 and W % pool_width == 0
    if same_size and tiles:
        out, reshape_cache = max_pool_forward_reshape(x, pool_param)
        cache = ('reshape', reshape_cache)
    else:
        out, im2col_cache = max_pool_forward_im2col(x, pool_param)
        cache = ('im2col', im2col_cache)
    return out, cache


def max_pool_backward_fast(dout, cache):
    """
    A fast implementation of the backward pass for a max pooling layer.

    This switches between the reshape method and the im2col method depending on
    which method was used to generate the cache.
    """
    method, real_cache = cache
    if method == 'reshape':
        return max_pool_backward_reshape(dout, real_cache)
    elif method == 'im2col':
        return max_pool_backward_im2col(dout, real_cache)
    else:
        raise ValueError('Unrecognized method "%s"' % method)


def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that uses
    some clever reshaping.

    This can only be used for square pooling regions that tile the input.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_height == 0
    # Split each spatial axis into (blocks, within-block) and max over the
    # within-block axes.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache


def max_pool_backward_reshape(dout, cache):
    """
    A fast implementation of the backward pass for the max pooling layer that
    uses some clever broadcasting and reshaping.

    This can only be used if the forward pass was computed using
    max_pool_forward_reshape.

    NOTE: If there are multiple argmaxes in a pooling region, this method
    splits the upstream gradient equally among all argmax elements of the
    input (the division by the mask count below), which yields a valid
    subgradient at a modest performance cost.
    """
    x, x_reshaped, out = cache

    dx_reshaped = np.zeros_like(x_reshaped)
    out_newaxis = out[:, :, :, np.newaxis, :, np.newaxis]
    # mask marks every element equal to its region's max.
    mask = (x_reshaped == out_newaxis)
    dout_newaxis = dout[:, :, :, np.newaxis, :, np.newaxis]
    dout_broadcast, _ = np.broadcast_arrays(dout_newaxis, dx_reshaped)
    dx_reshaped[mask] = dout_broadcast[mask]
    # Split gradient evenly among tied maxima so the total is preserved.
    dx_reshaped /= np.sum(mask, axis=(3, 5), keepdims=True)
    dx = dx_reshaped.reshape(x.shape)

    return dx


def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat each channel as an independent single-channel image so im2col
    # produces one column per pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache


def max_pool_backward_im2col(dout, cache):
    """
    An implementation of the backward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    x, x_cols, x_cols_argmax, pool_param = cache
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    # Route each upstream gradient to the argmax position recorded in the
    # forward pass; all other positions get zero gradient.
    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
    dx = col2im_indices(dx_cols, (N * C, 1, H, W), pool_height, pool_width,
                        padding=0, stride=stride)
    dx = dx.reshape(x.shape)

    return dx
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.
    """
    N, C, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Create output
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1
    out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)

    # im2col unrolls every receptive field into a column so the convolution
    # becomes a single matrix multiply; requires the compiled Cython module.
    # x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
    x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
    res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)

    # res has columns ordered (out_h, out_w, N); reorder to (N, F, out_h, out_w).
    out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_forward_strides(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer that builds the im2col matrix
    with numpy stride tricks instead of an explicit gather.
    """
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    #assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
    #assert (H + 2 * pad - HH) % stride == 0, 'height does not work'

    # Pad the input
    p = pad
    x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    # Figure out output dimensions
    H += 2 * pad
    W += 2 * pad
    out_h = (H - HH) // stride + 1
    out_w = (W - WW) // stride + 1

    # Perform an im2col operation by picking clever strides
    # Strides are expressed in element counts here and converted to byte
    # offsets below, which is what numpy expects.
    shape = (C, HH, WW, N, out_h, out_w)
    strides = (H * W, W, 1, C * H * W, stride * W, stride)
    strides = x.itemsize * np.array(strides)
    x_stride = np.lib.stride_tricks.as_strided(x_padded,
                  shape=shape, strides=strides)
    # as_strided yields overlapping views into x_padded; copy into a
    # contiguous buffer before flattening into the column matrix.
    x_cols = np.ascontiguousarray(x_stride)
    x_cols.shape = (C * HH * WW, N * out_h * out_w)

    # Now all our convolutions are a big matrix multiply
    res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)

    # Reshape the output
    res.shape = (F, N, out_h, out_w)
    out = res.transpose(1, 0, 2, 3)

    # Be nice and return a contiguous array
    # The old version of conv_forward_fast doesn't do this, so for a fair
    # comparison we won't either
    out = np.ascontiguousarray(out)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache


def conv_backward_strides(dout, cache):
    """Backward pass matching conv_forward_strides; returns (dx, dw, db)."""
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    _, _, out_h, out_w = dout.shape

    # Bias receives the gradient summed over every position it was added at.
    db = np.sum(dout, axis=(0, 2, 3))

    dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    # Scatter the column gradients back into image layout (Cython col2im).
    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx_cols.shape = (C, HH, WW, N, out_h, out_w)
    dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)

    return dx, dw, db


def conv_backward_im2col(dout, cache):
    """
    A fast implementation of the backward pass for a convolutional layer
    based on im2col and col2im.
    """
    x, w, b, conv_param, x_cols = cache
    stride, pad = conv_param['stride'], conv_param['pad']

    db = np.sum(dout, axis=(0, 2, 3))

    num_filters, _, filter_height, filter_width = w.shape
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
    # dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
    dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
                       filter_height, filter_width, pad, stride)

    return dx, dw, db


# Public aliases: the strided implementation is the default "fast" conv.
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides


def max_pool_forward_fast(x, pool_param):
    """
    A fast implementation of the forward pass for a max pooling layer.

    This chooses between the reshape method and the im2col method. If the pooling
    regions are square and tile the input image, then we can use the reshape
    method which is very fast. Otherwise we fall back on the im2col method, which
    is not much faster than the naive method.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    same_size = pool_height == pool_width == stride
    tiles = H % pool_height == 0 and W % pool_width == 0
    if same_size and tiles:
        out, reshape_cache = max_pool_forward_reshape(x, pool_param)
        cache = ('reshape', reshape_cache)
    else:
        out, im2col_cache = max_pool_forward_im2col(x, pool_param)
        cache = ('im2col', im2col_cache)
    return out, cache


def max_pool_backward_fast(dout, cache):
    """
    A fast implementation of the backward pass for a max pooling layer.

    This switches between the reshape method and the im2col method depending on
    which method was used to generate the cache.
    """
    method, real_cache = cache
    if method == 'reshape':
        return max_pool_backward_reshape(dout, real_cache)
    elif method == 'im2col':
        return max_pool_backward_im2col(dout, real_cache)
    else:
        raise ValueError('Unrecognized method "%s"' % method)


def max_pool_forward_reshape(x, pool_param):
    """
    A fast implementation of the forward pass for the max pooling layer that uses
    some clever reshaping.

    This can only be used for square pooling regions that tile the input.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    assert pool_height == pool_width == stride, 'Invalid pool params'
    assert H % pool_height == 0
    assert W % pool_height == 0
    # Split each spatial axis into (blocks, within-block) and max over the
    # within-block axes.
    x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
                           W // pool_width, pool_width)
    out = x_reshaped.max(axis=3).max(axis=4)

    cache = (x, x_reshaped, out)
    return out, cache


def max_pool_backward_reshape(dout, cache):
    """
    A fast implementation of the backward pass for the max pooling layer that
    uses some clever broadcasting and reshaping.

    This can only be used if the forward pass was computed using
    max_pool_forward_reshape.

    NOTE: If there are multiple argmaxes in a pooling region, this method
    splits the upstream gradient equally among all argmax elements of the
    input (the division by the mask count below), which yields a valid
    subgradient at a modest performance cost.
    """
    x, x_reshaped, out = cache

    dx_reshaped = np.zeros_like(x_reshaped)
    out_newaxis = out[:, :, :, np.newaxis, :, np.newaxis]
    # mask marks every element equal to its region's max.
    mask = (x_reshaped == out_newaxis)
    dout_newaxis = dout[:, :, :, np.newaxis, :, np.newaxis]
    dout_broadcast, _ = np.broadcast_arrays(dout_newaxis, dx_reshaped)
    dx_reshaped[mask] = dout_broadcast[mask]
    # Split gradient evenly among tied maxima so the total is preserved.
    dx_reshaped /= np.sum(mask, axis=(3, 5), keepdims=True)
    dx = dx_reshaped.reshape(x.shape)

    return dx


def max_pool_forward_im2col(x, pool_param):
    """
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat each channel as an independent single-channel image so im2col
    # produces one column per pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache


def max_pool_backward_im2col(dout, cache):
    """
    An implementation of the backward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.
    """
    x, x_cols, x_cols_argmax, pool_param = cache
    N, C, H, W = x.shape
    pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    # Route each upstream gradient to the argmax position recorded in the
    # forward pass; all other positions get zero gradient.
    dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
    dx_cols = np.zeros_like(x_cols)
    dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
    dx = col2im_indices(dx_cols, (N * C, 1, H, W), pool_height, pool_width,
                        padding=0, stride=stride)
    dx = dx.reshape(x.shape)

    return dx
def affine_forward(x, w, b):
    """
    Computes the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) where x[i] is the ith input.
    We multiply this against a weight matrix of shape (D, M) where
    D = \prod_i d_i

    Inputs:
    x - Input data, of shape (N, d_1, ..., d_k)
    w - Weights, of shape (D, M)
    b - Biases, of shape (M,)

    Returns a tuple of:
    - out: output, of shape (N, M)
    - cache: (x, w, b)
    """
    # Flatten each example to a row vector before the matrix multiply.
    out = x.reshape(x.shape[0], -1).dot(w) + b
    cache = (x, w, b)
    return out, cache


def affine_backward(dout, cache):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple of:
      - x: Input data, of shape (N, d_1, ..., d_k)
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)

    Returns a tuple of:
    - dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
    - dw: Gradient with respect to w, of shape (D, M)
    - db: Gradient with respect to b, of shape (M,)
    """
    x, w, b = cache
    # Undo the forward flatten so dx has the same shape as x.
    dx = dout.dot(w.T).reshape(x.shape)
    dw = x.reshape(x.shape[0], -1).T.dot(dout)
    # b was broadcast over the batch, so its gradient sums over it.
    db = np.sum(dout, axis=0)
    return dx, dw, db


def relu_forward(x):
    """
    Computes the forward pass for a layer of rectified linear units (ReLUs).

    Input:
    - x: Inputs, of any shape

    Returns a tuple of:
    - out: Output, of the same shape as x
    - cache: x
    """
    out = np.maximum(0, x)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    """
    Computes the backward pass for a layer of rectified linear units (ReLUs).

    Input:
    - dout: Upstream derivatives, of any shape
    - cache: Input x, of same shape as dout

    Returns:
    - dx: Gradient with respect to x
    """
    x = cache
    # Gradient passes through only where the input was positive.
    dx = np.where(x > 0, dout, 0)
    return dx


def batchnorm_forward(x, gamma, beta, bn_param):
    """
    Forward pass for batch normalization.

    During training the sample mean and (uncorrected) sample variance are
    computed from minibatch statistics and used to normalize the incoming data.
    During training we also keep an exponentially decaying running mean of the mean
    and variance of each feature, and these averages are used to normalize data
    at test-time.

    At each timestep we update the running averages for mean and variance using
    an exponential decay based on the momentum parameter:

    running_mean = momentum * running_mean + (1 - momentum) * sample_mean
    running_var = momentum * running_var + (1 - momentum) * sample_var

    Note that the batch normalization paper suggests a different test-time
    behavior: they compute sample mean and variance for each feature using a
    large number of training images rather than using a running average. For
    this implementation we have chosen to use running averages instead since
    they do not require an additional estimation step; the torch7 implementation
    of batch normalization also uses running averages.

    Input:
    - x: Data of shape (N, D)
    - gamma: Scale parameter of shape (D,)
    - beta: Shift parameter of shape (D,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance.
      - running_mean: Array of shape (D,) giving running mean of features
      - running_var Array of shape (D,) giving running variance of features

    Returns a tuple of:
    - out: of shape (N, D)
    - cache: A tuple of values needed in the backward pass
    """
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    out, cache = None, None
    if mode == 'train':
        # Compute output
        mu = x.mean(axis=0)
        xc = x - mu
        var = np.mean(xc ** 2, axis=0)  # uncorrected (biased) variance
        std = np.sqrt(var + eps)
        xn = xc / std
        out = gamma * xn + beta

        cache = (mode, x, gamma, xc, std, xn, out)

        # Update running average of mean
        # (in-place ops mutate the array fetched from bn_param directly)
        running_mean *= momentum
        running_mean += (1 - momentum) * mu

        # Update running average of variance
        running_var *= momentum
        running_var += (1 - momentum) * var
    elif mode == 'test':
        # Using running mean and variance to normalize
        std = np.sqrt(running_var + eps)
        xn = (x - running_mean) / std
        out = gamma * xn + beta
        cache = (mode, x, xn, gamma, beta, std)
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the updated running means back into bn_param
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache


def batchnorm_backward(dout, cache):
    """
    Backward pass for batch normalization.

    For this implementation, you should write out a computation graph for
    batch normalization on paper and propagate gradients backward through
    intermediate nodes.

    Inputs:
    - dout: Upstream derivatives, of shape (N, D)
    - cache: Variable of intermediates from batchnorm_forward.

    Returns a tuple of:
    - dx: Gradient with respect to inputs x, of shape (N, D)
    - dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
    - dbeta: Gradient with respect to shift parameter beta, of shape (D,)
    """
    mode = cache[0]
    if mode == 'train':
        mode, x, gamma, xc, std, xn, out = cache

        N = x.shape[0]
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        # Chain rule through xn = xc / std, var = mean(xc^2), mu = mean(x).
        dxn = gamma * dout
        dxc = dxn / std
        dstd = -np.sum((dxn * xc) / (std * std), axis=0)
        dvar = 0.5 * dstd / std
        dxc += (2.0 / N) * xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / N
    elif mode == 'test':
        # At test time mean/std are constants, so the backward pass is just
        # the affine part: dx = gamma * dout / std.
        mode, x, xn, gamma, beta, std = cache
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(xn * dout, axis=0)
        dxn = gamma * dout
        dx = dxn / std
    else:
        raise ValueError(mode)

    return dx, dgamma, dbeta


def spatial_batchnorm_forward(x, gamma, beta, bn_param):
    """
    Computes the forward pass for spatial batch normalization.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - gamma: Scale parameter, of shape (C,)
    - beta: Shift parameter, of shape (C,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance. momentum=0 means that
        old information is discarded completely at every time step, while
        momentum=1 means that new information is never incorporated. The
        default of momentum=0.9 should work well in most situations.
      - running_mean: Array of shape (C,) giving running mean of features
      - running_var Array of shape (C,) giving running variance of features

    Returns a tuple of:
    - out: Output data, of shape (N, C, H, W)
    - cache: Values needed for the backward pass
    """
    N, C, H, W = x.shape
    # Fold N, H, W into one batch axis so vanilla batchnorm normalizes
    # per-channel statistics over all spatial positions.
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache


def spatial_batchnorm_backward(dout, cache):
    """
    Computes the backward pass for spatial batch normalization.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H, W)
    - cache: Values from the forward pass

    Returns a tuple of:
    - dx: Gradient with respect to inputs, of shape (N, C, H, W)
    - dgamma: Gradient with respect to scale parameter, of shape (C,)
    - dbeta: Gradient with respect to shift parameter, of shape (C,)
    """
    N, C, H, W = dout.shape
    # Mirror the forward reshape, run vanilla batchnorm backward, reshape back.
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta


def svm_loss(x, y):
    """
    Computes the loss and gradient using for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    # Hinge loss with margin 1; the correct class is zeroed out so it does
    # not contribute its own (always-1) margin.
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    # Each violated margin contributes +1 to its class and -1 to the correct
    # class of the same example.
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    # Shift scores by the row max for numerical stability before exponentiating.
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    # d(loss)/d(score) = softmax probability minus 1 at the true class.
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx
You will: 7 | 8 | - implement a fully-vectorized **loss function** for the Softmax classifier 9 | - implement the fully-vectorized expression for its **analytic gradient** 10 | - **check your implementation** with numerical gradient 11 | - use a validation set to **tune the learning rate and regularization** strength 12 | - **optimize** the loss function with **SGD** 13 | - **visualize** the final learned weights 14 | 15 | 16 | 17 | ```python 18 | import random 19 | import numpy as np 20 | from cs231n.data_utils import load_CIFAR10 21 | import matplotlib.pyplot as plt 22 | 23 | from __future__ import print_function 24 | 25 | %matplotlib inline 26 | plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots 27 | plt.rcParams['image.interpolation'] = 'nearest' 28 | plt.rcParams['image.cmap'] = 'gray' 29 | 30 | # for auto-reloading external modules 31 | # see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython 32 | %load_ext autoreload 33 | %autoreload 2 34 | ``` 35 | 36 | 37 | ```python 38 | def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): 39 | """ 40 | Load the CIFAR-10 dataset from disk and perform preprocessing to prepare 41 | it for the linear classifier. These are the same steps as we used for the 42 | SVM, but condensed to a single function.
43 | """ 44 | # Load the raw CIFAR-10 data 45 | cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' 46 | X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) 47 | 48 | # subsample the data 49 | mask = list(range(num_training, num_training + num_validation)) 50 | X_val = X_train[mask] 51 | y_val = y_train[mask] 52 | mask = list(range(num_training)) 53 | X_train = X_train[mask] 54 | y_train = y_train[mask] 55 | mask = list(range(num_test)) 56 | X_test = X_test[mask] 57 | y_test = y_test[mask] 58 | mask = np.random.choice(num_training, num_dev, replace=False) 59 | X_dev = X_train[mask] 60 | y_dev = y_train[mask] 61 | 62 | # Preprocessing: reshape the image data into rows 63 | X_train = np.reshape(X_train, (X_train.shape[0], -1)) 64 | X_val = np.reshape(X_val, (X_val.shape[0], -1)) 65 | X_test = np.reshape(X_test, (X_test.shape[0], -1)) 66 | X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) 67 | 68 | # Normalize the data: subtract the mean image 69 | mean_image = np.mean(X_train, axis = 0) 70 | X_train -= mean_image 71 | X_val -= mean_image 72 | X_test -= mean_image 73 | X_dev -= mean_image 74 | 75 | # add bias dimension and transform into columns 76 | X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) 77 | X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) 78 | X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) 79 | X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) 80 | 81 | return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev 82 | 83 | 84 | # Invoke the above function to get our data. 
85 | X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data() 86 | print('Train data shape: ', X_train.shape) 87 | print('Train labels shape: ', y_train.shape) 88 | print('Validation data shape: ', X_val.shape) 89 | print('Validation labels shape: ', y_val.shape) 90 | print('Test data shape: ', X_test.shape) 91 | print('Test labels shape: ', y_test.shape) 92 | print('dev data shape: ', X_dev.shape) 93 | print('dev labels shape: ', y_dev.shape) 94 | ``` 95 | 96 | Train data shape: (49000, 3073) 97 | Train labels shape: (49000,) 98 | Validation data shape: (1000, 3073) 99 | Validation labels shape: (1000,) 100 | Test data shape: (1000, 3073) 101 | Test labels shape: (1000,) 102 | dev data shape: (500, 3073) 103 | dev labels shape: (500,) 104 | 105 | 106 | ## Softmax Classifier 107 | 108 | Your code for this section will all be written inside **cs231n/classifiers/softmax.py**. 109 | 110 | 111 | 112 | ```python 113 | # First implement the naive softmax loss function with nested loops. 114 | # Open the file cs231n/classifiers/softmax.py and implement the 115 | # softmax_loss_naive function. 116 | 117 | from cs231n.classifiers.softmax import softmax_loss_naive 118 | import time 119 | 120 | # Generate a random softmax weight matrix and use it to compute the loss. 121 | W = np.random.randn(3073, 10) * 0.0001 122 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) 123 | 124 | # As a rough sanity check, our loss should be something close to -log(0.1). 125 | print('loss: %f' % loss) 126 | print('sanity check: %f' % (-np.log(0.1))) 127 | ``` 128 | 129 | loss: 2.299190 130 | sanity check: 2.302585 131 | 132 | 133 | ## Inline Question 1: 134 | **Why do we expect our loss to be close to -log(0.1)? Explain briefly.** 135 | 136 | **Your answer:** *Fill this in* 137 | 138 | 139 | 140 | ```python 141 | # Complete the implementation of softmax_loss_naive and implement a (naive) 142 | # version of the gradient that uses nested loops.
143 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0) 144 | 145 | # As we did for the SVM, use numeric gradient checking as a debugging tool. 146 | # The numeric gradient should be close to the analytic gradient. 147 | from cs231n.gradient_check import grad_check_sparse 148 | f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0] 149 | grad_numerical = grad_check_sparse(f, W, grad, 10) 150 | 151 | # similar to SVM case, do another gradient check with regularization 152 | loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1) 153 | f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0] 154 | grad_numerical = grad_check_sparse(f, W, grad, 10) 155 | ``` 156 | 157 | numerical: -0.994952 analytic: -0.994952, relative error: 6.994422e-08 158 | numerical: 2.846696 analytic: 2.846696, relative error: 3.612826e-09 159 | numerical: -0.671531 analytic: -0.671531, relative error: 2.605211e-09 160 | numerical: 0.994550 analytic: 0.994550, relative error: 4.481386e-08 161 | numerical: 2.151402 analytic: 2.151402, relative error: 1.898146e-08 162 | numerical: 3.071263 analytic: 3.071263, relative error: 1.528335e-08 163 | numerical: -2.056691 analytic: -2.056691, relative error: 2.350468e-08 164 | numerical: -2.970053 analytic: -2.970053, relative error: 2.470392e-08 165 | numerical: -1.339934 analytic: -1.339934, relative error: 1.604170e-08 166 | numerical: 0.710373 analytic: 0.710373, relative error: 6.659996e-08 167 | numerical: 3.004861 analytic: 3.004861, relative error: 1.918921e-08 168 | numerical: -0.390148 analytic: -0.390148, relative error: 6.354846e-09 169 | numerical: 0.287195 analytic: 0.287195, relative error: 8.483866e-08 170 | numerical: -0.619845 analytic: -0.619845, relative error: 2.163794e-08 171 | numerical: 1.122758 analytic: 1.122758, relative error: 1.166628e-08 172 | numerical: 1.725309 analytic: 1.725309, relative error: 2.307015e-08 173 | numerical: 1.899298 analytic: 1.899298, relative error: 8.304561e-09 174 | numerical: 1.446963 
analytic: 1.446963, relative error: 3.126118e-08 175 | numerical: -2.717449 analytic: -2.717449, relative error: 2.436764e-08 176 | numerical: 0.010049 analytic: 0.010049, relative error: 6.364739e-07 177 | 178 | 179 | 180 | ```python 181 | # Now that we have a naive implementation of the softmax loss function and its gradient, 182 | # implement a vectorized version in softmax_loss_vectorized. 183 | # The two versions should compute the same results, but the vectorized version should be 184 | # much faster. 185 | tic = time.time() 186 | loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005) 187 | toc = time.time() 188 | print('naive loss: %e computed in %fs' % (loss_naive, toc - tic)) 189 | 190 | from cs231n.classifiers.softmax import softmax_loss_vectorized 191 | tic = time.time() 192 | loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.000005) 193 | toc = time.time() 194 | print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)) 195 | 196 | # As we did for the SVM, we use the Frobenius norm to compare the two versions 197 | # of the gradient. 198 | grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro') 199 | print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized)) 200 | print('Gradient difference: %f' % grad_difference) 201 | ``` 202 | 203 | naive loss: 2.321617e+00 computed in 0.105338s 204 | vectorized loss: 2.321617e+00 computed in 0.004589s 205 | Loss difference: 0.000000 206 | Gradient difference: 0.000000 207 | 208 | 209 | 210 | ```python 211 | # Use the validation set to tune hyperparameters (regularization strength and 212 | # learning rate). You should experiment with different ranges for the learning 213 | # rates and regularization strengths; if you are careful you should be able to 214 | # get a classification accuracy of over 0.35 on the validation set. 
215 | from cs231n.classifiers import Softmax 216 | results = {} 217 | best_val = -1 218 | best_softmax = None 219 | learning_rates = [1e-7, 5e-7] 220 | regularization_strengths = [2.5e4, 5e4] 221 | 222 | ################################################################################ 223 | # TODO: # 224 | # Use the validation set to set the learning rate and regularization strength. # 225 | # This should be identical to the validation that you did for the SVM; save # 226 | # the best trained softmax classifier in best_softmax. # 227 | ################################################################################ 228 | for lr in learning_rates: 229 | for reg in regularization_strengths: 230 | 231 | ################################################################################ 232 | # END OF YOUR CODE # 233 | ################################################################################ 234 | 235 | # Print out results. 236 | for lr, reg in sorted(results): 237 | train_accuracy, val_accuracy = results[(lr, reg)] 238 | print('lr %e reg %e train accuracy: %f val accuracy: %f' % ( 239 | lr, reg, train_accuracy, val_accuracy)) 240 | 241 | print('best validation accuracy achieved during cross-validation: %f' % best_val) 242 | ``` 243 | 244 | best validation accuracy achieved during cross-validation: -1.000000 245 | 246 | 247 | 248 | ```python 249 | # evaluate on test set 250 | # Evaluate the best softmax on test set 251 | y_test_pred = best_softmax.predict(X_test) 252 | test_accuracy = np.mean(y_test == y_test_pred) 253 | print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, )) 254 | ``` 255 | 256 | 257 | ```python 258 | # Visualize the learned weights for each class 259 | w = best_softmax.W[:-1,:] # strip out the bias 260 | w = w.reshape(32, 32, 3, 10) 261 | 262 | w_min, w_max = np.min(w), np.max(w) 263 | 264 | classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] 265 | for i in range(10): 266 |
plt.subplot(2, 5, i + 1) 267 | 268 | # Rescale the weights to be between 0 and 255 269 | wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min) 270 | plt.imshow(wimg.astype('uint8')) 271 | plt.axis('off') 272 | plt.title(classes[i]) 273 | ``` 274 | --------------------------------------------------------------------------------